├── LICENSE
├── README.md
├── configs
│   ├── convnext
│   │   ├── convnext_tiny.json
│   │   └── kw1x_convnext_tiny.json
│   ├── mobilenetv2_050
│   │   ├── kw1d2x_mobilenetv2_050.json
│   │   ├── kw1x_mobilenetv2_050.json
│   │   ├── kw4x_mobilenetv2_050.json
│   │   └── mobilenetv2_050.json
│   ├── mobilenetv2_100
│   │   ├── kw1d2x_mobilenetv2_100.json
│   │   ├── kw1x_mobilenetv2_100.json
│   │   ├── kw4x_mobilenetv2_100.json
│   │   └── mobilenetv2_100.json
│   ├── resnet18
│   │   ├── kw1d2x_resnet18.json
│   │   ├── kw1d4x_resnet18.json
│   │   ├── kw1x_resnet18.json
│   │   ├── kw2x_resnet18.json
│   │   ├── kw4x_resnet18.json
│   │   └── resnet18.json
│   └── resnet50
│       ├── kw1d2x_resnet50.json
│       ├── kw1x_resnet50.json
│       ├── kw4x_resnet50.json
│       └── resnet50.json
├── datasets.py
├── detection
│   ├── README.md
│   ├── configs
│   │   ├── _base_
│   │   │   └── models
│   │   │       ├── mask_rcnn_convnext_fpn.py
│   │   │       ├── mask_rcnn_mobilenetv2_fpn.py
│   │   │       └── mask_rcnn_r50_fpn.py
│   │   └── kernelwarehouse
│   │       ├── convnext_tiny
│   │       │   ├── mask_rcnn_convnext_tiny_adamw_1x_coco.py
│   │       │   └── mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py
│   │       ├── mobilenetv2
│   │       │   ├── mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py
│   │       │   ├── mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py
│   │       │   └── mask_rcnn_mobilenetv2_100_adamw_1x_coco.py
│   │       └── resnet50
│   │           ├── mask_rcnn_kw1x_resnet50_adamw_1x_coco.py
│   │           ├── mask_rcnn_kw4x_resnet50_adamw_1x_coco.py
│   │           └── mask_rcnn_resnet50_adamw_1x_coco.py
│   ├── mmcv_custom
│   │   ├── __init__.py
│   │   ├── customized_text.py
│   │   ├── layer_decay_optimizer_constructor.py
│   │   └── runner
│   │       └── checkpoint.py
│   └── mmdet
│       └── models
│           └── backbones
│               ├── __init__.py
│               ├── convnext.py
│               ├── kernel_warehouse.py
│               ├── kw_convnext.py
│               ├── kw_mobilenetv2.py
│               ├── kw_resnet.py
│               ├── mobilenetv2.py
│               └── resnet.py
├── engine.py
├── fig
│   ├── Fig_Architecture.pdf
│   └── Fig_Architecture.png
├── main.py
├── models
│   ├── __init__.py
│   ├── convnext.py
│   ├── kw1d2x_mobilenetv2.py
│   ├── kw_convnext.py
│   ├── kw_mobilenetv2.py
│   ├── kw_resnet.py
│   ├── mobilenetv2.py
│   └── resnet.py
├── modules
│   └── kernel_warehouse.py
├── optim_factory.py
└── utils.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 |
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | Copyright (c) 2024 OSVAI/KernelWarehouse
7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8 |
9 | 1. Definitions.
10 |
11 | "License" shall mean the terms and conditions for use, reproduction,
12 | and distribution as defined by Sections 1 through 9 of this document.
13 |
14 | "Licensor" shall mean the copyright owner or entity authorized by
15 | the copyright owner that is granting the License.
16 |
17 | "Legal Entity" shall mean the union of the acting entity and all
18 | other entities that control, are controlled by, or are under common
19 | control with that entity. For the purposes of this definition,
20 | "control" means (i) the power, direct or indirect, to cause the
21 | direction or management of such entity, whether by contract or
22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
23 | outstanding shares, or (iii) beneficial ownership of such entity.
24 |
25 | "You" (or "Your") shall mean an individual or Legal Entity
26 | exercising permissions granted by this License.
27 |
28 | "Source" form shall mean the preferred form for making modifications,
29 | including but not limited to software source code, documentation
30 | source, and configuration files.
31 |
32 | "Object" form shall mean any form resulting from mechanical
33 | transformation or translation of a Source form, including but
34 | not limited to compiled object code, generated documentation,
35 | and conversions to other media types.
36 |
37 | "Work" shall mean the work of authorship, whether in Source or
38 | Object form, made available under the License, as indicated by a
39 | copyright notice that is included in or attached to the work
40 | (an example is provided in the Appendix below).
41 |
42 | "Derivative Works" shall mean any work, whether in Source or Object
43 | form, that is based on (or derived from) the Work and for which the
44 | editorial revisions, annotations, elaborations, or other modifications
45 | represent, as a whole, an original work of authorship. For the purposes
46 | of this License, Derivative Works shall not include works that remain
47 | separable from, or merely link (or bind by name) to the interfaces of,
48 | the Work and Derivative Works thereof.
49 |
50 | "Contribution" shall mean any work of authorship, including
51 | the original version of the Work and any modifications or additions
52 | to that Work or Derivative Works thereof, that is intentionally
53 | submitted to Licensor for inclusion in the Work by the copyright owner
54 | or by an individual or Legal Entity authorized to submit on behalf of
55 | the copyright owner. For the purposes of this definition, "submitted"
56 | means any form of electronic, verbal, or written communication sent
57 | to the Licensor or its representatives, including but not limited to
58 | communication on electronic mailing lists, source code control systems,
59 | and issue tracking systems that are managed by, or on behalf of, the
60 | Licensor for the purpose of discussing and improving the Work, but
61 | excluding communication that is conspicuously marked or otherwise
62 | designated in writing by the copyright owner as "Not a Contribution."
63 |
64 | "Contributor" shall mean Licensor and any individual or Legal Entity
65 | on behalf of whom a Contribution has been received by Licensor and
66 | subsequently incorporated within the Work.
67 |
68 | 2. Grant of Copyright License. Subject to the terms and conditions of
69 | this License, each Contributor hereby grants to You a perpetual,
70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71 | copyright license to reproduce, prepare Derivative Works of,
72 | publicly display, publicly perform, sublicense, and distribute the
73 | Work and such Derivative Works in Source or Object form.
74 |
75 | 3. Grant of Patent License. Subject to the terms and conditions of
76 | this License, each Contributor hereby grants to You a perpetual,
77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78 | (except as stated in this section) patent license to make, have made,
79 | use, offer to sell, sell, import, and otherwise transfer the Work,
80 | where such license applies only to those patent claims licensable
81 | by such Contributor that are necessarily infringed by their
82 | Contribution(s) alone or by combination of their Contribution(s)
83 | with the Work to which such Contribution(s) was submitted. If You
84 | institute patent litigation against any entity (including a
85 | cross-claim or counterclaim in a lawsuit) alleging that the Work
86 | or a Contribution incorporated within the Work constitutes direct
87 | or contributory patent infringement, then any patent licenses
88 | granted to You under this License for that Work shall terminate
89 | as of the date such litigation is filed.
90 |
91 | 4. Redistribution. You may reproduce and distribute copies of the
92 | Work or Derivative Works thereof in any medium, with or without
93 | modifications, and in Source or Object form, provided that You
94 | meet the following conditions:
95 |
96 | (a) You must give any other recipients of the Work or
97 | Derivative Works a copy of this License; and
98 |
99 | (b) You must cause any modified files to carry prominent notices
100 | stating that You changed the files; and
101 |
102 | (c) You must retain, in the Source form of any Derivative Works
103 | that You distribute, all copyright, patent, trademark, and
104 | attribution notices from the Source form of the Work,
105 | excluding those notices that do not pertain to any part of
106 | the Derivative Works; and
107 |
108 | (d) If the Work includes a "NOTICE" text file as part of its
109 | distribution, then any Derivative Works that You distribute must
110 | include a readable copy of the attribution notices contained
111 | within such NOTICE file, excluding those notices that do not
112 | pertain to any part of the Derivative Works, in at least one
113 | of the following places: within a NOTICE text file distributed
114 | as part of the Derivative Works; within the Source form or
115 | documentation, if provided along with the Derivative Works; or,
116 | within a display generated by the Derivative Works, if and
117 | wherever such third-party notices normally appear. The contents
118 | of the NOTICE file are for informational purposes only and
119 | do not modify the License. You may add Your own attribution
120 | notices within Derivative Works that You distribute, alongside
121 | or as an addendum to the NOTICE text from the Work, provided
122 | that such additional attribution notices cannot be construed
123 | as modifying the License.
124 |
125 | You may add Your own copyright statement to Your modifications and
126 | may provide additional or different license terms and conditions
127 | for use, reproduction, or distribution of Your modifications, or
128 | for any such Derivative Works as a whole, provided Your use,
129 | reproduction, and distribution of the Work otherwise complies with
130 | the conditions stated in this License.
131 |
132 | 5. Submission of Contributions. Unless You explicitly state otherwise,
133 | any Contribution intentionally submitted for inclusion in the Work
134 | by You to the Licensor shall be under the terms and conditions of
135 | this License, without any additional terms or conditions.
136 | Notwithstanding the above, nothing herein shall supersede or modify
137 | the terms of any separate license agreement you may have executed
138 | with Licensor regarding such Contributions.
139 |
140 | 6. Trademarks. This License does not grant permission to use the trade
141 | names, trademarks, service marks, or product names of the Licensor,
142 | except as required for reasonable and customary use in describing the
143 | origin of the Work and reproducing the content of the NOTICE file.
144 |
145 | 7. Disclaimer of Warranty. Unless required by applicable law or
146 | agreed to in writing, Licensor provides the Work (and each
147 | Contributor provides its Contributions) on an "AS IS" BASIS,
148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149 | implied, including, without limitation, any warranties or conditions
150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151 | PARTICULAR PURPOSE. You are solely responsible for determining the
152 | appropriateness of using or redistributing the Work and assume any
153 | risks associated with Your exercise of permissions under this License.
154 |
155 | 8. Limitation of Liability. In no event and under no legal theory,
156 | whether in tort (including negligence), contract, or otherwise,
157 | unless required by applicable law (such as deliberate and grossly
158 | negligent acts) or agreed to in writing, shall any Contributor be
159 | liable to You for damages, including any direct, indirect, special,
160 | incidental, or consequential damages of any character arising as a
161 | result of this License or out of the use or inability to use the
162 | Work (including but not limited to damages for loss of goodwill,
163 | work stoppage, computer failure or malfunction, or any and all
164 | other commercial damages or losses), even if such Contributor
165 | has been advised of the possibility of such damages.
166 |
167 | 9. Accepting Warranty or Additional Liability. While redistributing
168 | the Work or Derivative Works thereof, You may choose to offer,
169 | and charge a fee for, acceptance of support, warranty, indemnity,
170 | or other liability obligations and/or rights consistent with this
171 | License. However, in accepting such obligations, You may act only
172 | on Your own behalf and on Your sole responsibility, not on behalf
173 | of any other Contributor, and only if You agree to indemnify,
174 | defend, and hold each Contributor harmless for any liability
175 | incurred by, or claims asserted against, such Contributor by reason
176 | of your accepting any such warranty or additional liability.
177 |
178 | END OF TERMS AND CONDITIONS
179 |
180 | APPENDIX: How to apply the Apache License to your work.
181 |
182 | To apply the Apache License to your work, attach the following
183 | boilerplate notice, with the fields enclosed by brackets "[]"
184 | replaced with your own identifying information. (Don't include
185 | the brackets!) The text should be enclosed in the appropriate
186 | comment syntax for the file format. We also recommend that a
187 | file or class name and description of purpose be included on the
188 | same "printed page" as the copyright notice for easier
189 | identification within third-party archives.
190 |
191 | Copyright [yyyy] [name of copyright owner]
192 |
193 | Licensed under the Apache License, Version 2.0 (the "License");
194 | you may not use this file except in compliance with the License.
195 | You may obtain a copy of the License at
196 |
197 | http://www.apache.org/licenses/LICENSE-2.0
198 |
199 | Unless required by applicable law or agreed to in writing, software
200 | distributed under the License is distributed on an "AS IS" BASIS,
201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 | See the License for the specific language governing permissions and
203 | limitations under the License.
204 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # KernelWarehouse: Rethinking the Design of Dynamic Convolution
2 |
3 | By Chao Li and Anbang Yao.
4 |
5 | This repository is an official PyTorch implementation of ["KernelWarehouse: Rethinking the Design of Dynamic Convolution", KW for short](http://arxiv.org/abs/2406.07879), which has been accepted to ICML 2024.
6 |
7 | Dynamic convolution learns a linear mixture of n static kernels weighted with their input-dependent attentions, demonstrating superior performance to normal convolution. However, it increases the number of convolutional parameters by n times, and thus is not parameter efficient. As a result, no prior work has been able to explore the setting n>100 (an order of magnitude larger than the typical setting n<10) to push forward the performance boundary of dynamic convolution while enjoying parameter efficiency. To fill this gap, in this paper we propose **KernelWarehouse**, a more general form of dynamic convolution, which redefines the basic concepts of "kernels", "assembling kernels" and "attention function" through the lens of exploiting convolutional parameter dependencies within the same layer and across neighboring layers of a ConvNet. We validate the effectiveness of KernelWarehouse on the [ImageNet](http://www.image-net.org) and [MS-COCO](https://cocodataset.org/#home) datasets using various ConvNet architectures. Intriguingly, KernelWarehouse is also applicable to Vision Transformers, and it can even reduce the model size of a backbone while improving model accuracy. For instance, KernelWarehouse (n=4) achieves absolute top-1 accuracy gains of 5.61%|3.90%|4.38% on the ResNet18|MobileNetV2|DeiT-Tiny backbones, and KernelWarehouse (n=1/4), with a 65.10% model size reduction, still achieves a 2.29% gain on the ResNet18 backbone.
8 |
9 |
![Schematic illustration of KernelWarehouse](fig/Fig_Architecture.png)
10 |
11 | Schematic illustration of KernelWarehouse. Briefly speaking, KernelWarehouse first sequentially divides the static kernel $\mathbf{W}$ at any regular convolutional layer of a ConvNet into $m$ disjoint kernel cells $\mathbf{w}_1, \dots, \mathbf{w}_m$ with the same dimensions; it then computes each kernel cell $\mathbf{w}_i$ as a linear mixture $\mathbf{w}_i=\alpha_{i1} \mathbf{e}_1+\dots+\alpha_{in}\mathbf{e}_n$ based on a predefined "warehouse" (consisting of $n$ same-dimensioned kernel cells $\mathbf{e}_1,\dots,\mathbf{e}_n$, e.g., $n=108$) that is shared across all same-stage convolutional layers; finally, it replaces the static kernel $\mathbf{W}$ by assembling its corresponding $m$ mixtures in order, yielding a high degree of freedom to fit a desired convolutional parameter budget. The input-dependent scalar attentions $\alpha_{i1},\dots,\alpha_{in}$ are computed with a novel contrasting-driven attention function (CAF).
12 |
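For intuition, here is a minimal PyTorch sketch of this mixture-and-assembly step. The shapes, the softmax stand-in for CAF, and all variable names are illustrative assumptions; the actual implementation lives in [modules/kernel_warehouse.py](modules/kernel_warehouse.py).

```python
import torch

n, m = 108, 4                      # warehouse size n; number of kernel cells m
cell = (16, 16, 3, 3)              # common (c_out, c_in, k, k) dimensions of every kernel cell

warehouse = torch.randn(n, *cell)  # kernel cells e_1, ..., e_n, shared by same-stage layers
alpha = torch.softmax(torch.randn(m, n), dim=1)  # placeholder for the input-dependent CAF attentions

# each kernel cell w_i is the linear mixture w_i = alpha_i1 * e_1 + ... + alpha_in * e_n
cells = torch.einsum('mn,noikl->moikl', alpha, warehouse)

# assemble the m mixtures in order to replace the static kernel W (here a 64x16x3x3 conv weight)
W = cells.reshape(m * cell[0], *cell[1:])
```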
13 | ## Dataset
14 |
15 | Following [this repository](https://github.com/pytorch/examples/tree/master/imagenet#requirements),
16 |
17 | - Download the ImageNet dataset from http://www.image-net.org/.
18 | - Then, move and extract the training and validation images to labeled subfolders, using [the following script](https://github.com/pytorch/examples/blob/main/imagenet/extract_ILSVRC.sh).
19 |
20 | ## Requirements
21 |
22 | - python >= 3.7.0
23 | - torch >= 1.8.1, torchvision >= 0.9.1
24 | - timm == 0.3.2, tensorboardX, six
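
For example, assuming the standard PyPI package names, the Python dependencies can be installed with:

```shell
pip install "torch>=1.8.1" "torchvision>=0.9.1" timm==0.3.2 tensorboardX six
```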
25 |
26 | ## Results and Models
27 |
28 | Results comparison on the ImageNet validation set with the ResNet18, ResNet50 and ConvNeXt-Tiny backbones trained for 300 epochs.
29 |
30 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive |
31 | |:------------|:-------:|:------------:|:------------:|:-------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------:|
32 | | ResNet18 | 11.69M | 70.44 | 89.72 | [model](https://drive.google.com/file/d/1xpAiJyjVycr6b0RyKsDTXbUUz7etA2UV/view?usp=sharing) | [model](https://pan.baidu.com/s/16IRpCY0l_uHhJd-FMWllGg?pwd=w46e) |
33 | | + KW (1/4×) | 4.08M | 72.73 | 90.83 | [model](https://drive.google.com/file/d/1DMcTWMLjiLRpY38041nLlOU0Vb_zmUh6/view?usp=sharing) | [model](https://pan.baidu.com/s/11nv7S5oH7DHkuzCXgQbiXA?pwd=marv) |
34 | | + KW (1/2×) | 7.43M | 73.33 | 91.42 | [model](https://drive.google.com/file/d/1k5tyyDHu9mqsQtiwZu7tTMPPolPOjdbT/view?usp=sharing) | [model](https://pan.baidu.com/s/1bOjjVf5Z2YdKs5d7NlPamQ?pwd=t8ax) |
35 | | + KW (1×) | 11.93M | 74.77 | 92.13 | [model](https://drive.google.com/file/d/1aLaO8QV9QYdfv5GE2tg9iDWLeEJfkrKY/view?usp=sharing) | [model](https://pan.baidu.com/s/1TPcg7iMNdA_7w2FlkZaFWg?pwd=tu3x) |
36 | | + KW (2×) | 23.24M | 75.19 | 92.18 | [model](https://drive.google.com/file/d/1UB7nrqI2mkAIY_p7I5eOcitrmKYbjtsj/view?usp=sharing) | [model](https://pan.baidu.com/s/1WODJhFpgG8IPtZDeiOCFuw?pwd=jhtg) |
37 | | + KW (4×) | 45.86M | 76.05 | 92.68 | [model](https://drive.google.com/file/d/1gl1__aDrJIMs-Zrv3IgH7mfVnwgFCLbr/view?usp=sharing) | [model](https://pan.baidu.com/s/1u1jiDOt8IkE3nakFJPRkzA?pwd=75f1) |
38 | | ResNet50 | 25.56M | 78.44 | 94.24 | [model](https://drive.google.com/file/d/1x57Lip3xt1yFYGz7k6aCTR2yhP56TtKM/view?usp=sharing) | [model](https://pan.baidu.com/s/1O3CiCq1BNssIL1L2jNV_IA?pwd=ufyh) |
39 | | + KW (1/2×) | 17.64M | 79.30 | 94.71 | [model](https://drive.google.com/file/d/1lm_TlIUX7nmyjM7HNDWb7SxO65rgyrF9/view?usp=sharing) | [model](https://pan.baidu.com/s/1TeIqjTrUzNobDr07gN1dUw?pwd=7b6j) |
40 | | + KW (1×) | 28.05M | 80.38 | 95.19 | [model](https://drive.google.com/file/d/1lA155EYydHae_m__hk86tX_m9S0i85i8/view?usp=sharing) | [model](https://pan.baidu.com/s/1moFwCD7PhZ8Y0SPHB20hbA?pwd=jp8p) |
41 | | + KW (4×) | 102.02M | 81.05 | 95.21 | [model](https://drive.google.com/file/d/1pxU1oHIXerjVwzK7eFK7AGf1XZD_caMQ/view?usp=sharing) | [model](https://pan.baidu.com/s/1h-AHOozmqF1JIanes0-kiA?pwd=xmq7) |
42 | | ConvNeXt-Tiny | 28.59M | 82.07 | 95.86 | [model](https://drive.google.com/file/d/1Yz_rooa7PMwE9Bdor00Mivtk61Xas7oh/view?usp=sharing) | [model](https://pan.baidu.com/s/1WCdlXqxpNkPCqvxowQnQfg?pwd=nm6j) |
43 | | + KW (1×) | 39.37M | 82.51 | 96.07 | [model](https://drive.google.com/file/d/13wqfY3jHxcskhHRchs0ynaQyVq0qUvn-/view?usp=sharing) | [model](https://pan.baidu.com/s/1EmTlEPMZnRAeO8QgMOfSPQ?pwd=z22e) |
44 |
45 | Results comparison on the ImageNet validation set with the MobileNetV2 (1.0×, 0.5×) backbones trained for 150 epochs.
46 |
47 |
48 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive |
49 | |:-------------------|:------:|:------------:|:------------:|:-------------:|:-------------:|
50 | | MobileNetV2 (1.0×) | 3.50M | 72.04 | 90.42 | [model](https://drive.google.com/file/d/1t97r1FM8hX2AtCjDn7k2TsM7HY6XwQjz/view?usp=sharing) | [model](https://pan.baidu.com/s/1GD_q4gSZowvssJpUdY7wXw?pwd=bks8)|
51 | | + KW (1/2×) | 2.65M | 72.59 | 90.71 | [model](https://drive.google.com/file/d/1I8JI1CtfKtUPMygqEVdD19c3PhSZReKJ/view?usp=sharing) | [model](https://pan.baidu.com/s/1EhHTze4gqcS16UnTzdjekg?pwd=65k8)|
52 | | + KW (1×) | 5.17M | 74.68 | 91.90 | [model](https://drive.google.com/file/d/1EWiUX8qaRj1kTI1ktBNAhWnhauV5eVdk/view?usp=sharing) | [model](https://pan.baidu.com/s/1zyLMX4PpUVAL9gVICFRmiA?pwd=8rcb)|
53 | | + KW (4×) | 11.38M | 75.92 | 92.22 | [model](https://drive.google.com/file/d/1xnzx41_sj3kZbR5Fzsfsb_PK5SEINXZ4/view?usp=sharing) | [model](https://pan.baidu.com/s/1Mb3buGekUCmseHWQNwLnjQ?pwd=ncrm)|
54 | | MobileNetV2 (0.5×) | 1.97M | 64.32 | 85.22 | [model](https://drive.google.com/file/d/1-L4EgH5hFQydocXjjT9oJCFVEItsD_eU/view?usp=sharing) | [model](https://pan.baidu.com/s/1zap9BSnry1WJy0SZDt4SIw?pwd=ueqv)|
55 | | + KW (1/2×) | 1.47M | 65.20 | 85.98 | [model](https://drive.google.com/file/d/1SByM8kJjb7seeYpY8lrSrv-hUOMJWRJE/view?usp=sharing) | [model](https://pan.baidu.com/s/13mzjh203BhRSETJiaJF3cw?pwd=tdck)|
56 | | + KW (1×) | 2.85M | 68.29 | 87.93 | [model](https://drive.google.com/file/d/1KFKy05JhhMnfj-tAz2SKzNRcBVMoJa19/view?usp=sharing) | [model](https://pan.baidu.com/s/1MHW2k5IkX1NPgM1KhQL29A?pwd=dajd)|
57 | | + KW (4×) | 4.65M | 70.26 | 89.19 | [model](https://drive.google.com/file/d/1Jt94_M7JQ6RDViYN3-P-4uoA8a5_bVYE/view?usp=sharing) | [model](https://pan.baidu.com/s/1uLBpreSm9MOtjPRWcM5SjA?pwd=whz2)|
58 |
59 |
60 | ## Training
61 |
62 | To train a model with KernelWarehouse:
63 |
64 | ```shell
65 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \
66 | --batch_size {batch size per gpu} --update_freq {number of gradient accumulation steps} --data_path {path to dataset} \
67 | --output_dir {path to output folder}
68 | ```
69 |
70 | For example, to train ResNet18 + KW (1×) on 8 GPUs with a total batch size of 4096 (8 GPUs × 128 images per GPU × 4 gradient accumulation steps):
71 |
72 | ```shell
73 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config configs/resnet18/kw1x_resnet18.json \
74 | --batch_size 128 --update_freq 4 --data_path {path to dataset} --output_dir {path to output folder}
75 | ```
76 |
77 | For example, to train MobileNetV2 (1.0×) + KW (4×) on 8 GPUs with a total batch size of 256 (8 GPUs × 32 images per GPU):
78 |
79 | ```shell
80 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config configs/mobilenetv2_100/kw4x_mobilenetv2_100.json \
81 | --batch_size 32 --update_freq 1 --data_path {path to dataset} --output_dir {path to output folder}
82 | ```
83 |
84 | You can add "--use_amp true" to enable Automatic Mixed Precision (AMP), which reduces memory usage and speeds up training.
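
For instance, with AMP enabled, the ResNet18 + KW (1×) command above becomes:

```shell
python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config configs/resnet18/kw1x_resnet18.json \
--batch_size 128 --update_freq 4 --use_amp true --data_path {path to dataset} --output_dir {path to output folder}
```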
85 |
86 | More config files for other models can be found in [configs](configs).
87 |
88 | ## Evaluation
89 |
90 | To evaluate a pre-trained model:
91 |
92 | ```shell
93 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \
94 | --eval true --data_path {path to dataset} --resume {path to model}
95 | ```
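
For example, to evaluate the ResNet18 + KW (1×) model from the table above on a single GPU:

```shell
python -m torch.distributed.launch --nproc_per_node=1 main.py --kw_config configs/resnet18/kw1x_resnet18.json \
--eval true --data_path {path to dataset} --resume {path to model}
```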
96 |
97 | ## Training and evaluation on object detection and instance segmentation
98 | Please refer to the README.md in the [detection](detection) folder for details.
99 |
100 | ## Citation
101 | If you find our work useful in your research, please consider citing:
102 | ```
103 | @inproceedings{li2024kernelwarehouse,
104 | title={KernelWarehouse: Rethinking the Design of Dynamic Convolution},
105 | author={Chao Li and Anbang Yao},
106 | booktitle={International Conference on Machine Learning},
107 | year={2024}
108 | }
109 | ```
110 |
111 | ## License
112 | KernelWarehouse is released under the Apache 2.0 license. We encourage its use for both research and commercial purposes, as long as proper attribution is given.
113 |
114 | ## Acknowledgment
115 | This repository is built upon the [ConvNeXt](https://github.com/facebookresearch/ConvNeXt), [mmdetection](https://github.com/open-mmlab/mmdetection), [Dynamic-convolution-Pytorch](https://github.com/kaijieshi7/Dynamic-convolution-Pytorch) and [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) repositories. We thank the authors for releasing their amazing code.
116 |
--------------------------------------------------------------------------------
/configs/convnext/convnext_tiny.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "convnext_tiny",
3 | "drop_path": 0.1,
4 | "model_ema": true,
5 | "model_ema_eval": true
6 | }
--------------------------------------------------------------------------------
/configs/convnext/kw1x_convnext_tiny.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_convnext_tiny",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 1,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_pwconv",
8 | "drop_path": 0.3,
9 | "model_ema": true,
10 | "model_ema_eval": true
11 | }
12 |
--------------------------------------------------------------------------------
/configs/mobilenetv2_050/kw1d2x_mobilenetv2_050.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw1d2x_mobilenetv2_050",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "smoothing": 0,
12 | "reprob": 0,
13 | "mixup": 0,
14 | "cutmix": 0,
15 | "traditional_preprocess": true,
16 | "temp_epoch": 15
17 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_050/kw1x_mobilenetv2_050.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_mobilenetv2_050",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "smoothing": 0,
12 | "reprob": 0,
13 | "mixup": 0,
14 | "cutmix": 0,
15 | "cell_num_ratio": 1,
16 | "cell_inplane_ratio": 1,
17 | "cell_outplane_ratio": 1,
18 | "sharing_range": ["layer", "pwconv"],
19 | "traditional_preprocess": true,
20 | "temp_epoch": 15
21 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_050/kw4x_mobilenetv2_050.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_mobilenetv2_050",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "reprob": 0,
12 | "mixup": 0,
13 | "cutmix": 0,
14 | "cell_num_ratio": 4,
15 | "cell_inplane_ratio": 1,
16 | "cell_outplane_ratio": 1,
17 | "sharing_range": ["layer"],
18 | "traditional_preprocess": true,
19 | "temp_epoch": 15
20 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_050/mobilenetv2_050.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "mobilenetv2_050",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "smoothing": 0,
12 | "reprob": 0,
13 | "mixup": 0,
14 | "cutmix": 0,
15 | "traditional_preprocess": true
16 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_100/kw1d2x_mobilenetv2_100.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw1d2x_mobilenetv2_100",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "smoothing": 0,
12 | "reprob": 0,
13 | "mixup": 0,
14 | "cutmix": 0,
15 | "traditional_preprocess": true,
16 | "temp_epoch": 15
17 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_100/kw1x_mobilenetv2_100.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_mobilenetv2_100",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "reprob": 0,
12 | "mixup": 0,
13 | "cutmix": 0,
14 | "cell_num_ratio": 1,
15 | "cell_inplane_ratio": 1,
16 | "cell_outplane_ratio": 1,
17 | "sharing_range": ["layer", "pwconv"],
18 | "traditional_preprocess": true,
19 | "temp_epoch": 15
20 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_100/kw4x_mobilenetv2_100.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_mobilenetv2_100",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "reprob": 0,
12 | "mixup": 0,
13 | "cutmix": 0,
14 | "cell_num_ratio": 4,
15 | "cell_inplane_ratio": 1,
16 | "cell_outplane_ratio": 1,
17 | "sharing_range": ["layer"],
18 | "traditional_preprocess": true,
19 | "temp_epoch": 15
20 | }
--------------------------------------------------------------------------------
/configs/mobilenetv2_100/mobilenetv2_100.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "mobilenetv2_100",
3 | "epochs": 150,
4 | "drop_path": 0.0,
5 | "opt": "sgd",
6 | "weight_decay": 4e-5,
7 | "lr": 0.05,
8 | "warmup_epochs": 0,
9 | "color_jitter": 0,
10 | "aa": null,
11 | "smoothing": 0,
12 | "reprob": 0,
13 | "mixup": 0,
14 | "cutmix": 0,
15 | "traditional_preprocess": true
16 | }
--------------------------------------------------------------------------------
/configs/resnet18/kw1d2x_resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet18",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 0.5,
5 | "cell_inplane_ratio": 0.5,
6 | "cell_outplane_ratio": 0.5,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.0,
9 | "dropout": 0.0
10 | }
--------------------------------------------------------------------------------
/configs/resnet18/kw1d4x_resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet18",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 0.25,
5 | "cell_inplane_ratio": 0.5,
6 | "cell_outplane_ratio": 0.5,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.0,
9 | "dropout": 0.0,
10 | "mixup": 0,
11 | "cutmix": 0
12 | }
--------------------------------------------------------------------------------
/configs/resnet18/kw1x_resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet18",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 1,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.1
9 | }
--------------------------------------------------------------------------------
/configs/resnet18/kw2x_resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet18",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 2,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.1
9 | }
--------------------------------------------------------------------------------
/configs/resnet18/kw4x_resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet18",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 4,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.1
9 | }
--------------------------------------------------------------------------------
/configs/resnet18/resnet18.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "resnet18",
3 | "drop_path": 0.0
4 | }
--------------------------------------------------------------------------------
/configs/resnet50/kw1d2x_resnet50.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet50",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 0.5,
5 | "cell_inplane_ratio": 0.5,
6 | "cell_outplane_ratio": 0.5,
7 | "sharing_range": "layer_conv",
8 | "nonlocal_basis_ratio": 0.0625,
9 | "drop_path": 0.1
10 | }
--------------------------------------------------------------------------------
/configs/resnet50/kw1x_resnet50.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet50",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 1,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.1
9 | }
--------------------------------------------------------------------------------
/configs/resnet50/kw4x_resnet50.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "kw_resnet50",
3 | "reduction": 0.0625,
4 | "cell_num_ratio": 4,
5 | "cell_inplane_ratio": 1,
6 | "cell_outplane_ratio": 1,
7 | "sharing_range": "layer_conv",
8 | "drop_path": 0.1
9 | }
--------------------------------------------------------------------------------
/configs/resnet50/resnet50.json:
--------------------------------------------------------------------------------
1 | {
2 | "model": "resnet50",
3 | "drop_path": 0.1
4 | }
--------------------------------------------------------------------------------
/datasets.py:
--------------------------------------------------------------------------------
1 | import os
2 | from torchvision import datasets, transforms
3 |
4 | from timm.data.constants import \
5 |     IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD
6 | from timm.data import create_transform
7 |
8 | def build_dataset(is_train, args):
9 |     if args.traditional_preprocess:
10 |         transform = build_transform_tradition(is_train, args)
11 |     else:
12 |         transform = build_transform(is_train, args)
13 |
14 |     print("Transform = ")
15 |     if isinstance(transform, tuple):
16 |         for trans in transform:
17 |             print(" - - - - - - - - - - ")
18 |             for t in trans.transforms:
19 |                 print(t)
20 |     else:
21 |         for t in transform.transforms:
22 |             print(t)
23 |     print("---------------------------")
24 |
25 |     if args.data_set == 'CIFAR':
26 |         dataset = datasets.CIFAR100(args.data_path, train=is_train, transform=transform, download=True)
27 |         nb_classes = 100
28 |     elif args.data_set == 'IMNET':
29 |         print("reading from datapath", args.data_path)
30 |         root = os.path.join(args.data_path, 'train' if is_train else 'val')
31 |         dataset = datasets.ImageFolder(root, transform=transform)
32 |         nb_classes = 1000
33 |     elif args.data_set == "image_folder":
34 |         root = args.data_path if is_train else args.eval_data_path
35 |         dataset = datasets.ImageFolder(root, transform=transform)
36 |         nb_classes = args.nb_classes
37 |         assert len(dataset.class_to_idx) == nb_classes
38 |     else:
39 |         raise NotImplementedError()
40 |     print("Number of classes = %d" % nb_classes)
41 |
42 |     return dataset, nb_classes
43 |
44 |
45 | def build_transform(is_train, args):
46 |     resize_im = args.input_size > 32
47 |     imagenet_default_mean_and_std = args.imagenet_default_mean_and_std
48 |     mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN
49 |     std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD
50 |
51 |     if is_train:
52 |         # this should always dispatch to transforms_imagenet_train
53 |         transform = create_transform(
54 |             input_size=args.input_size,
55 |             is_training=True,
56 |             color_jitter=args.color_jitter,
57 |             auto_augment=args.aa,
58 |             interpolation=args.train_interpolation,
59 |             re_prob=args.reprob,
60 |             re_mode=args.remode,
61 |             re_count=args.recount,
62 |             mean=mean,
63 |             std=std,
64 |         )
65 |         if not resize_im:
66 |             transform.transforms[0] = transforms.RandomCrop(
67 |                 args.input_size, padding=4)
68 |         return transform
69 |
70 |     t = []
71 |     if resize_im:
72 |         # warping (no cropping) when evaluated at 384 or larger
73 |         if args.input_size >= 384:
74 |             t.append(
75 |                 transforms.Resize((args.input_size, args.input_size),
76 |                                   interpolation=transforms.InterpolationMode.BICUBIC),
77 |             )
78 |             print(f"Warping {args.input_size} size input images...")
79 |         else:
80 |             if args.crop_pct is None:
81 |                 args.crop_pct = 224 / 256
82 |             size = int(args.input_size / args.crop_pct)
83 |             t.append(
84 |                 # to maintain same ratio w.r.t. 224 images
85 |                 transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC),
86 |             )
87 |             t.append(transforms.CenterCrop(args.input_size))
88 |
89 |     t.append(transforms.ToTensor())
90 |     t.append(transforms.Normalize(mean, std))
91 |     return transforms.Compose(t)
92 |
93 |
94 | def build_transform_tradition(is_train, args):
95 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
96 |                                      std=[0.229, 0.224, 0.225])
97 |     if is_train:
98 |         t = transforms.Compose([
99 |             transforms.RandomResizedCrop(224),
100 |             transforms.RandomHorizontalFlip(),
101 |             transforms.ToTensor(),
102 |             normalize,
103 |         ])
104 |     else:
105 |         t = transforms.Compose([
106 |             transforms.Resize(256),
107 |             transforms.CenterCrop(224),
108 |             transforms.ToTensor(),
109 |             normalize,
110 |         ])
111 |     return t
113 |
114 |
115 |
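# For reference, a minimal (hypothetical) usage sketch; only the attributes
# accessed above need to exist on `args`:
#
#   from types import SimpleNamespace
#   args = SimpleNamespace(traditional_preprocess=True, data_set='IMNET',
#                          data_path='/path/to/imagenet')
#   dataset_train, nb_classes = build_dataset(is_train=True, args=args)
#   dataset_val, _ = build_dataset(is_train=False, args=args)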
--------------------------------------------------------------------------------
/detection/README.md:
--------------------------------------------------------------------------------
1 | # MS-COCO Object Detection with KernelWarehouse
2 |
3 | We use the popular [MMDetection](https://github.com/open-mmlab/mmdetection) toolbox for experiments on the MS-COCO dataset, with the pre-trained ResNet50, MobileNetV2 (1.0×) and ConvNeXt-Tiny models as the backbones of the detector. We select the mainstream Faster R-CNN and Mask R-CNN detectors with Feature Pyramid Network (FPN) necks to build the basic object detection systems.
4 |
5 |
6 | ## Training
7 |
8 | Please follow [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) to prepare the environment and the dataset. Then add our code to the original project and modify the config files according to your own paths to the pre-trained models and the directories for saving logs and models.
9 |
10 | To train a detector with a pre-trained model as its backbone:
11 |
12 | ```shell
13 | bash tools/dist_train.sh {path to config file} {number of gpus}
14 | ```
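
For example, to train Mask R-CNN with the ResNet50 + KW (1×) backbone on 8 GPUs, using the config path from the table below:

```shell
bash tools/dist_train.sh configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py 8
```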
15 |
16 | ## Evaluation
17 |
18 | To evaluate a fine-tuned model:
19 | ```shell
20 | bash tools/dist_test.sh {path to config file} {path to fine-tuned model} {number of gpus} --eval bbox segm --show
21 | ```
22 |
23 |
24 | ## Results and Models
25 |
26 | | Backbones | Detectors | box AP | mask AP | Config | Google Drive | Baidu Drive |
27 | |:------------|:-------:|:------:|:-------:|:-------------:|:-------------:|:-------------:|
28 | | ResNet50 | Mask R-CNN | 39.6 | 36.4 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1j6wSJLett-JeVDTh7CW7CHhC4jQHDzad/view?usp=sharing) | [model](https://pan.baidu.com/s/1U7q2U0jYXjDCAVxqUMWmHw?pwd=4wih) |
29 | | + KW (1×) | Mask R-CNN | 41.8 | 38.4 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1XBXKF8TU0iMFVBt-IF048hAmYTL9-spk/view?usp=sharing) | [model](https://pan.baidu.com/s/1AI01STe9v0KzAKVVPMUhog?pwd=a7ce) |
30 | | + KW (4×) | Mask R-CNN | 42.4 | 38.9 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1GUDEr2jNT0Il8A04g_f8sRQ1WFAycOO8/view?usp=sharing) | [model](https://pan.baidu.com/s/1ZSJkfVy8xr5IB_OfubXzRw?pwd=xig5) |
31 | | MobileNetV2 (1.0×) | Mask R-CNN | 33.8 | 31.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1UJifIqx56cOOm2hx-D3DSHh4MWgFzOVB/view?usp=sharing) | [model](https://pan.baidu.com/s/1S7vo59mzEVL_8ai9Sg1iUQ?pwd=4sh8) |
32 | | + KW (1×) | Mask R-CNN | 36.4 | 33.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1wdzs-Ry6LefgG4Nc9RWUlrDrsyGOWhL5/view?usp=sharing) | [model](https://pan.baidu.com/s/1q3U4Euw2qNCWXipPCn4vtQ?pwd=8g38) |
33 | | + KW (4×) | Mask R-CNN | 38.0 | 34.9 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/14nfWpHUHgH0mA4gbEPX3F_3UqOXPIGK7/view?usp=sharing) | [model](https://pan.baidu.com/s/1HidKe3MgnIEERvvKgdYMHg?pwd=n5uu) |
34 | | ConvNeXt-Tiny | Mask R-CNN | 43.4 | 39.7 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1xarty4OTZOKGo1ltAUcTJCoKHCIOipC6/view?usp=sharing) | [model](https://pan.baidu.com/s/1bouC_aK9C1czPrIYkkS3Ug?pwd=79f4) |
35 | | + KW (1×) | Mask R-CNN | 44.7 | 40.6 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1simtPisVzZo__iSXZwrynWi6TlUwPG3b/view?usp=sharing) | [model](https://pan.baidu.com/s/1iBD4lCrvSTX0Wu7e2I0BKg?pwd=am2w) |
--------------------------------------------------------------------------------
/detection/configs/_base_/models/mask_rcnn_convnext_fpn.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='MaskRCNN',
3 | backbone=dict(
4 | type='ConvNeXt',
5 | in_chans=3,
6 | depths=[3, 3, 9, 3],
7 | dims=[96, 192, 384, 768],
8 | drop_path_rate=0.2,
9 | layer_scale_init_value=1e-6,
10 | out_indices=[0, 1, 2, 3],
11 | ),
12 | neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | num_outs=5),
17 | rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=256,
20 | feat_channels=256,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[8],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[4, 8, 16, 32, 64]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | roi_head=dict(
34 | type='StandardRoIHead',
35 | bbox_roi_extractor=dict(
36 | type='SingleRoIExtractor',
37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
38 | out_channels=256,
39 | featmap_strides=[4, 8, 16, 32]),
40 | bbox_head=dict(
41 | type='Shared2FCBBoxHead',
42 | in_channels=256,
43 | fc_out_channels=1024,
44 | roi_feat_size=7,
45 | num_classes=80,
46 | bbox_coder=dict(
47 | type='DeltaXYWHBBoxCoder',
48 | target_means=[0., 0., 0., 0.],
49 | target_stds=[0.1, 0.1, 0.2, 0.2]),
50 | reg_class_agnostic=False,
51 | loss_cls=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
54 | mask_roi_extractor=dict(
55 | type='SingleRoIExtractor',
56 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
57 | out_channels=256,
58 | featmap_strides=[4, 8, 16, 32]),
59 | mask_head=dict(
60 | type='FCNMaskHead',
61 | num_convs=4,
62 | in_channels=256,
63 | conv_out_channels=256,
64 | num_classes=80,
65 | loss_mask=dict(
66 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
67 | # model training and testing settings
68 | train_cfg=dict(
69 | rpn=dict(
70 | assigner=dict(
71 | type='MaxIoUAssigner',
72 | pos_iou_thr=0.7,
73 | neg_iou_thr=0.3,
74 | min_pos_iou=0.3,
75 | match_low_quality=True,
76 | ignore_iof_thr=-1),
77 | sampler=dict(
78 | type='RandomSampler',
79 | num=256,
80 | pos_fraction=0.5,
81 | neg_pos_ub=-1,
82 | add_gt_as_proposals=False),
83 | allowed_border=-1,
84 | pos_weight=-1,
85 | debug=False),
86 | rpn_proposal=dict(
87 | nms_pre=2000,
88 | max_per_img=1000,
89 | nms=dict(type='nms', iou_threshold=0.7),
90 | min_bbox_size=0),
91 | rcnn=dict(
92 | assigner=dict(
93 | type='MaxIoUAssigner',
94 | pos_iou_thr=0.5,
95 | neg_iou_thr=0.5,
96 | min_pos_iou=0.5,
97 | match_low_quality=True,
98 | ignore_iof_thr=-1),
99 | sampler=dict(
100 | type='RandomSampler',
101 | num=512,
102 | pos_fraction=0.25,
103 | neg_pos_ub=-1,
104 | add_gt_as_proposals=True),
105 | mask_size=28,
106 | pos_weight=-1,
107 | debug=False)),
108 | test_cfg=dict(
109 | rpn=dict(
110 | nms_pre=1000,
111 | max_per_img=1000,
112 | nms=dict(type='nms', iou_threshold=0.7),
113 | min_bbox_size=0),
114 | rcnn=dict(
115 | score_thr=0.05,
116 | nms=dict(type='nms', iou_threshold=0.5),
117 | max_per_img=100,
118 | mask_thr_binary=0.5)))
--------------------------------------------------------------------------------
/detection/configs/_base_/models/mask_rcnn_mobilenetv2_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | backbone=dict(
5 | type='MobileNetV2',
6 | width_mult=1.0,
7 | norm_eval=True,
8 | out_indices=(1, 2, 3, 4),
9 | frozen_stages=1),
10 | neck=dict(
11 | type='FPN',
12 | in_channels=[24, 32, 96, 1280],
13 | out_channels=256,
14 | num_outs=5),
15 | rpn_head=dict(
16 | type='RPNHead',
17 | in_channels=256,
18 | feat_channels=256,
19 | anchor_generator=dict(
20 | type='AnchorGenerator',
21 | scales=[8],
22 | ratios=[0.5, 1.0, 2.0],
23 | strides=[4, 8, 16, 32, 64]),
24 | bbox_coder=dict(
25 | type='DeltaXYWHBBoxCoder',
26 | target_means=[.0, .0, .0, .0],
27 | target_stds=[1.0, 1.0, 1.0, 1.0]),
28 | loss_cls=dict(
29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
31 | roi_head=dict(
32 | type='StandardRoIHead',
33 | bbox_roi_extractor=dict(
34 | type='SingleRoIExtractor',
35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
36 | out_channels=256,
37 | featmap_strides=[4, 8, 16, 32]),
38 | bbox_head=dict(
39 | type='Shared2FCBBoxHead',
40 | in_channels=256,
41 | fc_out_channels=1024,
42 | roi_feat_size=7,
43 | num_classes=80,
44 | bbox_coder=dict(
45 | type='DeltaXYWHBBoxCoder',
46 | target_means=[0., 0., 0., 0.],
47 | target_stds=[0.1, 0.1, 0.2, 0.2]),
48 | reg_class_agnostic=False,
49 | loss_cls=dict(
50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
52 | mask_roi_extractor=dict(
53 | type='SingleRoIExtractor',
54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
55 | out_channels=256,
56 | featmap_strides=[4, 8, 16, 32]),
57 | mask_head=dict(
58 | type='FCNMaskHead',
59 | num_convs=4,
60 | in_channels=256,
61 | conv_out_channels=256,
62 | num_classes=80,
63 | loss_mask=dict(
64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
65 | # model training and testing settings
66 | train_cfg=dict(
67 | rpn=dict(
68 | assigner=dict(
69 | type='MaxIoUAssigner',
70 | pos_iou_thr=0.7,
71 | neg_iou_thr=0.3,
72 | min_pos_iou=0.3,
73 | match_low_quality=True,
74 | ignore_iof_thr=-1),
75 | sampler=dict(
76 | type='RandomSampler',
77 | num=256,
78 | pos_fraction=0.5,
79 | neg_pos_ub=-1,
80 | add_gt_as_proposals=False),
81 | allowed_border=-1,
82 | pos_weight=-1,
83 | debug=False),
84 | rpn_proposal=dict(
85 | nms_pre=2000,
86 | max_per_img=1000,
87 | nms=dict(type='nms', iou_threshold=0.7),
88 | min_bbox_size=0),
89 | rcnn=dict(
90 | assigner=dict(
91 | type='MaxIoUAssigner',
92 | pos_iou_thr=0.5,
93 | neg_iou_thr=0.5,
94 | min_pos_iou=0.5,
95 | match_low_quality=True,
96 | ignore_iof_thr=-1),
97 | sampler=dict(
98 | type='RandomSampler',
99 | num=512,
100 | pos_fraction=0.25,
101 | neg_pos_ub=-1,
102 | add_gt_as_proposals=True),
103 | mask_size=28,
104 | pos_weight=-1,
105 | debug=False)),
106 | test_cfg=dict(
107 | rpn=dict(
108 | nms_pre=1000,
109 | max_per_img=1000,
110 | nms=dict(type='nms', iou_threshold=0.7),
111 | min_bbox_size=0),
112 | rcnn=dict(
113 | score_thr=0.05,
114 | nms=dict(type='nms', iou_threshold=0.5),
115 | max_per_img=100,
116 | mask_thr_binary=0.5)))
117 |
--------------------------------------------------------------------------------
/detection/configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='MaskRCNN',
3 | backbone=dict(
4 | type='ResNet',
5 | depth=50,
6 | num_stages=4,
7 | out_indices=(0, 1, 2, 3),
8 | frozen_stages=1,
9 | norm_eval=True,),
10 | neck=dict(
11 | type='FPN',
12 | in_channels=[256, 512, 1024, 2048],
13 | out_channels=256,
14 | num_outs=5),
15 | rpn_head=dict(
16 | type='RPNHead',
17 | in_channels=256,
18 | feat_channels=256,
19 | anchor_generator=dict(
20 | type='AnchorGenerator',
21 | scales=[8],
22 | ratios=[0.5, 1.0, 2.0],
23 | strides=[4, 8, 16, 32, 64]),
24 | bbox_coder=dict(
25 | type='DeltaXYWHBBoxCoder',
26 | target_means=[.0, .0, .0, .0],
27 | target_stds=[1.0, 1.0, 1.0, 1.0]),
28 | loss_cls=dict(
29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
31 | roi_head=dict(
32 | type='StandardRoIHead',
33 | bbox_roi_extractor=dict(
34 | type='SingleRoIExtractor',
35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
36 | out_channels=256,
37 | featmap_strides=[4, 8, 16, 32]),
38 | bbox_head=dict(
39 | type='Shared2FCBBoxHead',
40 | in_channels=256,
41 | fc_out_channels=1024,
42 | roi_feat_size=7,
43 | num_classes=80,
44 | bbox_coder=dict(
45 | type='DeltaXYWHBBoxCoder',
46 | target_means=[0., 0., 0., 0.],
47 | target_stds=[0.1, 0.1, 0.2, 0.2]),
48 | reg_class_agnostic=False,
49 | loss_cls=dict(
50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
52 | mask_roi_extractor=dict(
53 | type='SingleRoIExtractor',
54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
55 | out_channels=256,
56 | featmap_strides=[4, 8, 16, 32]),
57 | mask_head=dict(
58 | type='FCNMaskHead',
59 | num_convs=4,
60 | in_channels=256,
61 | conv_out_channels=256,
62 | num_classes=80,
63 | loss_mask=dict(
64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
65 | # model training and testing settings
66 | train_cfg=dict(
67 | rpn=dict(
68 | assigner=dict(
69 | type='MaxIoUAssigner',
70 | pos_iou_thr=0.7,
71 | neg_iou_thr=0.3,
72 | min_pos_iou=0.3,
73 | match_low_quality=True,
74 | ignore_iof_thr=-1),
75 | sampler=dict(
76 | type='RandomSampler',
77 | num=256,
78 | pos_fraction=0.5,
79 | neg_pos_ub=-1,
80 | add_gt_as_proposals=False),
81 | allowed_border=-1,
82 | pos_weight=-1,
83 | debug=False),
84 | rpn_proposal=dict(
85 | nms_pre=2000,
86 | max_per_img=1000,
87 | nms=dict(type='nms', iou_threshold=0.7),
88 | min_bbox_size=0),
89 | rcnn=dict(
90 | assigner=dict(
91 | type='MaxIoUAssigner',
92 | pos_iou_thr=0.5,
93 | neg_iou_thr=0.5,
94 | min_pos_iou=0.5,
95 | match_low_quality=True,
96 | ignore_iof_thr=-1),
97 | sampler=dict(
98 | type='RandomSampler',
99 | num=512,
100 | pos_fraction=0.25,
101 | neg_pos_ub=-1,
102 | add_gt_as_proposals=True),
103 | mask_size=28,
104 | pos_weight=-1,
105 | debug=False)),
106 | test_cfg=dict(
107 | rpn=dict(
108 | nms_pre=1000,
109 | max_per_img=1000,
110 | nms=dict(type='nms', iou_threshold=0.7),
111 | min_bbox_size=0),
112 | rcnn=dict(
113 | score_thr=0.05,
114 | nms=dict(type='nms', iou_threshold=0.5),
115 | max_per_img=100,
116 | mask_thr_binary=0.5)))
117 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_convnext_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='ConvNeXt',
11 | in_chans=3,
12 | depths=[3, 3, 9, 3],
13 | dims=[96, 192, 384, 768],
14 | drop_path_rate=0.4,
15 | layer_scale_init_value=1.0,
16 | out_indices=[0, 1, 2, 3],
17 | ),
18 | neck=dict(in_channels=[96, 192, 384, 768]))
19 |
20 | img_norm_cfg = dict(
21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
22 |
23 | # augmentation strategy originates from DETR / Sparse RCNN
24 | train_pipeline = [
25 | dict(type='LoadImageFromFile'),
26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
27 | dict(type='RandomFlip', flip_ratio=0.5),
28 | dict(type='AutoAugment',
29 | policies=[
30 | [
31 | dict(type='Resize',
32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
34 | (736, 1333), (768, 1333), (800, 1333)],
35 | multiscale_mode='value',
36 | keep_ratio=True)
37 | ],
38 | [
39 | dict(type='Resize',
40 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
41 | multiscale_mode='value',
42 | keep_ratio=True),
43 | dict(type='RandomCrop',
44 | crop_type='absolute_range',
45 | crop_size=(384, 600),
46 | allow_negative_crop=True),
47 | dict(type='Resize',
48 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
49 | (576, 1333), (608, 1333), (640, 1333),
50 | (672, 1333), (704, 1333), (736, 1333),
51 | (768, 1333), (800, 1333)],
52 | multiscale_mode='value',
53 | override=True,
54 | keep_ratio=True)
55 | ]
56 | ]),
57 | dict(type='Normalize', **img_norm_cfg),
58 | dict(type='Pad', size_divisor=32),
59 | dict(type='DefaultFormatBundle'),
60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
61 | ]
62 | data = dict(train=dict(pipeline=train_pipeline))
63 |
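# Note: 'LearningRateDecayOptimizerConstructor' is not part of stock mmcv/mmdet; it is
# presumably registered by mmcv_custom/layer_decay_optimizer_constructor.py in this
# repository and applies layer-wise lr decay according to the paramwise_cfg below.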
64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
66 | paramwise_cfg={'decay_rate': 0.95,
67 | 'decay_type': 'layer_wise',
68 | 'num_layers': 6})
69 |
70 | lr_config = dict(step=[9, 11])
71 | runner = dict(type='EpochBasedRunner', max_epochs=12)
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_convnext_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='KW_ConvNeXt',
11 | in_chans=3,
12 | depths=[3, 3, 9, 3],
13 | dims=[96, 192, 384, 768],
14 | drop_path_rate=0.4,
15 | layer_scale_init_value=1.0,
16 | out_indices=[0, 1, 2, 3],
17 | ),
18 | neck=dict(in_channels=[96, 192, 384, 768]))
19 |
20 | img_norm_cfg = dict(
21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
22 |
23 | # augmentation strategy originates from DETR / Sparse RCNN
24 | train_pipeline = [
25 | dict(type='LoadImageFromFile'),
26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
27 | dict(type='RandomFlip', flip_ratio=0.5),
28 | dict(type='AutoAugment',
29 | policies=[
30 | [
31 | dict(type='Resize',
32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
34 | (736, 1333), (768, 1333), (800, 1333)],
35 | multiscale_mode='value',
36 | keep_ratio=True)
37 | ],
38 | [
39 | dict(type='Resize',
40 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
41 | multiscale_mode='value',
42 | keep_ratio=True),
43 | dict(type='RandomCrop',
44 | crop_type='absolute_range',
45 | crop_size=(384, 600),
46 | allow_negative_crop=True),
47 | dict(type='Resize',
48 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
49 | (576, 1333), (608, 1333), (640, 1333),
50 | (672, 1333), (704, 1333), (736, 1333),
51 | (768, 1333), (800, 1333)],
52 | multiscale_mode='value',
53 | override=True,
54 | keep_ratio=True)
55 | ]
56 | ]),
57 | dict(type='Normalize', **img_norm_cfg),
58 | dict(type='Pad', size_divisor=32),
59 | dict(type='DefaultFormatBundle'),
60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
61 | ]
62 | data = dict(train=dict(pipeline=train_pipeline))
63 |
64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW',
65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
66 | paramwise_cfg={'decay_rate': 0.95,
67 | 'decay_type': 'layer_wise',
68 | 'num_layers': 6})
69 |
70 | lr_config = dict(step=[9, 11])
71 | runner = dict(type='EpochBasedRunner', max_epochs=12)
72 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='KW_MobileNetV2',
11 | width_mult=1.0,
12 | norm_eval=True,
13 | out_indices=(1, 2, 3, 4),
14 | frozen_stages=1,
15 | cell_num_ratio=1,
16 | sharing_range=('layer', 'pwconv')
17 | ),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[24, 32, 96, 1280],
21 | out_channels=256,
22 | num_outs=5
23 | ),
24 | )
25 |
26 | img_norm_cfg = dict(
27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
28 |
29 | # augmentation strategy originates from DETR / Sparse RCNN
30 | train_pipeline = [
31 | dict(type='LoadImageFromFile'),
32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
33 | dict(type='RandomFlip', flip_ratio=0.5),
34 | dict(type='AutoAugment',
35 | policies=[
36 | [
37 | dict(type='Resize',
38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
40 | (736, 1333), (768, 1333), (800, 1333)],
41 | multiscale_mode='value',
42 | keep_ratio=True)
43 | ],
44 | [
45 | dict(type='Resize',
46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
47 | multiscale_mode='value',
48 | keep_ratio=True),
49 | dict(type='RandomCrop',
50 | crop_type='absolute_range',
51 | crop_size=(384, 600),
52 | allow_negative_crop=True),
53 | dict(type='Resize',
54 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
55 | (576, 1333), (608, 1333), (640, 1333),
56 | (672, 1333), (704, 1333), (736, 1333),
57 | (768, 1333), (800, 1333)],
58 | multiscale_mode='value',
59 | override=True,
60 | keep_ratio=True)
61 | ]
62 | ]),
63 | dict(type='Normalize', **img_norm_cfg),
64 | dict(type='Pad', size_divisor=32),
65 | dict(type='DefaultFormatBundle'),
66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
67 | ]
68 | data = dict(train=dict(pipeline=train_pipeline))
69 |
70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
71 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
72 | 'relative_position_bias_table': dict(decay_mult=0.),
73 | 'norm': dict(decay_mult=0.)}))
74 |
75 | lr_config = dict(step=[8, 11])
76 | runner = dict(type='EpochBasedRunner', max_epochs=12)
77 | find_unused_parameters = True
78 |
79 |
80 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='KW_MobileNetV2',
11 | width_mult=1.0,
12 | norm_eval=True,
13 | out_indices=(1, 2, 3, 4),
14 | frozen_stages=1,
15 | cell_num_ratio=4,
16 | sharing_range=('layer',)
17 | ),
18 | neck=dict(
19 | type='FPN',
20 | in_channels=[24, 32, 96, 1280],
21 | out_channels=256,
22 | num_outs=5
23 | ),
24 | )
25 |
26 | img_norm_cfg = dict(
27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
28 |
29 | # augmentation strategy originates from DETR / Sparse RCNN
30 | train_pipeline = [
31 | dict(type='LoadImageFromFile'),
32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
33 | dict(type='RandomFlip', flip_ratio=0.5),
34 | dict(type='AutoAugment',
35 | policies=[
36 | [
37 | dict(type='Resize',
38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
40 | (736, 1333), (768, 1333), (800, 1333)],
41 | multiscale_mode='value',
42 | keep_ratio=True)
43 | ],
44 | [
45 | dict(type='Resize',
46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
47 | multiscale_mode='value',
48 | keep_ratio=True),
49 | dict(type='RandomCrop',
50 | crop_type='absolute_range',
51 | crop_size=(384, 600),
52 | allow_negative_crop=True),
53 | dict(type='Resize',
54 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
55 | (576, 1333), (608, 1333), (640, 1333),
56 | (672, 1333), (704, 1333), (736, 1333),
57 | (768, 1333), (800, 1333)],
58 | multiscale_mode='value',
59 | override=True,
60 | keep_ratio=True)
61 | ]
62 | ]),
63 | dict(type='Normalize', **img_norm_cfg),
64 | dict(type='Pad', size_divisor=32),
65 | dict(type='DefaultFormatBundle'),
66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
67 | ]
68 | data = dict(train=dict(pipeline=train_pipeline))
69 |
70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
71 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
72 | 'relative_position_bias_table': dict(decay_mult=0.),
73 | 'norm': dict(decay_mult=0.)}))
74 |
75 | lr_config = dict(step=[8, 11])
76 | runner = dict(type='EpochBasedRunner', max_epochs=12)
77 | find_unused_parameters = True
78 |
--------------------------------------------------------------------------------
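A small Python footnote on the `sharing_range=('layer',)` value above: parentheses alone do not create a tuple, so the trailing comma matters whenever a single sharing key is given.

assert ('layer') == 'layer'            # bare parentheses: still a str
assert isinstance(('layer',), tuple)   # trailing comma: a one-element tuple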
/detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='MobileNetV2',
11 | width_mult=1.0,
12 | norm_eval=True,
13 | out_indices=(1, 2, 3, 4),
14 | frozen_stages=1,
15 | ),
16 | neck=dict(
17 | type='FPN',
18 | in_channels=[24, 32, 96, 1280],
19 | out_channels=256,
20 | num_outs=5
21 | ),
22 | )
23 |
24 | img_norm_cfg = dict(
25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
26 |
27 | # augmentation strategy originates from DETR / Sparse RCNN
28 | train_pipeline = [
29 | dict(type='LoadImageFromFile'),
30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
31 | dict(type='RandomFlip', flip_ratio=0.5),
32 | dict(type='AutoAugment',
33 | policies=[
34 | [
35 | dict(type='Resize',
36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
38 | (736, 1333), (768, 1333), (800, 1333)],
39 | multiscale_mode='value',
40 | keep_ratio=True)
41 | ],
42 | [
43 | dict(type='Resize',
44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
45 | multiscale_mode='value',
46 | keep_ratio=True),
47 | dict(type='RandomCrop',
48 | crop_type='absolute_range',
49 | crop_size=(384, 600),
50 | allow_negative_crop=True),
51 | dict(type='Resize',
52 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
53 | (576, 1333), (608, 1333), (640, 1333),
54 | (672, 1333), (704, 1333), (736, 1333),
55 | (768, 1333), (800, 1333)],
56 | multiscale_mode='value',
57 | override=True,
58 | keep_ratio=True)
59 | ]
60 | ]),
61 | dict(type='Normalize', **img_norm_cfg),
62 | dict(type='Pad', size_divisor=32),
63 | dict(type='DefaultFormatBundle'),
64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
65 | ]
66 | data = dict(train=dict(pipeline=train_pipeline))
67 |
68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
70 | 'relative_position_bias_table': dict(decay_mult=0.),
71 | 'norm': dict(decay_mult=0.)}))
72 |
73 | lr_config = dict(step=[8, 11])
74 | runner = dict(type='EpochBasedRunner', max_epochs=12)
75 | find_unused_parameters = True
76 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_r50_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='KW_ResNet',
11 | depth=50,
12 | norm_eval=True,
13 | out_indices=(0, 1, 2, 3),
14 | frozen_stages=1,
15 | cell_num_ratio=1,
16 | ),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[256, 512, 1024, 2048],
20 | out_channels=256,
21 | num_outs=5
22 | ),
23 | )
24 |
25 | img_norm_cfg = dict(
26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
27 |
28 | # augmentation strategy originates from DETR / Sparse RCNN
29 | train_pipeline = [
30 | dict(type='LoadImageFromFile'),
31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
32 | dict(type='RandomFlip', flip_ratio=0.5),
33 | dict(type='AutoAugment',
34 | policies=[
35 | [
36 | dict(type='Resize',
37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
38 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
39 | (736, 1333), (768, 1333), (800, 1333)],
40 | multiscale_mode='value',
41 | keep_ratio=True)
42 | ],
43 | [
44 | dict(type='Resize',
45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
46 | multiscale_mode='value',
47 | keep_ratio=True),
48 | dict(type='RandomCrop',
49 | crop_type='absolute_range',
50 | crop_size=(384, 600),
51 | allow_negative_crop=True),
52 | dict(type='Resize',
53 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
54 | (576, 1333), (608, 1333), (640, 1333),
55 | (672, 1333), (704, 1333), (736, 1333),
56 | (768, 1333), (800, 1333)],
57 | multiscale_mode='value',
58 | override=True,
59 | keep_ratio=True)
60 | ]
61 | ]),
62 | dict(type='Normalize', **img_norm_cfg),
63 | dict(type='Pad', size_divisor=32),
64 | dict(type='DefaultFormatBundle'),
65 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
66 | ]
67 | data = dict(train=dict(pipeline=train_pipeline))
68 |
69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
71 | 'relative_position_bias_table': dict(decay_mult=0.),
72 | 'norm': dict(decay_mult=0.)}))
73 |
74 | lr_config = dict(step=[8, 11])
75 | runner = dict(type='EpochBasedRunner', max_epochs=12)
76 | find_unused_parameters = True
77 |
78 |
79 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_r50_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='KW_ResNet',
11 | depth=50,
12 | norm_eval=True,
13 | out_indices=(0, 1, 2, 3),
14 | frozen_stages=1,
15 | cell_num_ratio=4,
16 | ),
17 | neck=dict(
18 | type='FPN',
19 | in_channels=[256, 512, 1024, 2048],
20 | out_channels=256,
21 | num_outs=5
22 | ),
23 | )
24 |
25 | img_norm_cfg = dict(
26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
27 |
28 | # augmentation strategy originates from DETR / Sparse RCNN
29 | train_pipeline = [
30 | dict(type='LoadImageFromFile'),
31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
32 | dict(type='RandomFlip', flip_ratio=0.5),
33 | dict(type='AutoAugment',
34 | policies=[
35 | [
36 | dict(type='Resize',
37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
38 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
39 | (736, 1333), (768, 1333), (800, 1333)],
40 | multiscale_mode='value',
41 | keep_ratio=True)
42 | ],
43 | [
44 | dict(type='Resize',
45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
46 | multiscale_mode='value',
47 | keep_ratio=True),
48 | dict(type='RandomCrop',
49 | crop_type='absolute_range',
50 | crop_size=(384, 600),
51 | allow_negative_crop=True),
52 | dict(type='Resize',
53 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
54 | (576, 1333), (608, 1333), (640, 1333),
55 | (672, 1333), (704, 1333), (736, 1333),
56 | (768, 1333), (800, 1333)],
57 | multiscale_mode='value',
58 | override=True,
59 | keep_ratio=True)
60 | ]
61 | ]),
62 | dict(type='Normalize', **img_norm_cfg),
63 | dict(type='Pad', size_divisor=32),
64 | dict(type='DefaultFormatBundle'),
65 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
66 | ]
67 | data = dict(train=dict(pipeline=train_pipeline))
68 |
69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
71 | 'relative_position_bias_table': dict(decay_mult=0.),
72 | 'norm': dict(decay_mult=0.)}))
73 |
74 | lr_config = dict(step=[8, 11])
75 | runner = dict(type='EpochBasedRunner', max_epochs=12)
76 | find_unused_parameters = True
77 |
--------------------------------------------------------------------------------
/detection/configs/kernelwarehouse/resnet50/mask_rcnn_resnet50_adamw_1x_coco.py:
--------------------------------------------------------------------------------
1 | _base_ = [
2 | '../../_base_/models/mask_rcnn_r50_fpn.py',
3 | '../../_base_/datasets/coco_instance.py',
4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
5 | ]
6 |
7 | model = dict(
8 | pretrained='the path to pre-trained model',
9 | backbone=dict(
10 | type='ResNet',
11 | depth=50,
12 | norm_eval=True,
13 | out_indices=(0, 1, 2, 3),
14 | frozen_stages=1,
15 | ),
16 | neck=dict(
17 | type='FPN',
18 | in_channels=[256, 512, 1024, 2048],
19 | out_channels=256,
20 | num_outs=5
21 | ),
22 | )
23 |
24 | img_norm_cfg = dict(
25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
26 |
27 | # augmentation strategy originates from DETR / Sparse RCNN
28 | train_pipeline = [
29 | dict(type='LoadImageFromFile'),
30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
31 | dict(type='RandomFlip', flip_ratio=0.5),
32 | dict(type='AutoAugment',
33 | policies=[
34 | [
35 | dict(type='Resize',
36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),
37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333),
38 | (736, 1333), (768, 1333), (800, 1333)],
39 | multiscale_mode='value',
40 | keep_ratio=True)
41 | ],
42 | [
43 | dict(type='Resize',
44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)],
45 | multiscale_mode='value',
46 | keep_ratio=True),
47 | dict(type='RandomCrop',
48 | crop_type='absolute_range',
49 | crop_size=(384, 600),
50 | allow_negative_crop=True),
51 | dict(type='Resize',
52 | img_scale=[(480, 1333), (512, 1333), (544, 1333),
53 | (576, 1333), (608, 1333), (640, 1333),
54 | (672, 1333), (704, 1333), (736, 1333),
55 | (768, 1333), (800, 1333)],
56 | multiscale_mode='value',
57 | override=True,
58 | keep_ratio=True)
59 | ]
60 | ]),
61 | dict(type='Normalize', **img_norm_cfg),
62 | dict(type='Pad', size_divisor=32),
63 | dict(type='DefaultFormatBundle'),
64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
65 | ]
66 | data = dict(train=dict(pipeline=train_pipeline))
67 |
68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05,
69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.),
70 | 'relative_position_bias_table': dict(decay_mult=0.),
71 | 'norm': dict(decay_mult=0.)}))
72 |
73 | lr_config = dict(step=[8, 11])
74 | runner = dict(type='EpochBasedRunner', max_epochs=12)
75 | find_unused_parameters = True
76 |
--------------------------------------------------------------------------------
/detection/mmcv_custom/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | # -*- coding: utf-8 -*-
10 |
11 | from .checkpoint import load_checkpoint
12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor
13 | from .customized_text import CustomizedTextLoggerHook
14 |
15 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook']
16 |
--------------------------------------------------------------------------------
/detection/mmcv_custom/customized_text.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | import datetime
10 | from collections import OrderedDict
11 |
12 | import torch
13 |
14 | import mmcv
15 | from mmcv.runner import HOOKS
16 | from mmcv.runner import TextLoggerHook
17 |
18 |
19 | @HOOKS.register_module()
20 | class CustomizedTextLoggerHook(TextLoggerHook):
21 | """Customized Text Logger hook.
22 |
23 | This logger prints out both lr and layer_0_lr.
24 |
25 | """
26 |
27 | def _log_info(self, log_dict, runner):
28 | # print exp name for users to distinguish experiments
29 | # at every ``interval_exp_name`` iterations and the end of each epoch
30 | if runner.meta is not None and 'exp_name' in runner.meta:
31 | if (self.every_n_iters(runner, self.interval_exp_name)) or (
32 | self.by_epoch and self.end_of_epoch(runner)):
33 | exp_info = f'Exp name: {runner.meta["exp_name"]}'
34 | runner.logger.info(exp_info)
35 |
36 | if log_dict['mode'] == 'train':
37 | lr_str = {}
38 | for lr_type in ['lr', 'layer_0_lr']:
39 | if isinstance(log_dict[lr_type], dict):
40 | lr_str[lr_type] = []
41 | for k, val in log_dict[lr_type].items():
42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}')
43 | lr_str[lr_type] = ' '.join(lr_str[lr_type])
44 | else:
45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}'
46 |
47 | # by epoch: Epoch [4][100/1000]
48 | # by iter: Iter [100/100000]
49 | if self.by_epoch:
50 | log_str = f'Epoch [{log_dict["epoch"]}]' \
51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
52 | else:
53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, '
55 |
56 | if 'time' in log_dict.keys():
57 | self.time_sec_tot += (log_dict['time'] * self.interval)
58 | time_sec_avg = self.time_sec_tot / (
59 | runner.iter - self.start_iter + 1)
60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
62 | log_str += f'eta: {eta_str}, '
63 | log_str += f'time: {log_dict["time"]:.3f}, ' \
64 | f'data_time: {log_dict["data_time"]:.3f}, '
65 | # statistic memory
66 | if torch.cuda.is_available():
67 | log_str += f'memory: {log_dict["memory"]}, '
68 | else:
69 | # val/test time
70 | # here 1000 is the length of the val dataloader
71 | # by epoch: Epoch[val] [4][1000]
72 | # by iter: Iter[val] [1000]
73 | if self.by_epoch:
74 | log_str = f'Epoch({log_dict["mode"]}) ' \
75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
76 | else:
77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
78 |
79 | log_items = []
80 | for name, val in log_dict.items():
81 | # TODO: resolve this hack
82 | # these items have been in log_str
83 | if name in [
84 | 'mode', 'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time',
85 | 'memory', 'epoch'
86 | ]:
87 | continue
88 | if isinstance(val, float):
89 | val = f'{val:.4f}'
90 | log_items.append(f'{name}: {val}')
91 | log_str += ', '.join(log_items)
92 |
93 | runner.logger.info(log_str)
94 |
95 |
96 | def log(self, runner):
97 | if 'eval_iter_num' in runner.log_buffer.output:
98 | # this doesn't modify runner.iter and is regardless of by_epoch
99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num')
100 | else:
101 | cur_iter = self.get_iter(runner, inner_iter=True)
102 |
103 | log_dict = OrderedDict(
104 | mode=self.get_mode(runner),
105 | epoch=self.get_epoch(runner),
106 | iter=cur_iter)
107 |
108 | # record lr and layer_0_lr
109 | cur_lr = runner.current_lr()
110 | if isinstance(cur_lr, list):
111 | log_dict['layer_0_lr'] = min(cur_lr)
112 | log_dict['lr'] = max(cur_lr)
113 | else:
114 | assert isinstance(cur_lr, dict)
115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {}
116 | for k, lr_ in cur_lr.items():
117 | assert isinstance(lr_, list)
118 | log_dict['layer_0_lr'].update({k: min(lr_)})
119 | log_dict['lr'].update({k: max(lr_)})
120 |
121 | if 'time' in runner.log_buffer.output:
122 | # statistic memory
123 | if torch.cuda.is_available():
124 | log_dict['memory'] = self._get_max_memory(runner)
125 |
126 | log_dict = dict(log_dict, **runner.log_buffer.output)
127 |
128 | self._log_info(log_dict, runner)
129 | self._dump_log(log_dict, runner)
130 | return log_dict
131 |
--------------------------------------------------------------------------------
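A minimal sketch of turning this hook on from a detection config; it assumes mmcv_custom has been imported somewhere (e.g. by the training script) so that the @HOOKS.register_module() decorator has run, and the interval value is illustrative:

# in a config file
log_config = dict(
    interval=50,  # print a log line every 50 training iterations
    hooks=[dict(type='CustomizedTextLoggerHook', by_epoch=True)])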
/detection/mmcv_custom/layer_decay_optimizer_constructor.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 |
9 | import json
10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
11 | from mmcv.runner import get_dist_info
12 |
13 |
14 | def get_num_layer_layer_wise(var_name, num_max_layer=12):
15 |
16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
17 | return 0
18 | elif var_name.startswith("backbone.downsample_layers"):
19 | stage_id = int(var_name.split('.')[2])
20 | if stage_id == 0:
21 | layer_id = 0
22 | elif stage_id == 1:
23 | layer_id = 2
24 | elif stage_id == 2:
25 | layer_id = 3
26 | elif stage_id == 3:
27 | layer_id = num_max_layer
28 | return layer_id
29 | elif var_name.startswith("backbone.stages"):
30 | stage_id = int(var_name.split('.')[2])
31 | block_id = int(var_name.split('.')[3])
32 | if stage_id == 0:
33 | layer_id = 1
34 | elif stage_id == 1:
35 | layer_id = 2
36 | elif stage_id == 2:
37 | layer_id = 3 + block_id // 3
38 | elif stage_id == 3:
39 | layer_id = num_max_layer
40 | return layer_id
41 | else:
42 | return num_max_layer + 1
43 |
44 |
45 | def get_num_layer_stage_wise(var_name, num_max_layer):
46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"):
47 | return 0
48 | elif var_name.startswith("backbone.downsample_layers"):
49 | return 0
50 | elif var_name.startswith("backbone.stages"):
51 | stage_id = int(var_name.split('.')[2])
52 | return stage_id + 1
53 | else:
54 | return num_max_layer - 1
55 |
56 |
57 | @OPTIMIZER_BUILDERS.register_module()
58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor):
59 | def add_params(self, params, module, prefix='', is_dcn_module=None):
60 | """Add all parameters of module to the params list.
61 | The parameters of the given module will be added to the list of param
62 | groups, with specific rules defined by paramwise_cfg.
63 | Args:
64 | params (list[dict]): A list of param groups, it will be modified
65 | in place.
66 | module (nn.Module): The module to be added.
67 | prefix (str): The prefix of the module
68 | is_dcn_module (int|float|None): If the current module is a
69 | submodule of DCN, `is_dcn_module` will be passed to
70 | control conv_offset layer's learning rate. Defaults to None.
71 | """
72 | parameter_groups = {}
73 | print(self.paramwise_cfg)
74 | num_layers = self.paramwise_cfg.get('num_layers') + 2
75 | decay_rate = self.paramwise_cfg.get('decay_rate')
76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise")
77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers))
78 | weight_decay = self.base_wd
79 |
80 | for name, param in module.named_parameters():
81 | if not param.requires_grad:
82 | continue # frozen weights
83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'):
84 | group_name = "no_decay"
85 | this_weight_decay = 0.
86 | else:
87 | group_name = "decay"
88 | this_weight_decay = weight_decay
89 |
90 | if decay_type == "layer_wise":
91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers'))
92 | elif decay_type == "stage_wise":
93 | layer_id = get_num_layer_stage_wise(name, num_layers)
94 |
95 | group_name = "layer_%d_%s" % (layer_id, group_name)
96 |
97 | if group_name not in parameter_groups:
98 | scale = decay_rate ** (num_layers - layer_id - 1)
99 |
100 | parameter_groups[group_name] = {
101 | "weight_decay": this_weight_decay,
102 | "params": [],
103 | "param_names": [],
104 | "lr_scale": scale,
105 | "group_name": group_name,
106 | "lr": scale * self.base_lr,
107 | }
108 |
109 | parameter_groups[group_name]["params"].append(param)
110 | parameter_groups[group_name]["param_names"].append(name)
111 | rank, _ = get_dist_info()
112 | if rank == 0:
113 | to_display = {}
114 | for key in parameter_groups:
115 | to_display[key] = {
116 | "param_names": parameter_groups[key]["param_names"],
117 | "lr_scale": parameter_groups[key]["lr_scale"],
118 | "lr": parameter_groups[key]["lr"],
119 | "weight_decay": parameter_groups[key]["weight_decay"],
120 | }
121 | print("Param groups = %s" % json.dumps(to_display, indent=2))
122 |
123 | params.extend(parameter_groups.values())
124 |
--------------------------------------------------------------------------------
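To make the schedule concrete, a short sketch that reproduces the lr scale formula from add_params above, using the values from the ConvNeXt configs in this repo (decay_rate=0.95, num_layers=6, to which the constructor adds 2):

decay_rate, num_layers = 0.95, 6 + 2
for layer_id in range(num_layers):
    scale = decay_rate ** (num_layers - layer_id - 1)
    print(f'layer {layer_id}: lr_scale = {scale:.4f}')
# layer 0 (the stem) trains at 0.95**7 ~= 0.698x the base lr, while the
# last group (parameters outside the backbone) gets the full base lr.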
/detection/mmcv_custom/runner/checkpoint.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Open-MMLab. All rights reserved.
2 | import os.path as osp
3 | import time
4 | from tempfile import TemporaryDirectory
5 |
6 | import torch
7 | from torch.optim import Optimizer
8 |
9 | import mmcv
10 | from mmcv.parallel import is_module_wrapper
11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict
12 |
13 | try:
14 | import apex
15 | except ImportError:
16 | print('apex is not installed')
17 |
18 |
19 | def save_checkpoint(model, filename, optimizer=None, meta=None):
20 | """Save checkpoint to file.
21 |
22 | The checkpoint will have 3 fields: ``meta``, ``state_dict`` and
23 | ``optimizer`` (saving the ``amp`` state is currently disabled below).
24 | By default ``meta`` will contain version and time info.
25 |
26 | Args:
27 | model (Module): Module whose params are to be saved.
28 | filename (str): Checkpoint filename.
29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved.
30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """
32 | if meta is None:
33 | meta = {}
34 | elif not isinstance(meta, dict):
35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}')
36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime())
37 |
38 | if is_module_wrapper(model):
39 | model = model.module
40 |
41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None:
42 | # save class name to the meta
43 | meta.update(CLASSES=model.CLASSES)
44 |
45 | checkpoint = {
46 | 'meta': meta,
47 | 'state_dict': weights_to_cpu(get_state_dict(model))
48 | }
49 | # save optimizer state dict in the checkpoint
50 | if isinstance(optimizer, Optimizer):
51 | checkpoint['optimizer'] = optimizer.state_dict()
52 | elif isinstance(optimizer, dict):
53 | checkpoint['optimizer'] = {}
54 | for name, optim in optimizer.items():
55 | checkpoint['optimizer'][name] = optim.state_dict()
56 |
57 | # save amp state dict in the checkpoint
58 | # checkpoint['amp'] = apex.amp.state_dict()
59 |
60 | if filename.startswith('pavi://'):
61 | try:
62 | from pavi import modelcloud
63 | from pavi.exception import NodeNotFoundError
64 | except ImportError:
65 | raise ImportError(
66 | 'Please install pavi to load checkpoint from modelcloud.')
67 | model_path = filename[7:]
68 | root = modelcloud.Folder()
69 | model_dir, model_name = osp.split(model_path)
70 | try:
71 | model = modelcloud.get(model_dir)
72 | except NodeNotFoundError:
73 | model = root.create_training_model(model_dir)
74 | with TemporaryDirectory() as tmp_dir:
75 | checkpoint_file = osp.join(tmp_dir, model_name)
76 | with open(checkpoint_file, 'wb') as f:
77 | torch.save(checkpoint, f)
78 | f.flush()
79 | model.create_file(checkpoint_file, name=model_name)
80 | else:
81 | mmcv.mkdir_or_exist(osp.dirname(filename))
82 | # immediately flush buffer
83 | with open(filename, 'wb') as f:
84 | torch.save(checkpoint, f)
85 | f.flush()
86 |
--------------------------------------------------------------------------------
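A small usage sketch for save_checkpoint with a local path; the model, optimizer, path and meta values are stand-ins, and the import path assumes this repo's package layout:

import torch.nn as nn
import torch.optim as optim
from mmcv_custom.runner.checkpoint import save_checkpoint  # path per this repo

model = nn.Linear(4, 2)                                # stand-in for a detector
optimizer = optim.AdamW(model.parameters(), lr=1e-4)
save_checkpoint(model, 'work_dirs/demo/latest.pth',
                optimizer=optimizer, meta=dict(epoch=12))
# the file then holds 'meta' (with mmcv version/time added),
# 'state_dict' (weights moved to CPU) and 'optimizer'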
/detection/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .darknet import Darknet
2 | from .detectors_resnet import DetectoRS_ResNet
3 | from .detectors_resnext import DetectoRS_ResNeXt
4 | from .hourglass import HourglassNet
5 | from .hrnet import HRNet
6 | from .regnet import RegNet
7 | from .res2net import Res2Net
8 | from .resnext import ResNeXt
9 | from .ssd_vgg import SSDVGG
10 | from .trident_resnet import TridentResNet
11 | from .swin_transformer import SwinTransformer
12 | from .resnet import ResNet
13 | from .kw_resnet import KW_ResNet
14 | from .convnext import ConvNeXt
15 | from .kw_convnext import KW_ConvNeXt
16 | from .mobilenetv2 import MobileNetV2
17 | from .kw_mobilenetv2 import KW_MobileNetV2
18 |
19 | __all__ = [
20 | 'RegNet', 'ResNet', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net',
21 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet',
22 | 'TridentResNet', 'SwinTransformer', 'KW_ResNet', 'ConvNeXt', 'KW_ConvNeXt', 'MobileNetV2', 'KW_MobileNetV2'
23 | ]
--------------------------------------------------------------------------------
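Once this package is imported, each registered backbone can be built straight from a config dict; a sketch using mmdet 2.x's standard builder, with argument values mirroring the resnet50 configs above:

from mmdet.models import build_backbone

backbone = build_backbone(dict(
    type='KW_ResNet', depth=50, out_indices=(0, 1, 2, 3),
    frozen_stages=1, cell_num_ratio=1))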
/detection/mmdet/models/backbones/convnext.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | from timm.models.layers import trunc_normal_, DropPath
6 |
7 | from mmcv_custom import load_checkpoint
8 | from mmdet.utils import get_root_logger
9 | from ..builder import BACKBONES
10 |
11 |
12 | class Block(nn.Module):
13 | r""" ConvNeXt Block. There are two equivalent implementations:
14 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
15 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
16 | We use (2) as we find it slightly faster in PyTorch
17 |
18 | Args:
19 | dim (int): Number of input channels.
20 | drop_path (float): Stochastic depth rate. Default: 0.0
21 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
22 | """
23 |
24 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
25 | super().__init__()
26 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
27 | self.norm = LayerNorm(dim, eps=1e-6)
28 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
29 | self.act = nn.GELU()
30 | self.pwconv2 = nn.Linear(4 * dim, dim)
31 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
32 | requires_grad=True) if layer_scale_init_value > 0 else None
33 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
34 |
35 | def forward(self, x):
36 | input = x
37 | x = self.dwconv(x)
38 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
39 | x = self.norm(x)
40 | x = self.pwconv1(x)
41 | x = self.act(x)
42 | x = self.pwconv2(x)
43 | if self.gamma is not None:
44 | x = self.gamma * x
45 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
46 |
47 | x = input + self.drop_path(x)
48 | return x
49 |
50 |
51 | @BACKBONES.register_module()
52 | class ConvNeXt(nn.Module):
53 | r""" ConvNeXt
55 | A PyTorch impl of: `A ConvNet for the 2020s` -
55 | https://arxiv.org/pdf/2201.03545.pdf
56 | Args:
57 | in_chans (int): Number of input image channels. Default: 3
58 | num_classes (int): Number of classes for classification head. Default: 1000
59 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
60 | dims (list(int)): Feature dimension at each stage. Default: [96, 192, 384, 768]
61 | drop_path_rate (float): Stochastic depth rate. Default: 0.
62 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
63 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
64 | """
65 |
66 | def __init__(self, in_chans=3, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768],
67 | drop_path_rate=0., layer_scale_init_value=1e-6, out_indices=[0, 1, 2, 3],
68 | ):
69 | super().__init__()
70 |
71 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
72 | stem = nn.Sequential(
73 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
74 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
75 | )
76 | self.downsample_layers.append(stem)
77 | for i in range(3):
78 | downsample_layer = nn.Sequential(
79 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
80 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2),
81 | )
82 | self.downsample_layers.append(downsample_layer)
83 |
84 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
85 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
86 | cur = 0
87 | for i in range(4):
88 | stage = nn.Sequential(
89 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
90 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
91 | )
92 | self.stages.append(stage)
93 | cur += depths[i]
94 |
95 | self.out_indices = out_indices
96 |
97 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first")
98 | for i_layer in range(4):
99 | layer = norm_layer(dims[i_layer])
100 | layer_name = f'norm{i_layer}'
101 | self.add_module(layer_name, layer)
102 |
103 | self.apply(self._init_weights)
104 |
105 | def _init_weights(self, m):
106 | if isinstance(m, (nn.Conv2d, nn.Linear)):
107 | trunc_normal_(m.weight, std=.02)
108 | if m.bias is not None: nn.init.constant_(m.bias, 0)
109 |
110 | def init_weights(self, pretrained=None):
111 | """Initialize the weights in backbone.
112 | Args:
113 | pretrained (str, optional): Path to pre-trained weights.
114 | Defaults to None.
115 | """
116 |
117 | def _init_weights(m):
118 | if isinstance(m, nn.Linear):
119 | trunc_normal_(m.weight, std=.02)
120 | if isinstance(m, nn.Linear) and m.bias is not None:
121 | nn.init.constant_(m.bias, 0)
122 | elif isinstance(m, nn.LayerNorm):
123 | nn.init.constant_(m.bias, 0)
124 | nn.init.constant_(m.weight, 1.0)
125 |
126 | if isinstance(pretrained, str):
127 | self.apply(_init_weights)
128 | logger = get_root_logger()
129 | load_checkpoint(self, pretrained, strict=False, logger=logger)
130 | elif pretrained is None:
131 | self.apply(_init_weights)
132 | else:
133 | raise TypeError('pretrained must be a str or None')
134 |
135 | def forward_features(self, x):
136 | outs = []
137 | for i in range(4):
138 | x = self.downsample_layers[i](x)
139 | x = self.stages[i](x)
140 | if i in self.out_indices:
141 | norm_layer = getattr(self, f'norm{i}')
142 | x_out = norm_layer(x)
143 | outs.append(x_out)
144 |
145 | return tuple(outs)
146 |
147 | def forward(self, x):
148 | x = self.forward_features(x)
149 | return x
150 |
151 |
152 | class LayerNorm(nn.Module):
153 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
154 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
155 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs
156 | with shape (batch_size, channels, height, width).
157 | """
158 |
159 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
160 | super().__init__()
161 | self.weight = nn.Parameter(torch.ones(normalized_shape))
162 | self.bias = nn.Parameter(torch.zeros(normalized_shape))
163 | self.eps = eps
164 | self.data_format = data_format
165 | if self.data_format not in ["channels_last", "channels_first"]:
166 | raise NotImplementedError
167 | self.normalized_shape = (normalized_shape,)
168 |
169 | def forward(self, x):
170 | if self.data_format == "channels_last":
171 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
172 | elif self.data_format == "channels_first":
173 | u = x.mean(1, keepdim=True)
174 | s = (x - u).pow(2).mean(1, keepdim=True)
175 | x = (x - u) / torch.sqrt(s + self.eps)
176 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
177 | return x
--------------------------------------------------------------------------------
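A quick numerical check, as a sketch, that the channels_first branch of the LayerNorm above matches F.layer_norm applied in channels_last layout (LayerNorm here is the class defined in this file):

import torch
import torch.nn.functional as F

x = torch.randn(2, 96, 8, 8)                              # (N, C, H, W)
ln = LayerNorm(96, data_format='channels_first')
ref = F.layer_norm(x.permute(0, 2, 3, 1), (96,), ln.weight, ln.bias, ln.eps)
assert torch.allclose(ln(x), ref.permute(0, 3, 1, 2), atol=1e-6)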
/detection/mmdet/models/backbones/kw_convnext.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 |
3 | # All rights reserved.
4 |
5 | # This source code is licensed under the license found in the
6 | # LICENSE file in the root directory of this source tree.
7 |
8 | from functools import partial
9 | import torch
10 | import torch.nn as nn
11 | import torch.nn.functional as F
12 | from timm.models.layers import trunc_normal_, DropPath
13 | from timm.models.registry import register_model
14 | from .kernel_warehouse import Warehouse_Manager
15 |
16 | from ..builder import BACKBONES
17 | from mmcv.runner import load_checkpoint
18 | from mmdet.utils import get_root_logger
19 |
20 | class Block(nn.Module):
21 | r""" ConvNeXt Block. There are two equivalent implementations:
22 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
23 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
24 | We use (2) as we find it slightly faster in PyTorch
25 |
26 | Args:
27 | dim (int): Number of input channels.
28 | drop_path (float): Stochastic depth rate. Default: 0.0
29 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
30 | """
31 |
32 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6,
33 | warehouse_manager=None, stage_idx=-1, layer_idx=-1):
34 | super().__init__()
35 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim, layer_type='conv2d',
36 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv
37 | self.norm = LayerNorm(dim, eps=1e-6)
38 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0, layer_type='conv2d',
39 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers
40 | self.act = nn.GELU()
41 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0, layer_type='conv2d',
42 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2))
43 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]),
44 | requires_grad=True) if layer_scale_init_value > 0 else None
45 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
46 |
47 | def forward(self, x):
48 | input = x
49 | x = self.dwconv(x)
50 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
51 | x = self.norm(x).permute(0, 3, 1, 2)
52 | x = self.pwconv1(x)
53 | x = self.act(x)
54 | x = self.pwconv2(x)
55 | if self.gamma is not None:
56 | x = self.gamma * x
57 | #x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
58 |
59 | x = input + self.drop_path(x)
60 | return x
61 |
62 | @BACKBONES.register_module()
63 | class KW_ConvNeXt(nn.Module):
64 | r""" ConvNeXt
65 | A PyTorch impl of: `A ConvNet for the 2020s` -
66 | https://arxiv.org/pdf/2201.03545.pdf
67 |
68 | Args:
69 | in_chans (int): Number of input image channels. Default: 3
70 | num_classes (int): Number of classes for classification head. Default: 1000
71 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
72 | dims (list(int)): Feature dimension at each stage. Default: [96, 192, 384, 768]
73 | drop_path_rate (float): Stochastic depth rate. Default: 0.
74 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
75 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
76 | """
77 |
78 | def __init__(self, in_chans=3, num_classes=1000,
79 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
80 | layer_scale_init_value=1e-6, head_init_scale=1.,
81 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1,
82 | sharing_range=('layer', 'pwconv'), out_indices=[0, 1, 2, 3], **kwargs
83 | ):
84 | super().__init__()
85 |
86 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio,
87 | cell_outplane_ratio, sharing_range, norm_layer=nn.LayerNorm)
88 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
89 | stem = nn.Sequential(
90 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4, layer_type='conv2d',
91 | warehouse_name='stage{}_conv0'.format('stem')),
92 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
93 | )
94 | self.downsample_layers.append(stem)
95 | for i in range(3):
96 | downsample_layer = nn.Sequential(
97 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
98 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2, layer_type='conv2d',
99 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')),
100 | )
101 | self.downsample_layers.append(downsample_layer)
102 |
103 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
104 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
105 | cur = 0
106 | for i in range(4):
107 | stage = nn.Sequential(
108 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
109 | layer_scale_init_value=layer_scale_init_value,
110 | warehouse_manager=self.warehouse_manager,
111 | stage_idx=i, layer_idx=j,
112 | ) for j in range(depths[i])]
113 | )
114 | self.stages.append(stage)
115 | cur += depths[i]
116 |
117 | self.warehouse_manager.store()
118 | self.warehouse_manager.allocate(self)
119 | self.net_update_temperature(0)
120 |
121 | self.out_indices = out_indices
122 |
123 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first")
124 | for i_layer in range(4):
125 | layer = norm_layer(dims[i_layer])
126 | layer_name = f'norm{i_layer}'
127 | self.add_module(layer_name, layer)
128 |
129 | self.apply(self._init_weights)
130 |
131 | def net_update_temperature(self, temp):
132 | for m in self.modules():
133 | if hasattr(m, "update_temperature"):
134 | m.update_temperature(temp)
135 |
136 | def train(self, mode=True):
137 | """Convert the model into training mode while keep normalization layer
138 | freezed."""
139 | super(KW_ConvNeXt, self).train(mode)
140 | if mode:
141 | for m in self.modules():
142 | # trick: eval() only has an effect on BatchNorm layers here
143 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
144 | m.eval()
145 |
146 | def _init_weights(self, m):
147 | if isinstance(m, (nn.Conv2d, nn.Linear)):
148 | trunc_normal_(m.weight, std=.02)
149 | if m.bias is not None:
150 | nn.init.constant_(m.bias, 0)
151 |
152 | def init_weights(self, pretrained=None):
153 | """Initialize the weights in backbone.
154 | Args:
155 | pretrained (str, optional): Path to pre-trained weights.
156 | Defaults to None.
157 | """
158 |
159 | def _init_weights(m):
160 | if isinstance(m, nn.Linear):
161 | trunc_normal_(m.weight, std=.02)
162 | if isinstance(m, nn.Linear) and m.bias is not None:
163 | nn.init.constant_(m.bias, 0)
164 | elif isinstance(m, nn.LayerNorm):
165 | nn.init.constant_(m.bias, 0)
166 | nn.init.constant_(m.weight, 1.0)
167 |
168 | if isinstance(pretrained, str):
169 | self.apply(_init_weights)
170 | logger = get_root_logger()
171 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu')
172 | elif pretrained is None:
173 | self.apply(_init_weights)
174 | else:
175 | raise TypeError('pretrained must be a str or None')
176 |
177 | def forward_features(self, x):
178 | outs = []
179 | for i in range(4):
180 | x = self.downsample_layers[i](x)
181 | x = self.stages[i](x)
182 | if i in self.out_indices:
183 | norm_layer = getattr(self, f'norm{i}')
184 | x_out = norm_layer(x)
185 | outs.append(x_out)
186 |
187 | return tuple(outs)
188 |
189 | def forward(self, x):
190 | x = self.forward_features(x)
191 | return x
192 |
193 |
194 | class LayerNorm(nn.Module):
195 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
196 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
197 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs
198 | with shape (batch_size, channels, height, width).
199 | """
200 |
201 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
202 | super().__init__()
203 | self.weight = nn.Parameter(torch.ones(normalized_shape))
204 | self.bias = nn.Parameter(torch.zeros(normalized_shape))
205 | self.eps = eps
206 | self.data_format = data_format
207 | if self.data_format not in ["channels_last", "channels_first"]:
208 | raise NotImplementedError
209 | self.normalized_shape = (normalized_shape,)
210 |
211 | def forward(self, x):
212 | if self.data_format == "channels_last":
213 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
214 | elif self.data_format == "channels_first":
215 | u = x.mean(1, keepdim=True)
216 | s = (x - u).pow(2).mean(1, keepdim=True)
217 | x = (x - u) / torch.sqrt(s + self.eps)
218 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
219 | return x
220 |
--------------------------------------------------------------------------------
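For orientation, a minimal sketch of the Warehouse_Manager lifecycle that KW_ConvNeXt follows above; the reserve/store/allocate calls and their arguments come from this file, while the tiny network itself and the import path are illustrative:

import torch
import torch.nn as nn
from kernel_warehouse import Warehouse_Manager  # adjust to your package layout

class TinyKWNet(nn.Module):
    def __init__(self):
        super().__init__()
        wm = Warehouse_Manager(0.0625, 1, 1, 1, ('layer',), norm_layer=nn.LayerNorm)
        # 1) reserve: record the layer's spec and get back a dynamic conv module
        self.conv = wm.reserve(3, 32, kernel_size=3, padding=1, layer_type='conv2d',
                               warehouse_name='stage0_layer0_conv0')
        wm.store()         # 2) create the shared kernel cells for all warehouses
        wm.allocate(self)  # 3) hand the cells to the reserved layers on this module
        self.warehouse_manager = wm
        # the KW backbones also reset the attention temperature after allocation
        for m in self.modules():
            if hasattr(m, 'update_temperature'):
                m.update_temperature(0)

    def forward(self, x):
        return self.conv(x)

out = TinyKWNet()(torch.randn(1, 3, 32, 32))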
/detection/mmdet/models/backbones/kw_mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from .kernel_warehouse import Warehouse_Manager
3 |
4 | from ..builder import BACKBONES
5 | from mmcv.runner import load_checkpoint
6 | from mmdet.utils import get_root_logger
7 |
8 |
9 | def _make_divisible(v, divisor, min_value=None):
10 | """
11 | This function is taken from the original tf repo.
12 | It ensures that all layers have a channel number that is divisible by 8
13 | It can be seen here:
14 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
15 | :param v:
16 | :param divisor:
17 | :param min_value:
18 | :return:
19 | """
20 | if min_value is None:
21 | min_value = divisor
22 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
23 | # Make sure that rounding down does not reduce the channel count by more than 10%.
24 | if new_v < 0.9 * v:
25 | new_v += divisor
26 | return new_v
27 |
28 |
29 | class ConvBNReLU(nn.Sequential):
30 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d,
31 | warehouse_name=None, warehouse_manager=None, enabled=True):
32 | padding = (kernel_size - 1) // 2
33 | super(ConvBNReLU, self).__init__(
34 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding,
35 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled),
36 | norm_layer(out_planes),
37 | nn.ReLU6(inplace=True)
38 | )
39 |
40 |
41 | class InvertedResidual(nn.Module):
42 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None,
43 | warehouse_manager=None):
44 | super(InvertedResidual, self).__init__()
45 | self.stride = stride
46 | assert stride in [1, 2]
47 | hidden_dim = int(round(inp * expand_ratio))
48 | self.use_res_connect = self.stride == 1 and inp == oup
49 |
50 | layers = []
51 | if expand_ratio != 1:
52 | # pw
53 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer,
54 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0),
55 | warehouse_manager=warehouse_manager))
56 |
57 | layers.extend([
58 | # dw
59 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer,
60 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0),
61 | warehouse_manager=warehouse_manager),
62 | # pw-linear
63 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False,
64 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)),
65 | norm_layer(oup),
66 | ])
67 | self.conv = nn.Sequential(*layers)
68 |
69 | def forward(self, x):
70 | if self.use_res_connect:
71 | return x + self.conv(x)
72 | else:
73 | return self.conv(x)
74 |
75 |
76 | @BACKBONES.register_module()
77 | class KW_MobileNetV2(nn.Module):
78 | def __init__(self,
79 | num_classes=1000,
80 | width_mult=1.0,
81 | inverted_residual_setting=None,
82 | round_nearest=8,
83 | block=None,
84 | norm_layer=None,
85 | dropout=0.1,
86 | reduction=0.0625,
87 | cell_num_ratio=1,
88 | cell_inplane_ratio=1,
89 | cell_outplane_ratio=1,
90 | sharing_range=('layer', 'pwconv'),
91 | frozen_stages=0,
92 | out_indices=(0, 1, 2, 3),
93 | norm_eval=True,
94 | **kwargs):
95 | """gr
96 | MobileNet V2 main class
97 |
98 | Args:
99 | num_classes (int): Number of classes
100 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
101 | inverted_residual_setting: Network structure
102 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number
103 | Set to 1 to turn off rounding
104 | block: Module specifying inverted residual building block for mobilenet
105 | norm_layer: Module specifying the normalization layer to use
106 |
107 | """
108 | super(KW_MobileNetV2, self).__init__()
109 | if block is None:
110 | block = InvertedResidual
111 |
112 | if norm_layer is None:
113 | norm_layer = nn.BatchNorm2d
114 |
115 | input_channel = 32
116 | last_channel = 1280
117 | self.stage_idx = [1, 3, 6, 13, 18]
118 | self.frozen_stages = frozen_stages
119 | self.out_indices = [self.stage_idx[x] for x in out_indices]
120 | self.norm_eval = norm_eval
121 |
122 | if inverted_residual_setting is None:
123 | inverted_residual_setting = [
124 | # t, c, n, s
125 | [1, 16, 1, 1],
126 | [6, 24, 2, 2],
127 | [6, 32, 3, 2],
128 | [6, 64, 4, 2],
129 | [6, 96, 3, 1], # 0.3M
130 | [6, 160, 3, 2], # 0.92M
131 | [6, 320, 1, 1], # 1.22M
132 | ]
133 |
134 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]
135 |
136 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio,
137 | cell_outplane_ratio, sharing_range)
138 |
139 | # only check the first element, assuming user knows t,c,n,s are required
140 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
141 | raise ValueError("inverted_residual_setting should be a non-empty "
142 | "list of 4-element lists, got {}".format(inverted_residual_setting))
143 |
144 | # building first layer
145 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
146 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
147 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer,
148 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')]
149 |
150 | layer_idx = 0
151 | # building inverted residual blocks
152 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting):
153 | output_channel = _make_divisible(c * width_mult, round_nearest)
154 | for i in range(n):
155 | stride = s if i == 0 else 1
156 |
157 | if i == 0 and idx > 0:
158 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1]
159 | else:
160 | handover = False
161 |
162 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx]
163 |
164 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer,
165 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx,
166 | layer_idx=layer_idx))
167 |
168 | input_channel = output_channel
169 | layer_idx += 1
170 |
171 | if handover:
172 | layer_idx = 0
173 |
174 | # building last several layers
175 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer,
176 | warehouse_manager=self.warehouse_manager,
177 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx)))
178 | # make it nn.Sequential
179 | self.features = nn.Sequential(*features)
180 | # building classifier
181 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
182 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True)
183 |
184 | # weight initialization
185 | for m in self.modules():
186 | if isinstance(m, nn.Conv2d):
187 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
188 | if m.bias is not None:
189 | nn.init.zeros_(m.bias)
190 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
191 | nn.init.ones_(m.weight)
192 | nn.init.zeros_(m.bias)
193 | elif isinstance(m, nn.Linear):
194 | nn.init.normal_(m.weight, 0, 0.01)
195 | nn.init.zeros_(m.bias)
196 |
197 | self.features = nn.Sequential(*features)
198 | self.warehouse_manager.store()
199 | self.warehouse_manager.allocate(self)
200 | self.net_update_temperature(0)
201 |
202 | def net_update_temperature(self, temp):
203 | for m in self.modules():
204 | if hasattr(m, "update_temperature"):
205 | m.update_temperature(temp)
206 |
207 | def _freeze_stages(self):
208 | if self.frozen_stages >= 0:
209 | for i in range(self.stage_idx[self.frozen_stages] + 1):
210 | m = self.features[i]
211 | m.eval()
212 | for param in m.parameters():
213 | param.requires_grad = False
214 |
215 | def train(self, mode=True):
216 | """Convert the model into training mode while keep normalization layer
217 | freezed."""
218 | super(KW_MobileNetV2, self).train(mode)
219 | self._freeze_stages()
220 |
221 | if mode and self.norm_eval:
222 | for m in self.modules():
223 | # trick: eval() only has an effect on BatchNorm layers here
224 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
225 | m.eval()
226 |
227 | def init_weights(self, pretrained=None):
228 | """Initialize the weights in backbone.
229 | Args:
230 | pretrained (str, optional): Path to pre-trained weights.
231 | Defaults to None.
232 | """
233 |
234 | for m in self.modules():
235 | if isinstance(m, nn.Conv2d):
236 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
237 | if m.bias is not None:
238 | nn.init.zeros_(m.bias)
239 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
240 | nn.init.constant_(m.weight, 1)
241 | nn.init.constant_(m.bias, 0)
242 | elif isinstance(m, nn.Linear):
243 | nn.init.normal_(m.weight, 0, 0.01)
244 | if m.bias is not None:
245 | nn.init.zeros_(m.bias)
246 |
247 | if isinstance(pretrained, str):
248 | logger = get_root_logger()
249 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu')
250 | elif pretrained is not None:
251 | raise TypeError('pretrained must be a str or None')
252 |
253 | def _forward_impl(self, x):
254 | outs = []
255 | for idx, layer in enumerate(self.features):
256 | x = layer(x)
257 | if idx in self.out_indices:
258 | outs.append(x)
259 | return outs
260 |
261 | def forward(self, x):
262 | return self._forward_impl(x)
--------------------------------------------------------------------------------
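For reference, a few worked values of the _make_divisible rounding used above (round_nearest=8): it rounds to the nearest multiple of the divisor, then bumps the result up by one divisor if it fell below 90% of the input.

assert _make_divisible(32 * 0.5, 8) == 16   # 16 is already a multiple of 8
assert _make_divisible(24 * 0.5, 8) == 16   # 12 rounds up to 16
assert _make_divisible(91, 8) == 88         # int(95)//8*8 = 88, still >= 0.9 * 91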
/detection/mmdet/models/backbones/kw_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from .kernel_warehouse import Warehouse_Manager
4 | from timm.models.layers import trunc_normal_, DropPath
5 |
6 | from ..builder import BACKBONES
7 | from mmcv.runner import load_checkpoint
8 | from mmdet.utils import get_root_logger
9 |
10 | __all__ = ['KW_ResNet']
11 |
12 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True):
13 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1,
14 | warehouse_name=warehouse_name, enabled=enabled, bias=False)
15 |
16 |
17 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True):
18 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0,
19 | warehouse_name=warehouse_name, enabled=enabled, bias=False)
20 |
21 |
22 | class BasicBlock(nn.Module):
23 | expansion = 1
24 |
25 | def __init__(self, inplanes, planes, stride=1, downsample=None,
26 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.):
27 | super(BasicBlock, self).__init__()
28 | conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0)
29 | self.conv1 = kwconv3x3(inplanes, planes, stride,
30 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0),
31 | warehouse_manager=warehouse_manager)
32 | self.bn1 = nn.BatchNorm2d(planes)
33 | self.relu = nn.ReLU(inplace=True)
34 | layer_idx = 0 if warehouse_handover else layer_idx
35 | self.conv2 = kwconv3x3(planes, planes,
36 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1),
37 | warehouse_manager=warehouse_manager)
38 | self.bn2 = nn.BatchNorm2d(planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
42 |
43 | def forward(self, x):
44 | identity = x
45 |
46 | out = self.conv1(x)
47 | out = self.bn1(out)
48 | out = self.relu(out)
49 |
50 | out = self.conv2(out)
51 | out = self.bn2(out)
52 |
53 | if self.downsample is not None:
54 | identity = self.downsample(x)
55 |
56 | out = identity + self.drop_path(out)
57 | out = self.relu(out)
58 | return out
59 |
60 |
61 | class Bottleneck(nn.Module):
62 | expansion = 4
63 |
64 | def __init__(self, inplanes, planes, stride=1, downsample=None,
65 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.):
66 | super(Bottleneck, self).__init__()
67 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx
68 | self.conv1 = kwconv1x1(inplanes, planes,
69 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0),
70 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0))
71 | self.bn1 = nn.BatchNorm2d(planes)
72 | layer_idx = 0 if warehouse_handover else layer_idx
73 | self.conv2 = kwconv3x3(planes, planes, stride,
74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1),
75 | warehouse_manager=warehouse_manager)
76 | self.bn2 = nn.BatchNorm2d(planes)
77 | self.conv3 = kwconv1x1(planes, planes * self.expansion,
78 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2),
79 | warehouse_manager=warehouse_manager)
80 | self.bn3 = nn.BatchNorm2d(planes * self.expansion)
81 | self.relu = nn.ReLU(inplace=True)
82 | self.downsample = downsample
83 | self.stride = stride
84 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
85 |
86 | def forward(self, x):
87 | identity = x
88 |
89 | out = self.conv1(x)
90 | out = self.bn1(out)
91 | out = self.relu(out)
92 |
93 | out = self.conv2(out)
94 | out = self.bn2(out)
95 | out = self.relu(out)
96 |
97 | out = self.conv3(out)
98 | out = self.bn3(out)
99 |
100 | if self.downsample is not None:
101 | identity = self.downsample(x)
102 |
103 | out = identity + self.drop_path(out)
104 | out = self.relu(out)
105 | return out
106 |
107 |
108 | @BACKBONES.register_module()
109 | class KW_ResNet(nn.Module):
110 | arch_settings = {
111 | 18: (BasicBlock, (2, 2, 2, 2)),
112 | 34: (BasicBlock, (3, 4, 6, 3)),
113 | 50: (Bottleneck, (3, 4, 6, 3)),
114 | 101: (Bottleneck, (3, 4, 23, 3)),
115 | 152: (Bottleneck, (3, 8, 36, 3))
116 | }
117 |
118 | def __init__(self, depth, num_classes=1000, dropout=0.1, reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1,
119 | cell_outplane_ratio=1, sharing_range=('layer', 'conv'), drop_path_rate=0.1, frozen_stages=0,
120 | out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs):
121 | super(KW_ResNet, self).__init__()
122 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio,
123 | sharing_range)
124 | block, layers = self.arch_settings[depth]
125 | self.inplanes = 64
126 | self.layer_idx = 0
127 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
128 | self.bn1 = nn.BatchNorm2d(self.inplanes)
129 | self.relu = nn.ReLU(inplace=True)
130 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
131 | self.layer1 = self._make_layer(block, 64, layers[0],
132 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
133 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
134 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
135 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
136 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
137 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
138 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
139 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
140 | self.fc = nn.Linear(512 * block.expansion, num_classes)
141 |
142 | for m in self.modules():
143 | if isinstance(m, nn.Conv2d):
144 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
146 | nn.init.constant_(m.weight, 1)
147 | nn.init.constant_(m.bias, 0)
148 |
149 | self.warehouse_manager.store()
150 | self.warehouse_manager.allocate(self)
151 |
152 | self.frozen_stages = frozen_stages
153 | self.out_indices = out_indices
154 | self.norm_eval = norm_eval
155 | self.net_update_temperature(0)
156 |
157 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.):
158 | downsample = None
159 | if stride != 1 or self.inplanes != planes * block.expansion:
160 | downsample = nn.Sequential(
161 | warehouse_manager.reserve(
162 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0,
163 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx-1, self.layer_idx + 1, 0),
164 | enabled=(stride != 1), bias=False),
165 | nn.BatchNorm2d(planes * block.expansion),
166 | )
167 |
168 | layers = []
169 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx,
170 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path))
171 | self.layer_idx = 1
172 | self.inplanes = planes * block.expansion
173 | for idx in range(1, blocks):
174 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx,
175 | warehouse_manager=warehouse_manager, drop_path=drop_path))
176 | self.layer_idx += 1
177 | return nn.Sequential(*layers)
178 |
179 | def net_update_temperature(self, temp):
180 | for m in self.modules():
181 | if hasattr(m, "update_temperature"):
182 | m.update_temperature(temp)
183 |
184 | def _freeze_stages(self):
185 | if self.frozen_stages >= 0:
186 | self.bn1.eval()
187 | for m in [self.conv1, self.bn1]:
188 | for param in m.parameters():
189 | param.requires_grad = False
190 |
191 | for i in range(1, self.frozen_stages + 1):
192 | m = getattr(self, f'layer{i}')
193 | m.eval()
194 | for param in m.parameters():
195 | param.requires_grad = False
196 |
197 | def train(self, mode=True):
198 | """Convert the model into training mode while keep normalization layer
199 | freezed."""
200 | super(KW_ResNet, self).train(mode)
201 | self._freeze_stages()
202 |
203 | if mode and self.norm_eval:
204 | for m in self.modules():
205 |                 # trick: eval() only affects BatchNorm layers
206 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
207 | m.eval()
208 |
209 | def init_weights(self, pretrained=None):
210 | """Initialize the weights in backbone.
211 | Args:
212 | pretrained (str, optional): Path to pre-trained weights.
213 | Defaults to None.
214 | """
215 |
216 | for m in self.modules():
217 | if isinstance(m, nn.Conv2d):
218 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
219 | if m.bias is not None:
220 | nn.init.zeros_(m.bias)
221 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
222 | nn.init.constant_(m.weight, 1)
223 | nn.init.constant_(m.bias, 0)
224 | elif isinstance(m, nn.Linear):
225 | nn.init.normal_(m.weight, 0, 0.01)
226 | if m.bias is not None:
227 | nn.init.zeros_(m.bias)
228 |
229 | if isinstance(pretrained, str):
230 | logger = get_root_logger()
231 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu')
232 | elif pretrained is not None:
233 | raise TypeError('pretrained must be a str or None')
234 |
235 | def _forward_impl(self, x):
236 | x = self.conv1(x)
237 | x = self.bn1(x)
238 | x = self.relu(x)
239 | x = self.maxpool(x)
240 | outs = []
241 | for idx in range(4):
242 | layer = getattr(self, f'layer{idx + 1}')
243 | x = layer(x)
244 | if idx in self.out_indices:
245 | outs.append(x)
246 | return tuple(outs)
247 |
248 |
249 | def forward(self, x):
250 | return self._forward_impl(x)
251 |
252 |
--------------------------------------------------------------------------------
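To make the warehouse naming in `kw_resnet.py` concrete: a `BasicBlock` built with `warehouse_handover=True` reserves its first convolution under the *previous* stage's warehouse, while its second convolution starts the new stage at layer 0. The sketch below uses a hypothetical `RecordingManager` (a stand-in for the real `Warehouse_Manager`) that merely logs `reserve()` calls, and assumes `BasicBlock` from this file is importable.

```python
import torch.nn as nn


class RecordingManager:
    """Hypothetical stub: logs reserve() calls and returns a plain nn.Conv2d."""

    def __init__(self):
        self.names = []

    def reserve(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0,
                warehouse_name=None, enabled=True, bias=False, **kwargs):
        self.names.append(warehouse_name)
        return nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, bias=bias)


mgr = RecordingManager()
# First block of stage 1: warehouse_handover=True books conv1 into stage 0.
BasicBlock(64, 128, stride=2, stage_idx=1, layer_idx=2,
           warehouse_manager=mgr, warehouse_handover=True)
print(mgr.names)  # ['stage0_layer2_conv0', 'stage1_layer0_conv1']
```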
/detection/mmdet/models/backbones/mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from timm.models.registry import register_model
3 |
4 | from ..builder import BACKBONES
5 | from mmcv.runner import load_checkpoint
6 | from mmdet.utils import get_root_logger
7 |
8 | def _make_divisible(v, divisor, min_value=None):
9 | """
10 | This function is taken from the original tf repo.
11 |     It ensures that all layers have a channel number that is divisible by `divisor`.
12 |     It can be seen here:
13 |     https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
14 |     :param v: original channel count (possibly scaled by a width multiplier)
15 |     :param divisor: the value the result must be divisible by
16 |     :param min_value: optional lower bound for the result; defaults to divisor
17 |     :return: the adjusted channel count
18 | """
19 | if min_value is None:
20 | min_value = divisor
21 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
22 | # Make sure that round down does not go down by more than 10%.
23 | if new_v < 0.9 * v:
24 | new_v += divisor
25 | return new_v
26 |
27 |
28 | class ConvBNReLU(nn.Sequential):
29 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d):
30 | padding = (kernel_size - 1) // 2
31 | super(ConvBNReLU, self).__init__(
32 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
33 | norm_layer(out_planes),
34 | nn.ReLU6(inplace=True)
35 | )
36 |
37 |
38 | class InvertedResidual(nn.Module):
39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d):
40 | super(InvertedResidual, self).__init__()
41 | self.stride = stride
42 | hidden_dim = int(round(inp * expand_ratio))
43 | self.use_res_connect = self.stride == 1 and inp == oup
44 |
45 | layers = []
46 | if expand_ratio != 1:
47 | # pw
48 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
49 | layers.extend([
50 | # dw
51 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
52 | # pw-linear
53 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
54 | norm_layer(oup),
55 | ])
56 | self.conv = nn.Sequential(*layers)
57 |
58 | def forward(self, x):
59 | if self.use_res_connect:
60 | return x + self.conv(x)
61 | else:
62 | return self.conv(x)
63 |
64 |
65 | @BACKBONES.register_module()
66 | class MobileNetV2(nn.Module):
67 | def __init__(self,
68 | num_classes=1000,
69 | width_mult=1.0,
70 | inverted_residual_setting=None,
71 | round_nearest=8,
72 | block=InvertedResidual,
73 | norm_layer=nn.BatchNorm2d,
74 | dropout=0.0,
75 | frozen_stages=0,
76 | out_indices=(0, 1, 2, 3),
77 | norm_eval=True,
78 | **kwargs):
79 | """gr
80 | MobileNet V2 main class
81 |
82 | Args:
83 | num_classes (int): Number of classes
84 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
85 | inverted_residual_setting: Network structure
86 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number
87 | Set to 1 to turn off rounding
88 | block: Module specifying inverted residual building block for mobilenet
89 | norm_layer: Module specifying the normalization layer to use
90 |
91 | """
92 | super(MobileNetV2, self).__init__()
93 |
94 | input_channel = 32
95 | last_channel = 1280
96 | self.stage_idx = [1, 3, 6, 13, 18]
97 | self.frozen_stages = frozen_stages
98 | self.out_indices = [self.stage_idx[x] for x in out_indices]
99 | self.norm_eval = norm_eval
100 |
101 | if inverted_residual_setting is None:
102 | inverted_residual_setting = [
103 | # t, c, n, s
104 | [1, 16, 1, 1],
105 | [6, 24, 2, 2],
106 | [6, 32, 3, 2],
107 | [6, 64, 4, 2],
108 | [6, 96, 3, 1],
109 | [6, 160, 3, 2],
110 | [6, 320, 1, 1],
111 | ]
112 |
113 | # only check the first element, assuming user knows t,c,n,s are required
114 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
115 |             raise ValueError("inverted_residual_setting should be a non-empty "
116 |                              "list of 4-element lists, got {}".format(inverted_residual_setting))
117 |
118 | # building first layer
119 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
120 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
121 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
122 | # building inverted residual blocks
123 | for t, c, n, s in inverted_residual_setting:
124 | output_channel = _make_divisible(c * width_mult, round_nearest)
125 | for i in range(n):
126 | stride = s if i == 0 else 1
127 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
128 | input_channel = output_channel
129 | # building last several layers
130 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
131 | # make it nn.Sequential
132 | self.features = nn.Sequential(*features)
133 | # building classifier
134 | self.classifier = nn.Sequential(
135 | nn.Dropout(dropout),
136 | nn.Linear(self.last_channel, num_classes),
137 | )
138 |
139 | # weight initialization
140 | for m in self.modules():
141 | if isinstance(m, nn.Conv2d):
142 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
143 | if m.bias is not None:
144 | nn.init.zeros_(m.bias)
145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
146 | nn.init.ones_(m.weight)
147 | nn.init.zeros_(m.bias)
148 | elif isinstance(m, nn.Linear):
149 | nn.init.normal_(m.weight, 0, 0.01)
150 | nn.init.zeros_(m.bias)
151 |
152 | def _freeze_stages(self):
153 | if self.frozen_stages >= 0:
154 | for i in range(self.stage_idx[self.frozen_stages] + 1):
155 | m = self.features[i]
156 | m.eval()
157 | for param in m.parameters():
158 | param.requires_grad = False
159 |
160 | def train(self, mode=True):
161 | """Convert the model into training mode while keep normalization layer
162 | freezed."""
163 | super(MobileNetV2, self).train(mode)
164 | self._freeze_stages()
165 |
166 | if mode and self.norm_eval:
167 | for m in self.modules():
168 |                 # trick: eval() only affects BatchNorm layers
169 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)):
170 | m.eval()
171 |
172 | def init_weights(self, pretrained=None):
173 | """Initialize the weights in backbone.
174 | Args:
175 | pretrained (str, optional): Path to pre-trained weights.
176 | Defaults to None.
177 | """
178 |
179 | for m in self.modules():
180 | if isinstance(m, nn.Conv2d):
181 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
182 | if m.bias is not None:
183 | nn.init.zeros_(m.bias)
184 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
185 | nn.init.constant_(m.weight, 1)
186 | nn.init.constant_(m.bias, 0)
187 | elif isinstance(m, nn.Linear):
188 | nn.init.normal_(m.weight, 0, 0.01)
189 | if m.bias is not None:
190 | nn.init.zeros_(m.bias)
191 |
192 | if isinstance(pretrained, str):
193 | logger = get_root_logger()
194 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu')
195 | elif pretrained is not None:
196 | raise TypeError('pretrained must be a str or None')
197 |
198 | def _forward_impl(self, x):
199 | outs = []
200 | for idx, layer in enumerate(self.features):
201 | x = layer(x)
202 | if idx in self.out_indices:
203 | outs.append(x)
204 | return outs
205 |
206 | def forward(self, x):
207 | return self._forward_impl(x)
--------------------------------------------------------------------------------
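The channel arithmetic in `mobilenetv2.py` hinges on `_make_divisible`: a channel count scaled by `width_mult` is rounded to the nearest multiple of `round_nearest`, then bumped up one step if rounding lost more than 10% of the original value. A few worked values (the function is repeated verbatim so the snippet runs standalone):

```python
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v


print(_make_divisible(32 * 0.5, 8))         # 16: already a multiple of 8
print(_make_divisible(24 * 0.5, 8))         # 16: 12 rounds up to the nearest multiple
print(_make_divisible(17, 8))               # 16: rounds down; 16 >= 0.9 * 17, so no bump
print(_make_divisible(8, 8, min_value=16))  # 16: clamped from below by min_value
```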
/detection/mmdet/models/backbones/resnet.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | from ..builder import BACKBONES
3 | from mmcv.runner import load_checkpoint
4 | from mmdet.utils import get_root_logger
5 |
6 | __all__ = ['ResNet']
7 |
8 |
9 | def conv3x3(in_planes, out_planes, stride=1):
10 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
11 | padding=1, bias=False)
12 |
13 |
14 | def conv1x1(in_planes, out_planes, stride=1):
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
16 |
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | identity = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | identity = self.downsample(x)
43 |
44 | out += identity
45 | out = self.relu(out)
46 | return out
47 |
48 |
49 | class Bottleneck(nn.Module):
50 | expansion = 4
51 |
52 | def __init__(self, inplanes, planes, stride=1, downsample=None):
53 | super(Bottleneck, self).__init__()
54 | self.conv1 = conv1x1(inplanes, planes)
55 | self.bn1 = nn.BatchNorm2d(planes)
56 | self.conv2 = conv3x3(planes, planes, stride)
57 | self.bn2 = nn.BatchNorm2d(planes)
58 | self.conv3 = conv1x1(planes, planes * self.expansion)
59 | self.bn3 = nn.BatchNorm2d(planes * self.expansion)
60 | self.relu = nn.ReLU(inplace=True)
61 | self.downsample = downsample
62 | self.stride = stride
63 |
64 | def forward(self, x):
65 | identity = x
66 |
67 | out = self.conv1(x)
68 | out = self.bn1(out)
69 | out = self.relu(out)
70 |
71 | out = self.conv2(out)
72 | out = self.bn2(out)
73 | out = self.relu(out)
74 |
75 | out = self.conv3(out)
76 | out = self.bn3(out)
77 |
78 | if self.downsample is not None:
79 | identity = self.downsample(x)
80 |
81 | out += identity
82 | out = self.relu(out)
83 | return out
84 |
85 |
86 | @BACKBONES.register_module()
87 | class ResNet(nn.Module):
88 | arch_settings = {
89 | 18: (BasicBlock, (2, 2, 2, 2)),
90 | 34: (BasicBlock, (3, 4, 6, 3)),
91 | 50: (Bottleneck, (3, 4, 6, 3)),
92 | 101: (Bottleneck, (3, 4, 23, 3)),
93 | 152: (Bottleneck, (3, 8, 36, 3))
94 | }
95 |
96 | def __init__(self, depth, num_classes=1000,
97 | frozen_stages=0, out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs):
98 | super(ResNet, self).__init__()
99 | block, layers = self.arch_settings[depth]
100 | self.inplanes = 64
101 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
102 | bias=False)
103 | self.bn1 = nn.BatchNorm2d(self.inplanes)
104 | self.relu = nn.ReLU(inplace=True)
105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
106 | self.layer1 = self._make_layer(block, 64, layers[0])
107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
110 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
111 | self.fc = nn.Linear(512 * block.expansion, num_classes)
112 |
113 | self.frozen_stages = frozen_stages
114 | self.out_indices = out_indices
115 | self.norm_eval = norm_eval
116 |
117 | def _make_layer(self, block, planes, blocks, stride=1):
118 | downsample = None
119 | if stride != 1 or self.inplanes != planes * block.expansion:
120 | downsample = nn.Sequential(
121 | conv1x1(self.inplanes, planes * block.expansion, stride),
122 | nn.BatchNorm2d(planes * block.expansion),
123 | )
124 |
125 | layers = []
126 | layers.append(block(self.inplanes, planes, stride, downsample))
127 | self.inplanes = planes * block.expansion
128 | for _ in range(1, blocks):
129 | layers.append(block(self.inplanes, planes))
130 |
131 | return nn.Sequential(*layers)
132 |
133 | def _freeze_stages(self):
134 | if self.frozen_stages >= 0:
135 | self.bn1.eval()
136 | for m in [self.conv1, self.bn1]:
137 | for param in m.parameters():
138 | param.requires_grad = False
139 |
140 | for i in range(1, self.frozen_stages + 1):
141 | m = getattr(self, f'layer{i}')
142 | m.eval()
143 | for param in m.parameters():
144 | param.requires_grad = False
145 |
146 | def train(self, mode=True):
147 | """Convert the model into training mode while keep normalization layer
148 | freezed."""
149 | super(ResNet, self).train(mode)
150 | self._freeze_stages()
151 |
152 | if mode and self.norm_eval:
153 | for m in self.modules():
154 |                 # trick: eval() only affects BatchNorm layers
155 | if isinstance(m, nn.BatchNorm2d):
156 | m.eval()
157 |
158 | def init_weights(self, pretrained=None):
159 | """Initialize the weights in backbone.
160 | Args:
161 | pretrained (str, optional): Path to pre-trained weights.
162 | Defaults to None.
163 | """
164 | for m in self.modules():
165 | if isinstance(m, nn.Conv2d):
166 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
167 | if m.bias is not None:
168 | nn.init.zeros_(m.bias)
169 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
170 | nn.init.constant_(m.weight, 1)
171 | nn.init.constant_(m.bias, 0)
172 | elif isinstance(m, nn.Linear):
173 | nn.init.normal_(m.weight, 0, 0.01)
174 | nn.init.zeros_(m.bias)
175 |
176 | if isinstance(pretrained, str):
177 | logger = get_root_logger()
178 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu')
179 | elif pretrained is not None:
180 | raise TypeError('pretrained must be a str or None')
181 |
182 | def _forward_impl(self, x):
183 | x = self.conv1(x)
184 | x = self.bn1(x)
185 | x = self.relu(x)
186 | x = self.maxpool(x)
187 |
188 | outs = []
189 | for idx in range(4):
190 | layer = getattr(self, f'layer{idx + 1}')
191 | x = layer(x)
192 | if idx in self.out_indices:
193 | outs.append(x)
194 | return outs
195 |
196 | def forward(self, x):
197 | return self._forward_impl(x)
198 |
--------------------------------------------------------------------------------
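The `_freeze_stages`/`train` interplay in `resnet.py` is easy to misread: `frozen_stages=k` freezes the stem plus `layer1`..`layerk` and keeps them in eval mode across `train()` calls, while `norm_eval=True` additionally puts every BatchNorm into eval mode without freezing its parameters. A quick illustrative check, assuming `ResNet` from this file (and its mmdet dependencies) is importable:

```python
model = ResNet(depth=18, frozen_stages=1, norm_eval=True)
model.train()

# The stem and layer1 are frozen and stay in eval mode.
assert not any(p.requires_grad for p in model.layer1.parameters())
assert not model.layer1.training
# layer2 still trains, but its BatchNorms run in eval mode (norm_eval).
assert all(p.requires_grad for p in model.layer2.parameters())
assert not model.layer2[0].bn1.training
```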
/engine.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Iterable, Optional
3 | import torch
4 | from timm.data import Mixup
5 | from timm.utils import accuracy, ModelEma
6 |
7 | import utils
8 |
9 |
10 | def get_temperature(iteration, epoch, iter_per_epoch, temp_epoch=20, temp_init_value=30.0, temp_end=0.0):
11 | total_iter = iter_per_epoch * temp_epoch
12 | current_iter = iter_per_epoch * epoch + iteration
13 | temperature = temp_end + max(0, (temp_init_value - temp_end) * ((total_iter - current_iter) / max(1.0, total_iter)))
14 | return temperature
15 |
16 |
17 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
18 | data_loader: Iterable, optimizer: torch.optim.Optimizer,
19 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0,
20 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, log_writer=None,
21 | wandb_logger=None, start_steps=None, lr_schedule_values=None, wd_schedule_values=None,
22 | num_training_steps_per_epoch=None, update_freq=None, use_amp=False, args=None):
23 | model.train(True)
24 | metric_logger = utils.MetricLogger(delimiter=" ")
25 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
26 | metric_logger.add_meter('min_lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
27 | header = 'Epoch: [{}]'.format(epoch)
28 | print_freq = 10
29 |
30 | optimizer.zero_grad()
31 |
32 | for data_iter_step, (samples, targets) in enumerate(metric_logger.log_every(data_loader, print_freq, header)):
33 | step = data_iter_step // update_freq
34 | if step >= num_training_steps_per_epoch:
35 | continue
36 | it = start_steps + step # global training iteration
37 |         # Update LR & WD at the first step of each gradient-accumulation cycle
38 |         if (lr_schedule_values is not None or wd_schedule_values is not None) and data_iter_step % update_freq == 0:
39 | for i, param_group in enumerate(optimizer.param_groups):
40 | if lr_schedule_values is not None:
41 | param_group["lr"] = lr_schedule_values[it] * param_group["lr_scale"]
42 | if wd_schedule_values is not None and param_group["weight_decay"] > 0:
43 | param_group["weight_decay"] = wd_schedule_values[it]
44 |
45 | if hasattr(model.module, 'net_update_temperature'):
46 | temp = get_temperature(data_iter_step + 1, epoch, len(data_loader),
47 | temp_epoch=args.temp_epoch, temp_init_value=args.temp_init_value)
48 | model.module.net_update_temperature(temp)
49 |
50 | samples = samples.to(device, non_blocking=True)
51 | targets = targets.to(device, non_blocking=True)
52 |
53 | if mixup_fn is not None:
54 | samples, targets = mixup_fn(samples, targets)
55 |
56 | if use_amp:
57 | with torch.cuda.amp.autocast():
58 | output = model(samples)
59 | loss = criterion(output, targets)
60 | else: # full precision
61 | output = model(samples)
62 | loss = criterion(output, targets)
63 |
64 | loss_value = loss.item()
65 |
66 | if not math.isfinite(loss_value): # this could trigger if using AMP
67 | print("Loss is {}, stopping training".format(loss_value))
68 | assert math.isfinite(loss_value)
69 |
70 | if use_amp:
71 | # this attribute is added by timm on one optimizer (adahessian)
72 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order
73 | loss /= update_freq
74 | grad_norm = loss_scaler(loss, optimizer, clip_grad=max_norm,
75 | parameters=model.parameters(), create_graph=is_second_order,
76 | update_grad=(data_iter_step + 1) % update_freq == 0)
77 | if (data_iter_step + 1) % update_freq == 0:
78 | optimizer.zero_grad()
79 | if model_ema is not None:
80 | model_ema.update(model)
81 | else: # full precision
82 | loss /= update_freq
83 | loss.backward()
84 | if (data_iter_step + 1) % update_freq == 0:
85 | optimizer.step()
86 | optimizer.zero_grad()
87 | if model_ema is not None:
88 | model_ema.update(model)
89 |
90 | torch.cuda.synchronize()
91 |
92 | if mixup_fn is None:
93 | class_acc = (output.max(-1)[-1] == targets).float().mean()
94 | else:
95 | class_acc = None
96 | metric_logger.update(loss=loss_value)
97 | metric_logger.update(class_acc=class_acc)
98 | min_lr = 10.
99 | max_lr = 0.
100 | for group in optimizer.param_groups:
101 | min_lr = min(min_lr, group["lr"])
102 | max_lr = max(max_lr, group["lr"])
103 |
104 | metric_logger.update(lr=max_lr)
105 | metric_logger.update(min_lr=min_lr)
106 | weight_decay_value = None
107 | for group in optimizer.param_groups:
108 | if group["weight_decay"] > 0:
109 | weight_decay_value = group["weight_decay"]
110 | metric_logger.update(weight_decay=weight_decay_value)
111 | if use_amp:
112 | metric_logger.update(grad_norm=grad_norm)
113 |
114 | if log_writer is not None:
115 | log_writer.update(loss=loss_value, head="loss")
116 | log_writer.update(class_acc=class_acc, head="loss")
117 | log_writer.update(lr=max_lr, head="opt")
118 | log_writer.update(min_lr=min_lr, head="opt")
119 | log_writer.update(weight_decay=weight_decay_value, head="opt")
120 | if use_amp:
121 | log_writer.update(grad_norm=grad_norm, head="opt")
122 | log_writer.set_step()
123 |
124 | if wandb_logger:
125 | wandb_logger._wandb.log({
126 | 'Rank-0 Batch Wise/train_loss': loss_value,
127 | 'Rank-0 Batch Wise/train_max_lr': max_lr,
128 | 'Rank-0 Batch Wise/train_min_lr': min_lr
129 | }, commit=False)
130 | if class_acc:
131 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_class_acc': class_acc}, commit=False)
132 | if use_amp:
133 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_grad_norm': grad_norm}, commit=False)
134 | wandb_logger._wandb.log({'Rank-0 Batch Wise/global_train_step': it})
135 |
136 |
137 | # gather the stats from all processes
138 | metric_logger.synchronize_between_processes()
139 | print("Averaged stats:", metric_logger)
140 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
141 |
142 |
143 | @torch.no_grad()
144 | def evaluate(data_loader, model, device, use_amp=False):
145 | criterion = torch.nn.CrossEntropyLoss()
146 |
147 | metric_logger = utils.MetricLogger(delimiter=" ")
148 | header = 'Test:'
149 |
150 | # switch to evaluation mode
151 | model.eval()
152 | for batch in metric_logger.log_every(data_loader, 10, header):
153 | images = batch[0]
154 | target = batch[-1]
155 |
156 | images = images.to(device, non_blocking=True)
157 | target = target.to(device, non_blocking=True)
158 |
159 | # compute output
160 | if use_amp:
161 | with torch.cuda.amp.autocast():
162 | output = model(images)
163 | loss = criterion(output, target)
164 | else:
165 | output = model(images)
166 | loss = criterion(output, target)
167 |
168 | acc1, acc5 = accuracy(output, target, topk=(1, 5))
169 |
170 | batch_size = images.shape[0]
171 | metric_logger.update(loss=loss.item())
172 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
173 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
174 | # gather the stats from all processes
175 | metric_logger.synchronize_between_processes()
176 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}'
177 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss))
178 |
179 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
180 |
--------------------------------------------------------------------------------
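`get_temperature` in `engine.py` implements a linear decay from `temp_init_value` to `temp_end` over the first `temp_epoch` epochs, after which the temperature is clamped at `temp_end`. The snippet below repeats the function so it runs standalone; `iter_per_epoch=100` is chosen purely for illustration.

```python
def get_temperature(iteration, epoch, iter_per_epoch, temp_epoch=20,
                    temp_init_value=30.0, temp_end=0.0):
    total_iter = iter_per_epoch * temp_epoch
    current_iter = iter_per_epoch * epoch + iteration
    return temp_end + max(0, (temp_init_value - temp_end)
                          * ((total_iter - current_iter) / max(1.0, total_iter)))


print(get_temperature(0, 0, 100))   # 30.0  start of training
print(get_temperature(0, 10, 100))  # 15.0  halfway through the warm-up
print(get_temperature(0, 20, 100))  # 0.0   warm-up finished
print(get_temperature(0, 30, 100))  # 0.0   clamped at temp_end thereafter
```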
/fig/Fig_Architecture.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.pdf
--------------------------------------------------------------------------------
/fig/Fig_Architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.png
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import resnet18, resnet50, ResNet
2 | from .kw_resnet import kw_resnet18, kw_resnet50, KW_ResNet
3 | from .convnext import convnext_tiny
4 | from .kw_convnext import kw_convnext_tiny
5 | from .mobilenetv2 import mobilenetv2_100, mobilenetv2_050
6 | from .kw_mobilenetv2 import kw_mobilenetv2_100, kw_mobilenetv2_050
7 | from .kw1d2x_mobilenetv2 import kw1d2x_mobilenetv2_100, kw1d2x_mobilenetv2_050
--------------------------------------------------------------------------------
/models/convnext.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from timm.models.layers import trunc_normal_, DropPath
5 | from timm.models.registry import register_model
6 |
7 |
8 | class Block(nn.Module):
9 | r""" ConvNeXt Block. There are two equivalent implementations:
10 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
11 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
12 | We use (2) as we find it slightly faster in PyTorch
13 |
14 | Args:
15 | dim (int): Number of input channels.
16 | drop_path (float): Stochastic depth rate. Default: 0.0
17 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
18 | """
19 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
20 | super().__init__()
21 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
22 | self.norm = LayerNorm(dim, eps=1e-6)
23 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
24 | self.act = nn.GELU()
25 | self.pwconv2 = nn.Linear(4 * dim, dim)
26 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)),
27 | requires_grad=True) if layer_scale_init_value > 0 else None
28 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
29 |
30 | def forward(self, x):
31 | input = x
32 | x = self.dwconv(x)
33 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
34 | x = self.norm(x)
35 | x = self.pwconv1(x)
36 | x = self.act(x)
37 | x = self.pwconv2(x)
38 | if self.gamma is not None:
39 | x = self.gamma * x
40 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
41 | x = input + self.drop_path(x)
42 | return x
43 |
44 | @register_model
45 | class ConvNeXt(nn.Module):
46 | r""" ConvNeXt
47 | A PyTorch impl of : `A ConvNet for the 2020s` -
48 | https://arxiv.org/pdf/2201.03545.pdf
49 |
50 | Args:
51 | in_chans (int): Number of input image channels. Default: 3
52 | num_classes (int): Number of classes for classification head. Default: 1000
53 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
54 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
55 | drop_path_rate (float): Stochastic depth rate. Default: 0.
56 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
57 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
58 | """
59 | def __init__(self, in_chans=3, num_classes=1000,
60 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
61 | layer_scale_init_value=1e-6, head_init_scale=1., **kwargs
62 | ):
63 | super().__init__()
64 |
65 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
66 | stem = nn.Sequential(
67 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
68 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
69 | )
70 | self.downsample_layers.append(stem)
71 | for i in range(3):
72 | downsample_layer = nn.Sequential(
73 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
74 | nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
75 | )
76 | self.downsample_layers.append(downsample_layer)
77 |
78 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
79 | dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
80 | cur = 0
81 | for i in range(4):
82 | stage = nn.Sequential(
83 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
84 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])]
85 | )
86 | self.stages.append(stage)
87 | cur += depths[i]
88 |
89 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
90 | self.head = nn.Linear(dims[-1], num_classes)
91 |
92 | self.apply(self._init_weights)
93 | self.head.weight.data.mul_(head_init_scale)
94 | self.head.bias.data.mul_(head_init_scale)
95 |
96 | def _init_weights(self, m):
97 | if isinstance(m, (nn.Conv2d, nn.Linear)):
98 | trunc_normal_(m.weight, std=.02)
99 | nn.init.constant_(m.bias, 0)
100 |
101 | def forward_features(self, x):
102 | for i in range(4):
103 | x = self.downsample_layers[i](x)
104 | x = self.stages[i](x)
105 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
106 |
107 | def forward(self, x):
108 | x = self.forward_features(x)
109 | x = self.head(x)
110 | return x
111 |
112 |
113 | class LayerNorm(nn.Module):
114 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
115 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
116 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs
117 | with shape (batch_size, channels, height, width).
118 | """
119 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
120 | super().__init__()
121 | self.weight = nn.Parameter(torch.ones(normalized_shape))
122 | self.bias = nn.Parameter(torch.zeros(normalized_shape))
123 | self.eps = eps
124 | self.data_format = data_format
125 | if self.data_format not in ["channels_last", "channels_first"]:
126 | raise NotImplementedError
127 | self.normalized_shape = (normalized_shape, )
128 |
129 | def forward(self, x):
130 | if self.data_format == "channels_last":
131 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
132 | elif self.data_format == "channels_first":
133 | u = x.mean(1, keepdim=True)
134 | s = (x - u).pow(2).mean(1, keepdim=True)
135 | x = (x - u) / torch.sqrt(s + self.eps)
136 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
137 | return x
138 |
139 |
140 | @register_model
141 | def convnext_tiny(**kwargs):
142 | model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
143 | return model
144 |
145 |
--------------------------------------------------------------------------------
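The `channels_first` branch of `LayerNorm` in `convnext.py` normalizes over the channel dimension by hand (biased variance, affine parameters broadcast over H and W). A quick numerical check, assuming `LayerNorm` from this file is importable, that it matches `F.layer_norm` applied after permuting to `channels_last`:

```python
import torch
import torch.nn.functional as F

torch.manual_seed(0)
x = torch.randn(2, 96, 8, 8)  # (N, C, H, W)
ln = LayerNorm(96, data_format="channels_first")

manual = ln(x)
reference = F.layer_norm(x.permute(0, 2, 3, 1), (96,),
                         ln.weight, ln.bias, ln.eps).permute(0, 3, 1, 2)
print(torch.allclose(manual, reference, atol=1e-5))  # True
```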
/models/kw1d2x_mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from modules.kernel_warehouse import Warehouse_Manager
3 | from timm.models.registry import register_model
4 |
5 | def _make_divisible(v, divisor, min_value=None):
6 | """
7 | This function is taken from the original tf repo.
8 |     It ensures that all layers have a channel number that is divisible by `divisor`.
9 |     It can be seen here:
10 |     https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
11 |     :param v: original channel count (possibly scaled by a width multiplier)
12 |     :param divisor: the value the result must be divisible by
13 |     :param min_value: optional lower bound for the result; defaults to divisor
14 |     :return: the adjusted channel count
15 | """
16 | if min_value is None:
17 | min_value = divisor
18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
19 | # Make sure that round down does not go down by more than 10%.
20 | if new_v < 0.9 * v:
21 | new_v += divisor
22 | return new_v
23 |
24 |
25 | class ConvBNReLU(nn.Sequential):
26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d,
27 | warehouse_name=None, warehouse_manager=None, enabled=True):
28 | padding = (kernel_size - 1) // 2
29 | super(ConvBNReLU, self).__init__(
30 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding,
31 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled),
32 | norm_layer(out_planes),
33 | nn.ReLU6(inplace=True)
34 | )
35 |
36 |
37 | class InvertedResidual(nn.Module):
38 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None,
39 | warehouse_manager=None, pwlinear_enabled=True):
40 | super(InvertedResidual, self).__init__()
41 | self.stride = stride
42 | assert stride in [1, 2]
43 | hidden_dim = int(round(inp * expand_ratio))
44 | self.use_res_connect = self.stride == 1 and inp == oup
45 |
46 | layers = []
47 | if expand_ratio != 1:
48 | # pw
49 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer,
50 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0),
51 | warehouse_manager=warehouse_manager, enabled=pwlinear_enabled))
52 | layers.extend([
53 | # dw
54 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer,
55 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0),
56 | warehouse_manager=warehouse_manager),
57 | # pw-linear
58 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False,
59 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1),
60 | enabled=pwlinear_enabled),
61 | norm_layer(oup),
62 | ])
63 | self.conv = nn.Sequential(*layers)
64 |
65 | def forward(self, x):
66 | if self.use_res_connect:
67 | return x + self.conv(x)
68 | else:
69 | return self.conv(x)
70 |
71 |
72 | class KW1d2x_MobileNetV2(nn.Module):
73 | def __init__(self,
74 | num_classes=1000,
75 | width_mult=1.0,
76 | inverted_residual_setting=None,
77 | round_nearest=8,
78 | block=None,
79 | norm_layer=None,
80 | dropout=0.1,
81 | **kwargs):
82 | """gr
83 | MobileNet V2 main class
84 |
85 | Args:
86 | num_classes (int): Number of classes
87 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
88 | inverted_residual_setting: Network structure
89 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number
90 | Set to 1 to turn off rounding
91 | block: Module specifying inverted residual building block for mobilenet
92 | norm_layer: Module specifying the normalization layer to use
93 |
94 | """
95 |
96 | reduction = 0.03125
97 | cell_num_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.5)
98 | cell_inplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.025)
99 | cell_outplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 1)
100 |         sharing_range = ('layer',)  # one-element tuple; tuple("layer") would split the string into characters
101 |
102 | super(KW1d2x_MobileNetV2, self).__init__()
103 | if block is None:
104 | block = InvertedResidual
105 |
106 | if norm_layer is None:
107 | norm_layer = nn.BatchNorm2d
108 |
109 | input_channel = 32
110 | last_channel = 1280
111 |
112 | if inverted_residual_setting is None:
113 | inverted_residual_setting = [
114 | # t, c, n, s
115 | [1, 16, 1, 1],
116 | [6, 24, 2, 2],
117 | [6, 32, 3, 2],
118 | [6, 64, 4, 2],
119 | [6, 96, 3, 1],
120 | [6, 160, 3, 2],
121 | [6, 320, 1, 1],
122 | ]
123 |
124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]
125 | pwlinear_enabled = [False, False, False, False, False, True, True]
126 |
127 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio,
128 | cell_outplane_ratio, sharing_range)
129 |
130 | # only check the first element, assuming user knows t,c,n,s are required
131 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
132 |             raise ValueError("inverted_residual_setting should be a non-empty "
133 |                              "list of 4-element lists, got {}".format(inverted_residual_setting))
134 |
135 | # building first layer
136 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
137 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
138 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer,
139 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')]
140 |
141 | layer_idx = 0
142 | # building inverted residual blocks
143 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting):
144 | output_channel = _make_divisible(c * width_mult, round_nearest)
145 | for i in range(n):
146 | stride = s if i == 0 else 1
147 |
148 |                 if i == 0 and idx > 0:
149 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1]
150 | else:
151 | handover = False
152 |
153 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx]
154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer,
155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx,
156 | layer_idx=layer_idx, pwlinear_enabled=pwlinear_enabled[stage_idx-1]))
157 |
158 | input_channel = output_channel
159 | layer_idx += 1
160 |
161 | if handover:
162 | layer_idx = 0
163 |
164 | # building last several layers
165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer,
166 | warehouse_manager=self.warehouse_manager,
167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx)))
168 | # make it nn.Sequential
169 | self.features = nn.Sequential(*features)
170 | # building classifier
171 | self.classifier = nn.Sequential(
172 | nn.Dropout(dropout),
173 | self.warehouse_manager.reserve(self.last_channel, num_classes, kernel_size=1,
174 | warehouse_name='classifier', layer_type='linear'),
175 | )
176 |
177 | # weight initialization
178 | for m in self.modules():
179 | if isinstance(m, nn.Conv2d):
180 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
181 | if m.bias is not None:
182 | nn.init.zeros_(m.bias)
183 | elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.GroupNorm)):
184 | nn.init.ones_(m.weight)
185 | nn.init.zeros_(m.bias)
186 | elif isinstance(m, nn.Linear):
187 | nn.init.normal_(m.weight, 0, 0.01)
188 | nn.init.zeros_(m.bias)
189 |
190 | self.warehouse_manager.store()
191 | self.warehouse_manager.allocate(self)
192 |
193 | def net_update_temperature(self, temp):
194 | for m in self.modules():
195 | if hasattr(m, "update_temperature"):
196 | m.update_temperature(temp)
197 |
198 | def _forward_impl(self, x):
199 | # This exists since TorchScript doesn't support inheritance, so the superclass method
200 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass
201 | x = self.features(x)
202 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
203 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
204 | x = self.classifier(x)
205 | return x
206 |
207 | def forward(self, x):
208 | return self._forward_impl(x)
209 |
210 |
211 | def kw1d2x_mobilenetv2(**kwargs):
212 | model = KW1d2x_MobileNetV2(**kwargs)
213 | return model
214 |
215 |
216 | @register_model
217 | def kw1d2x_mobilenetv2_050(**kwargs):
218 | return kw1d2x_mobilenetv2(width_mult=0.5, **kwargs)
219 |
220 |
221 | @register_model
222 | def kw1d2x_mobilenetv2_100(**kwargs):
223 | return kw1d2x_mobilenetv2(width_mult=1.0, **kwargs)
224 |
--------------------------------------------------------------------------------
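One pitfall worth spelling out next to the `sharing_range` assignment fixed above: calling `tuple()` on a string iterates its characters, so `tuple("layer")` yields five single-letter keys rather than the intended one-element tuple.

```python
print(tuple("layer"))  # ('l', 'a', 'y', 'e', 'r')  five keys, almost certainly unintended
print(("layer",))      # ('layer',)                 a single sharing key
```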
/models/kw_convnext.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from timm.models.layers import trunc_normal_, DropPath
5 | from timm.models.registry import register_model
6 | from modules.kernel_warehouse import Warehouse_Manager
7 |
8 |
9 | class Block(nn.Module):
10 | r""" ConvNeXt Block. There are two equivalent implementations:
11 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
12 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
13 | We use (2) as we find it slightly faster in PyTorch
14 |
15 | Args:
16 | dim (int): Number of input channels.
17 | drop_path (float): Stochastic depth rate. Default: 0.0
18 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
19 | """
20 |
21 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6,
22 | warehouse_manager=None, stage_idx=-1, layer_idx=-1):
23 | super().__init__()
24 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim,
25 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv
26 | self.norm = LayerNorm(dim, eps=1e-6)
27 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0,
28 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers
29 | self.act = nn.GELU()
30 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0,
31 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2))
32 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]),
33 | requires_grad=True) if layer_scale_init_value > 0 else None
34 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
35 |
36 | def forward(self, x):
37 | input = x
38 | x = self.dwconv(x)
39 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
40 | x = self.norm(x).permute(0, 3, 1, 2)
41 | x = self.pwconv1(x)
42 | x = self.act(x)
43 | x = self.pwconv2(x)
44 | if self.gamma is not None:
45 | x = self.gamma * x
46 | x = input + self.drop_path(x)
47 | return x
48 |
49 |
50 | @register_model
51 | class KW_ConvNeXt(nn.Module):
52 | r""" ConvNeXt
53 | A PyTorch impl of : `A ConvNet for the 2020s` -
54 | https://arxiv.org/pdf/2201.03545.pdf
55 |
56 | Args:
57 | in_chans (int): Number of input image channels. Default: 3
58 | num_classes (int): Number of classes for classification head. Default: 1000
59 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
60 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
61 | drop_path_rate (float): Stochastic depth rate. Default: 0.
62 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
63 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
64 | """
65 |
66 | def __init__(self, in_chans=3, num_classes=1000,
67 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0.,
68 | layer_scale_init_value=1e-6, head_init_scale=1.,
69 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1,
70 | sharing_range=('layer', 'pwconv'), nonlocal_basis_ratio=1, **kwargs
71 | ):
72 | super().__init__()
73 |
74 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio,
75 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio,
76 | norm_layer=nn.LayerNorm,
77 | )
78 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
79 | stem = nn.Sequential(
80 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4,
81 | warehouse_name='stage{}_conv0'.format('stem')),
82 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
83 | )
84 | self.downsample_layers.append(stem)
85 | for i in range(3):
86 | downsample_layer = nn.Sequential(
87 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
88 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2,
89 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')),
90 | )
91 | self.downsample_layers.append(downsample_layer)
92 |
93 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
94 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
95 | cur = 0
96 | for i in range(4):
97 | stage = nn.Sequential(
98 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j],
99 | layer_scale_init_value=layer_scale_init_value,
100 | warehouse_manager=self.warehouse_manager,
101 | stage_idx=i, layer_idx=j,
102 | ) for j in range(depths[i])]
103 | )
104 | self.stages.append(stage)
105 | cur += depths[i]
106 |
107 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
108 | self.head = nn.Linear(dims[-1], num_classes)
109 |
110 | self.apply(self._init_weights)
111 | self.head.weight.data.mul_(head_init_scale)
112 | self.head.bias.data.mul_(head_init_scale)
113 |
114 | self.warehouse_manager.store()
115 | self.warehouse_manager.allocate(self)
116 |
117 | def _init_weights(self, m):
118 | if isinstance(m, (nn.Conv2d, nn.Linear)):
119 | trunc_normal_(m.weight, std=.02)
120 | nn.init.constant_(m.bias, 0)
121 |
122 | def net_update_temperature(self, temp):
123 | for m in self.modules():
124 | if hasattr(m, "update_temperature"):
125 | m.update_temperature(temp)
126 |
127 | def forward_features(self, x):
128 | for i in range(4):
129 | x = self.downsample_layers[i](x)
130 | x = self.stages[i](x)
131 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
132 |
133 | def forward(self, x):
134 | x = self.forward_features(x)
135 | x = self.head(x)
136 | return x
137 |
138 |
139 | class LayerNorm(nn.Module):
140 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
141 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
142 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs
143 | with shape (batch_size, channels, height, width).
144 | """
145 |
146 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
147 | super().__init__()
148 | self.weight = nn.Parameter(torch.ones(normalized_shape))
149 | self.bias = nn.Parameter(torch.zeros(normalized_shape))
150 | self.eps = eps
151 | self.data_format = data_format
152 | if self.data_format not in ["channels_last", "channels_first"]:
153 | raise NotImplementedError
154 | self.normalized_shape = (normalized_shape,)
155 |
156 | def forward(self, x):
157 | if self.data_format == "channels_last":
158 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
159 | elif self.data_format == "channels_first":
160 | u = x.mean(1, keepdim=True)
161 | s = (x - u).pow(2).mean(1, keepdim=True)
162 | x = (x - u) / torch.sqrt(s + self.eps)
163 | x = self.weight[:, None, None] * x + self.bias[:, None, None]
164 | return x
165 |
166 |
167 | @register_model
168 | def kw_convnext_tiny(pretrained=False, in_22k=False, **kwargs):
169 | model = KW_ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
170 | return model
--------------------------------------------------------------------------------
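A minimal usage sketch for `kw_convnext_tiny` (illustrative only; it assumes `modules.kernel_warehouse` is on the path and that `Warehouse_Manager.store()`/`allocate()` succeed as in the training scripts): build the model, set the warm-up temperature the way `engine.py` does, and run a forward pass.

```python
import torch

model = kw_convnext_tiny(drop_path_rate=0.1)
model.net_update_temperature(30.0)        # temperature at the start of warm-up
out = model(torch.randn(1, 3, 224, 224))
print(out.shape)                          # torch.Size([1, 1000])
```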
/models/kw_mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from modules.kernel_warehouse import Warehouse_Manager
3 | from timm.models.registry import register_model
4 |
5 |
6 | def _make_divisible(v, divisor, min_value=None):
7 | """
8 | This function is taken from the original tf repo.
9 |     It ensures that all layers have a channel number that is divisible by `divisor`.
10 |     It can be seen here:
11 |     https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
12 |     :param v: original channel count (possibly scaled by a width multiplier)
13 |     :param divisor: the value the result must be divisible by
14 |     :param min_value: optional lower bound for the result; defaults to divisor
15 |     :return: the adjusted channel count
16 | """
17 | if min_value is None:
18 | min_value = divisor
19 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
20 | # Make sure that round down does not go down by more than 10%.
21 | if new_v < 0.9 * v:
22 | new_v += divisor
23 | return new_v
24 |
25 |
26 | class ConvBNReLU(nn.Sequential):
27 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d,
28 | warehouse_name=None, warehouse_manager=None, enabled=True):
29 | padding = (kernel_size - 1) // 2
30 | super(ConvBNReLU, self).__init__(
31 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding,
32 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled),
33 | norm_layer(out_planes),
34 | nn.ReLU6(inplace=True)
35 | )
36 |
37 |
38 | class InvertedResidual(nn.Module):
39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None,
40 | warehouse_manager=None):
41 | super(InvertedResidual, self).__init__()
42 | self.stride = stride
43 | assert stride in [1, 2]
44 | hidden_dim = int(round(inp * expand_ratio))
45 | self.use_res_connect = self.stride == 1 and inp == oup
46 |
47 | layers = []
48 | if expand_ratio != 1:
49 | # pw
50 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer,
51 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0),
52 | warehouse_manager=warehouse_manager))
53 |
54 | layers.extend([
55 | # dw
56 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer,
57 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0),
58 | warehouse_manager=warehouse_manager),
59 | # pw-linear
60 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False,
61 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)),
62 | norm_layer(oup),
63 | ])
64 | self.conv = nn.Sequential(*layers)
65 |
66 | def forward(self, x):
67 | if self.use_res_connect:
68 | return x + self.conv(x)
69 | else:
70 | return self.conv(x)
71 |
72 |
73 | class KW_MobileNetV2(nn.Module):
74 | def __init__(self,
75 | num_classes=1000,
76 | width_mult=1.0,
77 | inverted_residual_setting=None,
78 | round_nearest=8,
79 | block=None,
80 | norm_layer=None,
81 | dropout=0.0,
82 | reduction=0.0625,
83 | cell_num_ratio=1,
84 | cell_inplane_ratio=1,
85 | cell_outplane_ratio=1,
86 | sharing_range=None,
87 | nonlocal_basis_ratio=1,
88 | **kwargs):
89 |         """
90 |         KernelWarehouse MobileNet V2 main class
91 | 
92 |         Args:
93 |             num_classes (int): Number of classes
94 |             width_mult (float): Width multiplier - adjusts the number of channels in each layer by this amount
95 |             inverted_residual_setting: Network structure
96 |             round_nearest (int): Round the number of channels in each layer to be a multiple of this number.
97 |                 Set to 1 to turn off rounding
98 |             block: Module specifying the inverted residual building block for MobileNet
99 |             norm_layer: Module specifying the normalization layer to use
100 |             reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio, sharing_range, nonlocal_basis_ratio: KernelWarehouse settings forwarded to Warehouse_Manager
101 |         """
102 | super(KW_MobileNetV2, self).__init__()
103 | if block is None:
104 | block = InvertedResidual
105 |
106 | if norm_layer is None:
107 | norm_layer = nn.BatchNorm2d
108 |
109 | input_channel = 32
110 | last_channel = 1280
111 |
112 | if inverted_residual_setting is None:
113 | inverted_residual_setting = [
114 | # t, c, n, s
115 | [1, 16, 1, 1],
116 | [6, 24, 2, 2],
117 | [6, 32, 3, 2],
118 | [6, 64, 4, 2],
119 | [6, 96, 3, 1],
120 | [6, 160, 3, 2],
121 | [6, 320, 1, 1],
122 | ]
123 |
124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]
125 |
126 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio,
127 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio)
128 |
129 | # only check the first element, assuming user knows t,c,n,s are required
130 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
131 |             raise ValueError("inverted_residual_setting should be non-empty "
132 |                              "and each element should be a 4-element list, got {}".format(inverted_residual_setting))
133 |
134 | # building first layer
135 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
136 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
137 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer,
138 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')]
139 |
140 | layer_idx = 0
141 | # building inverted residual blocks
142 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting):
143 | output_channel = _make_divisible(c * width_mult, round_nearest)
144 | for i in range(n):
145 | stride = s if i == 0 else 1
146 |
147 | if i == 0 and idx > 0:
148 |                     handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1]  # this block opens a new warehouse stage
149 | else:
150 | handover = False
151 |
152 |                 stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx]  # a transition block keeps the previous stage's warehouse
153 |
154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer,
155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx,
156 | layer_idx=layer_idx))
157 |
158 | input_channel = output_channel
159 | layer_idx += 1
160 |
161 | if handover:
162 |                     layer_idx = 0  # restart the per-stage layer counter after a handover
163 |
164 | # building last several layers
165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer,
166 | warehouse_manager=self.warehouse_manager,
167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx)))
168 | # make it nn.Sequential
169 | self.features = nn.Sequential(*features)
170 | # building classifier
171 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
172 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True)
173 |
174 | # weight initialization
175 | for m in self.modules():
176 | if isinstance(m, nn.Conv2d):
177 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
178 | if m.bias is not None:
179 | nn.init.zeros_(m.bias)
180 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
181 | nn.init.ones_(m.weight)
182 | nn.init.zeros_(m.bias)
183 | elif isinstance(m, nn.Linear):
184 | nn.init.normal_(m.weight, 0, 0.01)
185 | nn.init.zeros_(m.bias)
186 |
187 | self.warehouse_manager.store()
188 | self.warehouse_manager.allocate(self)
189 |
190 | def net_update_temperature(self, temp):
191 | for m in self.modules():
192 | if hasattr(m, "update_temperature"):
193 | m.update_temperature(temp)
194 |
195 | def _forward_impl(self, x):
196 | # This exists since TorchScript doesn't support inheritance, so the superclass method
197 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass
198 | x = self.features(x)
199 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
200 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
201 | x = self.dropout(x)
202 | x = self.classifier(x)
203 | return x
204 |
205 | def forward(self, x):
206 | return self._forward_impl(x)
207 |
208 |
209 | def kw_mobilenetv2(**kwargs):
210 | model = KW_MobileNetV2(**kwargs)
211 | return model
212 |
213 |
214 | @register_model
215 | def kw_mobilenetv2_050(**kwargs):
216 | return kw_mobilenetv2(width_mult=0.5, **kwargs)
217 |
218 |
219 | @register_model
220 | def kw_mobilenetv2_100(**kwargs):
221 | return kw_mobilenetv2(width_mult=1.0, **kwargs)
222 |
--------------------------------------------------------------------------------
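
The stage/layer bookkeeping in `KW_MobileNetV2.__init__` above is the subtle part: `kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]` maps each inverted-residual group to a warehouse stage, the first block of a new stage is handed over to the previous stage's warehouse, and the per-stage layer counter restarts after a handover. Replaying just the index logic outside the model makes the resulting warehouse names visible (a standalone sketch mirroring the loop above):

```python
inverted_residual_setting = [
    # t, c, n, s
    [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2],
    [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1],
]
kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]

layer_idx = 0
for idx, (t, c, n, s) in enumerate(inverted_residual_setting):
    for i in range(n):
        # same handover rule as in KW_MobileNetV2.__init__
        handover = i == 0 and idx > 0 and kw_stage_setting[idx] != kw_stage_setting[idx - 1]
        stage_idx = kw_stage_setting[idx] - 1 if handover else kw_stage_setting[idx]
        print('group {} block {} -> stage{}_layer{}'.format(idx, i, stage_idx, layer_idx))
        layer_idx += 1
        if handover:
            layer_idx = 0  # the new warehouse stage starts counting layers from 0
```
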
/models/kw_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from modules.kernel_warehouse import Warehouse_Manager
4 | from timm.models.layers import DropPath
5 | from timm.models.registry import register_model
6 |
7 |
8 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True):
9 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1,
10 | warehouse_name=warehouse_name, enabled=enabled, bias=False)
11 |
12 |
13 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True):
14 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0,
15 | warehouse_name=warehouse_name, enabled=enabled, bias=False)
16 |
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None,
22 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.):
23 | super(BasicBlock, self).__init__()
24 |         conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0)  # conv1 of a transition block borrows the previous stage's warehouse
25 | self.conv1 = kwconv3x3(inplanes, planes, stride,
26 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0),
27 | warehouse_manager=warehouse_manager)
28 | self.bn1 = nn.BatchNorm2d(planes)
29 | self.relu = nn.ReLU(inplace=True)
30 | layer_idx = 0 if warehouse_handover else layer_idx
31 | self.conv2 = kwconv3x3(planes, planes,
32 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1),
33 | warehouse_manager=warehouse_manager)
34 | self.bn2 = nn.BatchNorm2d(planes)
35 | self.downsample = downsample
36 | self.stride = stride
37 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
38 |
39 | def forward(self, x):
40 | identity = x
41 |
42 | out = self.conv1(x)
43 | out = self.bn1(out)
44 | out = self.relu(out)
45 |
46 | out = self.conv2(out)
47 | out = self.bn2(out)
48 |
49 | if self.downsample is not None:
50 | identity = self.downsample(x)
51 |
52 | out = identity + self.drop_path(out)
53 | out = self.relu(out)
54 | return out
55 |
56 |
57 | class Bottleneck(nn.Module):
58 | expansion = 4
59 |
60 | def __init__(self, inplanes, planes, stride=1, downsample=None,
61 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.):
62 | super(Bottleneck, self).__init__()
63 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx
64 | self.conv1 = kwconv1x1(inplanes, planes,
65 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0),
66 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0))
67 | self.bn1 = nn.BatchNorm2d(planes)
68 | layer_idx = 0 if warehouse_handover else layer_idx
69 | self.conv2 = kwconv3x3(planes, planes, stride,
70 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1),
71 | warehouse_manager=warehouse_manager)
72 | self.bn2 = nn.BatchNorm2d(planes)
73 | self.conv3 = kwconv1x1(planes, planes * self.expansion,
74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2),
75 | warehouse_manager=warehouse_manager)
76 | self.bn3 = nn.BatchNorm2d(planes * self.expansion)
77 | self.relu = nn.ReLU(inplace=True)
78 | self.downsample = downsample
79 | self.stride = stride
80 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
81 |
82 | def forward(self, x):
83 | identity = x
84 |
85 | out = self.conv1(x)
86 | out = self.bn1(out)
87 | out = self.relu(out)
88 |
89 | out = self.conv2(out)
90 | out = self.bn2(out)
91 | out = self.relu(out)
92 |
93 | out = self.conv3(out)
94 | out = self.bn3(out)
95 |
96 | if self.downsample is not None:
97 | identity = self.downsample(x)
98 |
99 | out = identity + self.drop_path(out)
100 | out = self.relu(out)
101 | return out
102 |
103 |
104 | class KW_ResNet(nn.Module):
105 | def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduction=0.0625,
106 | cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1,
107 | sharing_range=('layer', 'conv'), nonlocal_basis_ratio=1, drop_path_rate=0., **kwargs):
108 | super(KW_ResNet, self).__init__()
109 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio,
110 | sharing_range, nonlocal_basis_ratio)
111 | self.inplanes = 64
112 | self.layer_idx = 0
113 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
114 | self.bn1 = nn.BatchNorm2d(self.inplanes)
115 | self.relu = nn.ReLU(inplace=True)
116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
117 | self.layer1 = self._make_layer(block, 64, layers[0],
118 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
120 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
121 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
122 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
123 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
124 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate)
125 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
126 | self.dropout = nn.Dropout(p=dropout)
127 | self.fc = nn.Linear(512 * block.expansion, num_classes)
128 |
129 | for m in self.modules():
130 | if isinstance(m, nn.Conv2d):
131 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
132 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
133 | nn.init.constant_(m.weight, 1)
134 | nn.init.constant_(m.bias, 0)
135 |
136 | self.warehouse_manager.store()
137 | self.warehouse_manager.allocate(self)
138 |
139 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.):
140 | downsample = None
141 | if stride != 1 or self.inplanes != planes * block.expansion:
142 | downsample = nn.Sequential(
143 | warehouse_manager.reserve(
144 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0,
145 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx - 1, self.layer_idx + 1, 0),
146 | enabled=(stride != 1), bias=False),
147 | nn.BatchNorm2d(planes * block.expansion),
148 | )
149 |
150 | layers = []
151 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx,
152 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path))
153 | self.layer_idx = 1
154 | self.inplanes = planes * block.expansion
155 | for idx in range(1, blocks):
156 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx,
157 | warehouse_manager=warehouse_manager, drop_path=drop_path))
158 | self.layer_idx += 1
159 | return nn.Sequential(*layers)
160 |
161 | def net_update_temperature(self, temp):
162 | for m in self.modules():
163 | if hasattr(m, "update_temperature"):
164 | m.update_temperature(temp)
165 |
166 | def _forward_impl(self, x):
167 | x = self.conv1(x)
168 | x = self.bn1(x)
169 | x = self.relu(x)
170 | x = self.maxpool(x)
171 |
172 | x = self.layer1(x)
173 | x = self.layer2(x)
174 | x = self.layer3(x)
175 | x = self.layer4(x)
176 |
177 | x = self.avgpool(x)
178 | x = torch.flatten(x, 1)
179 | x = self.dropout(x)
180 | x = self.fc(x)
181 | return x
182 |
183 | def forward(self, x):
184 | return self._forward_impl(x)
185 |
186 |
187 | @register_model
188 | def kw_resnet18(**kwargs):
189 | model = KW_ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
190 | return model
191 |
192 | @register_model
193 | def kw_resnet50(**kwargs):
194 | model = KW_ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
195 | return model
196 |
197 |
--------------------------------------------------------------------------------
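
Since `kw_resnet18` and `kw_resnet50` are registered with timm, they can be built through `timm.create_model` once the package is importable. A minimal usage sketch, assuming the repo root is on `PYTHONPATH` and that `models/__init__.py` imports these files (as the repo layout suggests):

```python
import torch
import timm
import models  # noqa: F401 -- importing runs the @register_model decorators

model = timm.create_model('kw_resnet18', num_classes=1000)
model.eval()

# the temperature of the warehouse attention can be updated during training
model.net_update_temperature(1.0)

with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.shape)  # torch.Size([1, 1000])
```
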
/models/mobilenetv2.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 | from timm.models.registry import register_model
3 |
4 |
5 | def _make_divisible(v, divisor, min_value=None):
6 | """
7 | This function is taken from the original tf repo.
8 | It ensures that all layers have a channel number that is divisible by 8
9 | It can be seen here:
10 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
11 | :param v:
12 | :param divisor:
13 | :param min_value:
14 | :return:
15 | """
16 | if min_value is None:
17 | min_value = divisor
18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
19 | # Make sure that round down does not go down by more than 10%.
20 | if new_v < 0.9 * v:
21 | new_v += divisor
22 | return new_v
23 |
24 |
25 | class ConvBNReLU(nn.Sequential):
26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d):
27 | padding = (kernel_size - 1) // 2
28 | super(ConvBNReLU, self).__init__(
29 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
30 | norm_layer(out_planes),
31 | nn.ReLU6(inplace=True)
32 | )
33 |
34 |
35 | class InvertedResidual(nn.Module):
36 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d):
37 | super(InvertedResidual, self).__init__()
38 | self.stride = stride
39 | hidden_dim = int(round(inp * expand_ratio))
40 | self.use_res_connect = self.stride == 1 and inp == oup
41 |
42 | layers = []
43 | if expand_ratio != 1:
44 | # pw
45 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer))
46 | layers.extend([
47 | # dw
48 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer),
49 | # pw-linear
50 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
51 | norm_layer(oup),
52 | ])
53 | self.conv = nn.Sequential(*layers)
54 |
55 | def forward(self, x):
56 | if self.use_res_connect:
57 | return x + self.conv(x)
58 | else:
59 | return self.conv(x)
60 |
61 |
62 | class MobileNetV2(nn.Module):
63 | def __init__(self,
64 | num_classes=1000,
65 | width_mult=1.0,
66 | inverted_residual_setting=None,
67 | round_nearest=8,
68 | block=InvertedResidual,
69 | norm_layer=nn.BatchNorm2d,
70 | dropout=0.0,
71 | **kwargs):
72 |         """
73 |         MobileNet V2 main class
74 | 
75 |         Args:
76 |             num_classes (int): Number of classes
77 |             width_mult (float): Width multiplier - adjusts the number of channels in each layer by this amount
78 |             inverted_residual_setting: Network structure
79 |             round_nearest (int): Round the number of channels in each layer to be a multiple of this number.
80 |                 Set to 1 to turn off rounding
81 |             block: Module specifying the inverted residual building block for MobileNet
82 |             norm_layer: Module specifying the normalization layer to use
83 | 
84 |         """
85 | super(MobileNetV2, self).__init__()
86 |
87 | input_channel = 32
88 | last_channel = 1280
89 |
90 | if inverted_residual_setting is None:
91 | inverted_residual_setting = [
92 | # t, c, n, s
93 | [1, 16, 1, 1],
94 | [6, 24, 2, 2],
95 | [6, 32, 3, 2],
96 | [6, 64, 4, 2],
97 | [6, 96, 3, 1],
98 | [6, 160, 3, 2],
99 | [6, 320, 1, 1],
100 | ]
101 |
102 | # only check the first element, assuming user knows t,c,n,s are required
103 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
104 |             raise ValueError("inverted_residual_setting should be non-empty "
105 |                              "and each element should be a 4-element list, got {}".format(inverted_residual_setting))
106 |
107 | # building first layer
108 | input_channel = _make_divisible(input_channel * width_mult, round_nearest)
109 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
110 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)]
111 | # building inverted residual blocks
112 | for t, c, n, s in inverted_residual_setting:
113 | output_channel = _make_divisible(c * width_mult, round_nearest)
114 | for i in range(n):
115 | stride = s if i == 0 else 1
116 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer))
117 | input_channel = output_channel
118 | # building last several layers
119 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer))
120 | # make it nn.Sequential
121 | self.features = nn.Sequential(*features)
122 | # building classifier
123 | self.classifier = nn.Sequential(
124 | nn.Dropout(dropout),
125 | nn.Linear(self.last_channel, num_classes),
126 | )
127 |
128 | # weight initialization
129 | for m in self.modules():
130 | if isinstance(m, nn.Conv2d):
131 | nn.init.kaiming_normal_(m.weight, mode='fan_out')
132 | if m.bias is not None:
133 | nn.init.zeros_(m.bias)
134 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
135 | nn.init.ones_(m.weight)
136 | nn.init.zeros_(m.bias)
137 | elif isinstance(m, nn.Linear):
138 | nn.init.normal_(m.weight, 0, 0.01)
139 | nn.init.zeros_(m.bias)
140 |
141 | def _forward_impl(self, x):
142 | # This exists since TorchScript doesn't support inheritance, so the superclass method
143 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass
144 | x = self.features(x)
145 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0]
146 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1)
147 | x = self.classifier(x)
148 | return x
149 |
150 | def forward(self, x):
151 | return self._forward_impl(x)
152 |
153 |
154 | def mobilenet_v2(**kwargs):
155 | model = MobileNetV2(**kwargs)
156 | return model
157 |
158 |
159 | @register_model
160 | def mobilenetv2_050(**kwargs):
161 | return mobilenet_v2(width_mult=0.5, **kwargs)
162 |
163 |
164 | @register_model
165 | def mobilenetv2_100(**kwargs):
166 | return mobilenet_v2(width_mult=1.0, **kwargs)
167 |
--------------------------------------------------------------------------------
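
The interaction between `width_mult` and `_make_divisible` above is easy to verify by hand: channels are rounded to the nearest multiple of `round_nearest` (default 8), and bumped up one step whenever rounding would lose more than 10% of the original value. A few worked cases (plain Python, duplicating the helper so the snippet is self-contained):

```python
def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

# width_mult=0.5 applied to the default setting:
assert _make_divisible(32 * 0.5, 8) == 16   # stem: 32 -> 16
assert _make_divisible(24 * 0.5, 8) == 16   # 12 rounds up to the nearest multiple
assert _make_divisible(96 * 0.5, 8) == 48
# the 10% guard in action: 19 -> 16 would lose ~16%, so bump to 32
assert _make_divisible(19, 16) == 32
# last_channel uses max(1.0, width_mult), so it never shrinks below 1280
```
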
/models/resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from timm.models.layers import DropPath
4 | from timm.models.registry import register_model
5 |
6 |
7 | def conv3x3(in_planes, out_planes, stride=1):
8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
9 | padding=1, bias=False)
10 |
11 |
12 | def conv1x1(in_planes, out_planes, stride=1):
13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
14 |
15 |
16 | class BasicBlock(nn.Module):
17 | expansion = 1
18 |
19 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.):
20 | super(BasicBlock, self).__init__()
21 | self.conv1 = conv3x3(inplanes, planes, stride)
22 | self.bn1 = nn.BatchNorm2d(planes)
23 | self.relu = nn.ReLU(inplace=True)
24 | self.conv2 = conv3x3(planes, planes)
25 | self.bn2 = nn.BatchNorm2d(planes)
26 | self.downsample = downsample
27 | self.stride = stride
28 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
29 |
30 | def forward(self, x):
31 | identity = x
32 |
33 | out = self.conv1(x)
34 | out = self.bn1(out)
35 | out = self.relu(out)
36 |
37 | out = self.conv2(out)
38 | out = self.bn2(out)
39 |
40 | if self.downsample is not None:
41 | identity = self.downsample(x)
42 |
43 | out = identity + self.drop_path(out)
44 | out = self.relu(out)
45 | return out
46 |
47 |
48 | class Bottleneck(nn.Module):
49 | expansion = 4
50 |
51 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.):
52 | super(Bottleneck, self).__init__()
53 | self.conv1 = conv1x1(inplanes, planes)
54 | self.bn1 = nn.BatchNorm2d(planes)
55 | self.conv2 = conv3x3(planes, planes, stride)
56 | self.bn2 = nn.BatchNorm2d(planes)
57 | self.conv3 = conv1x1(planes, planes * self.expansion)
58 | self.bn3 = nn.BatchNorm2d(planes * self.expansion)
59 | self.relu = nn.ReLU(inplace=True)
60 | self.downsample = downsample
61 | self.stride = stride
62 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
63 |
64 | def forward(self, x):
65 | identity = x
66 |
67 | out = self.conv1(x)
68 | out = self.bn1(out)
69 | out = self.relu(out)
70 |
71 | out = self.conv2(out)
72 | out = self.bn2(out)
73 | out = self.relu(out)
74 |
75 | out = self.conv3(out)
76 | out = self.bn3(out)
77 |
78 | if self.downsample is not None:
79 | identity = self.downsample(x)
80 |
81 | out = identity + self.drop_path(out)
82 | out = self.relu(out)
83 | return out
84 |
85 |
86 | class ResNet(nn.Module):
87 | def __init__(self, block, layers, num_classes=1000, drop_path_rate=0., **kwargs):
88 | super(ResNet, self).__init__()
89 | self.inplanes = 64
90 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
91 | bias=False)
92 | self.bn1 = nn.BatchNorm2d(self.inplanes)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0], drop_path=drop_path_rate)
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, drop_path=drop_path_rate)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, drop_path=drop_path_rate)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, drop_path=drop_path_rate)
99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
100 | self.fc = nn.Linear(512 * block.expansion, num_classes)
101 |
102 | for m in self.modules():
103 | if isinstance(m, nn.Conv2d):
104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
105 | if m.bias is not None:
106 | nn.init.zeros_(m.bias)
107 | elif isinstance(m, nn.BatchNorm2d):
108 | nn.init.constant_(m.weight, 1)
109 | nn.init.constant_(m.bias, 0)
110 | elif isinstance(m, nn.Linear):
111 | nn.init.normal_(m.weight, 0, 0.01)
112 | nn.init.zeros_(m.bias)
113 |
114 |     def _make_layer(self, block, planes, blocks, stride=1, drop_path=0.):
115 | downsample = None
116 | if stride != 1 or self.inplanes != planes * block.expansion:
117 | downsample = nn.Sequential(
118 | conv1x1(self.inplanes, planes * block.expansion, stride),
119 | nn.BatchNorm2d(planes * block.expansion),
120 | )
121 |
122 | layers = []
123 | layers.append(block(self.inplanes, planes, stride, downsample, drop_path=drop_path))
124 | self.inplanes = planes * block.expansion
125 | for _ in range(1, blocks):
126 | layers.append(block(self.inplanes, planes, drop_path=drop_path))
127 |
128 | return nn.Sequential(*layers)
129 |
130 | def _forward_impl(self, x):
131 | x = self.conv1(x)
132 | x = self.bn1(x)
133 | x = self.relu(x)
134 | x = self.maxpool(x)
135 |
136 | x = self.layer1(x)
137 | x = self.layer2(x)
138 | x = self.layer3(x)
139 | x = self.layer4(x)
140 |
141 | x = self.avgpool(x)
142 | x = torch.flatten(x, 1)
143 | x = self.fc(x)
144 | return x
145 |
146 | def forward(self, x):
147 | return self._forward_impl(x)
148 |
149 |
150 | @register_model
151 | def resnet18(**kwargs):
152 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
153 | return model
154 |
155 | @register_model
156 | def resnet50(**kwargs):
157 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
158 | return model
159 |
--------------------------------------------------------------------------------
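
In both block types above, `identity + self.drop_path(out)` implements stochastic depth: during training, each sample's residual branch is zeroed with probability `drop_path_rate` and the survivors are rescaled so the expectation is unchanged; at eval time `DropPath` reduces to the identity. A self-contained sketch of the same mechanism (simplified from timm's `DropPath`):

```python
import torch

def drop_path(x, drop_prob=0.1, training=True):
    """Zero the residual branch per sample with probability drop_prob."""
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # one Bernoulli draw per sample, broadcast across C, H, W
    mask = x.new_empty(x.shape[0], 1, 1, 1).bernoulli_(keep_prob)
    return x * mask / keep_prob  # rescale so the expected output equals x

out = torch.randn(8, 64, 56, 56)       # residual branch output
identity = torch.randn(8, 64, 56, 56)  # skip connection
y = identity + drop_path(out, drop_prob=0.1, training=True)
```
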
/optim_factory.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import optim as optim
3 |
4 | from timm.optim.adafactor import Adafactor
5 | from timm.optim.adahessian import Adahessian
6 | from timm.optim.adamp import AdamP
7 | from timm.optim.lookahead import Lookahead
8 | from timm.optim.nadam import Nadam
9 | from timm.optim.radam import RAdam
10 | from timm.optim.rmsprop_tf import RMSpropTF
11 | from timm.optim.sgdp import SGDP
12 |
13 | import json
14 |
15 | try:
16 | from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD
17 | has_apex = True
18 | except ImportError:
19 | has_apex = False
20 |
21 |
22 | def get_num_layer_for_convnext(var_name):
23 | """
24 |     Divide the [3, 3, 27, 3] ConvNeXt blocks into 12 groups; each group is three
25 |     consecutive blocks, together with any neighboring downsample layers.
26 | """
27 | num_max_layer = 12
28 | if var_name.startswith("downsample_layers"):
29 | stage_id = int(var_name.split('.')[1])
30 | if stage_id == 0:
31 | layer_id = 0
32 | elif stage_id == 1 or stage_id == 2:
33 | layer_id = stage_id + 1
34 | elif stage_id == 3:
35 | layer_id = 12
36 | return layer_id
37 |
38 | elif var_name.startswith("stages"):
39 | stage_id = int(var_name.split('.')[1])
40 | block_id = int(var_name.split('.')[2])
41 | if stage_id == 0 or stage_id == 1:
42 | layer_id = stage_id + 1
43 | elif stage_id == 2:
44 | layer_id = 3 + block_id // 3
45 | elif stage_id == 3:
46 | layer_id = 12
47 | return layer_id
48 | else:
49 | return num_max_layer + 1
50 |
51 | class LayerDecayValueAssigner(object):
52 | def __init__(self, values):
53 | self.values = values
54 |
55 | def get_scale(self, layer_id):
56 | return self.values[layer_id]
57 |
58 | def get_layer_id(self, var_name):
59 | return get_num_layer_for_convnext(var_name)
60 |
61 |
62 | def get_parameter_groups(model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None):
63 | parameter_group_names = {}
64 | parameter_group_vars = {}
65 |
66 | for name, param in model.named_parameters():
67 | if not param.requires_grad:
68 | continue # frozen weights
69 | if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
70 | group_name = "no_decay"
71 | this_weight_decay = 0.
72 | else:
73 | group_name = "decay"
74 | this_weight_decay = weight_decay
75 | if get_num_layer is not None:
76 | layer_id = get_num_layer(name)
77 | group_name = "layer_%d_%s" % (layer_id, group_name)
78 | else:
79 | layer_id = None
80 |
81 | if group_name not in parameter_group_names:
82 | if get_layer_scale is not None:
83 | scale = get_layer_scale(layer_id)
84 | else:
85 | scale = 1.
86 |
87 | parameter_group_names[group_name] = {
88 | "weight_decay": this_weight_decay,
89 | "params": [],
90 | "lr_scale": scale
91 | }
92 | parameter_group_vars[group_name] = {
93 | "weight_decay": this_weight_decay,
94 | "params": [],
95 | "lr_scale": scale
96 | }
97 |
98 | parameter_group_vars[group_name]["params"].append(param)
99 | parameter_group_names[group_name]["params"].append(name)
100 | print("Param groups = %s" % json.dumps(parameter_group_names, indent=2))
101 | return list(parameter_group_vars.values())
102 |
103 |
104 | def create_optimizer(args, model, get_num_layer=None, get_layer_scale=None, filter_bias_and_bn=True, skip_list=None):
105 | opt_lower = args.opt.lower()
106 | weight_decay = args.weight_decay
107 | # if weight_decay and filter_bias_and_bn:
108 | if filter_bias_and_bn:
109 | skip = {}
110 | if skip_list is not None:
111 | skip = skip_list
112 | elif hasattr(model, 'no_weight_decay'):
113 | skip = model.no_weight_decay()
114 | parameters = get_parameter_groups(model, weight_decay, skip, get_num_layer, get_layer_scale)
115 | weight_decay = 0.
116 | else:
117 | parameters = model.parameters()
118 |
119 | if 'fused' in opt_lower:
120 | assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers'
121 |
122 | opt_args = dict(lr=args.lr, weight_decay=weight_decay)
123 | if hasattr(args, 'opt_eps') and args.opt_eps is not None:
124 | opt_args['eps'] = args.opt_eps
125 | if hasattr(args, 'opt_betas') and args.opt_betas is not None:
126 | opt_args['betas'] = args.opt_betas
127 |
128 | opt_split = opt_lower.split('_')
129 | opt_lower = opt_split[-1]
130 | if opt_lower == 'sgd' or opt_lower == 'nesterov':
131 | opt_args.pop('eps', None)
132 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=True, **opt_args)
133 | elif opt_lower == 'momentum':
134 | opt_args.pop('eps', None)
135 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=False, **opt_args)
136 | elif opt_lower == 'adam':
137 | optimizer = optim.Adam(parameters, **opt_args)
138 | elif opt_lower == 'adamw':
139 | optimizer = optim.AdamW(parameters, **opt_args)
140 | elif opt_lower == 'nadam':
141 | optimizer = Nadam(parameters, **opt_args)
142 | elif opt_lower == 'radam':
143 | optimizer = RAdam(parameters, **opt_args)
144 | elif opt_lower == 'adamp':
145 | optimizer = AdamP(parameters, wd_ratio=0.01, nesterov=True, **opt_args)
146 | elif opt_lower == 'sgdp':
147 | optimizer = SGDP(parameters, momentum=args.momentum, nesterov=True, **opt_args)
148 | elif opt_lower == 'adadelta':
149 | optimizer = optim.Adadelta(parameters, **opt_args)
150 | elif opt_lower == 'adafactor':
151 | if not args.lr:
152 | opt_args['lr'] = None
153 | optimizer = Adafactor(parameters, **opt_args)
154 | elif opt_lower == 'adahessian':
155 | optimizer = Adahessian(parameters, **opt_args)
156 | elif opt_lower == 'rmsprop':
157 | optimizer = optim.RMSprop(parameters, alpha=0.9, momentum=args.momentum, **opt_args)
158 | elif opt_lower == 'rmsproptf':
159 | optimizer = RMSpropTF(parameters, alpha=0.9, momentum=args.momentum, **opt_args)
160 | elif opt_lower == 'fusedsgd':
161 | opt_args.pop('eps', None)
162 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=True, **opt_args)
163 | elif opt_lower == 'fusedmomentum':
164 | opt_args.pop('eps', None)
165 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=False, **opt_args)
166 | elif opt_lower == 'fusedadam':
167 | optimizer = FusedAdam(parameters, adam_w_mode=False, **opt_args)
168 | elif opt_lower == 'fusedadamw':
169 | optimizer = FusedAdam(parameters, adam_w_mode=True, **opt_args)
170 | elif opt_lower == 'fusedlamb':
171 | optimizer = FusedLAMB(parameters, **opt_args)
172 | elif opt_lower == 'fusednovograd':
173 | opt_args.setdefault('betas', (0.95, 0.98))
174 | optimizer = FusedNovoGrad(parameters, **opt_args)
175 | else:
176 |         raise ValueError("Invalid optimizer: {}".format(opt_lower))
177 |
178 | if len(opt_split) > 1:
179 | if opt_split[0] == 'lookahead':
180 | optimizer = Lookahead(optimizer)
181 |
182 | return optimizer
183 |
--------------------------------------------------------------------------------
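
Tying the pieces of `optim_factory.py` together: `LayerDecayValueAssigner` supplies per-group learning-rate scales keyed by the layer ids from `get_num_layer_for_convnext`, and `create_optimizer` attaches them to the parameter groups as `lr_scale`. A minimal sketch, assuming the repo root is on `PYTHONPATH`; the `Namespace` fields mirror the attributes read above, and the 0.9 decay is an illustrative value:

```python
import torch.nn as nn
from argparse import Namespace
from optim_factory import create_optimizer, LayerDecayValueAssigner

# scales 0.9**13 ... 0.9**0 for layer ids 0..13 (12 block groups + everything else)
layer_decay, num_layers = 0.9, 12
assigner = LayerDecayValueAssigner(
    [layer_decay ** (num_layers + 1 - i) for i in range(num_layers + 2)])

args = Namespace(opt='adamw', lr=4e-3, weight_decay=0.05,
                 opt_eps=1e-8, opt_betas=(0.9, 0.999), momentum=0.9)
model = nn.Linear(10, 10)  # placeholder; a ConvNeXt would exercise all layer ids
optimizer = create_optimizer(args, model,
                             get_num_layer=assigner.get_layer_id,
                             get_layer_scale=assigner.get_scale)
```

Note that `AdamW` itself ignores the `lr_scale` key; the training loop is expected to multiply each group's learning rate by it, as ConvNeXt-style training scripts do.
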