├── LICENSE
├── README.md
├── configs
├── convnext
│ ├── convnext_tiny.json
│ └── kw1x_convnext_tiny.json
├── mobilenetv2_050
│ ├── kw1d2x_mobilenetv2_050.json
│ ├── kw1x_mobilenetv2_050.json
│ ├── kw4x_mobilenetv2_050.json
│ └── mobilenetv2_050.json
├── mobilenetv2_100
│ ├── kw1d2x_mobilenetv2_100.json
│ ├── kw1x_mobilenetv2_100.json
│ ├── kw4x_mobilenetv2_100.json
│ └── mobilenetv2_100.json
├── resnet18
│ ├── kw1d2x_resnet18.json
│ ├── kw1d4x_resnet18.json
│ ├── kw1x_resnet18.json
│ ├── kw2x_resnet18.json
│ ├── kw4x_resnet18.json
│ └── resnet18.json
└── resnet50
│ ├── kw1d2x_resnet50.json
│ ├── kw1x_resnet50.json
│ ├── kw4x_resnet50.json
│ └── resnet50.json
├── datasets.py
├── detection
├── README.md
├── configs
│ ├── _base_
│ │ └── models
│ │ │ ├── mask_rcnn_convnext_fpn.py
│ │ │ ├── mask_rcnn_mobilenetv2_fpn.py
│ │ │ └── mask_rcnn_r50_fpn.py
│ └── kernelwarehouse
│ │ ├── convnext_tiny
│ │ ├── mask_rcnn_convnext_tiny_adamw_1x_coco.py
│ │ └── mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py
│ │ ├── mobilenetv2
│ │ ├── mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py
│ │ ├── mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py
│ │ └── mask_rcnn_mobilenetv2_100_adamw_1x_coco.py
│ │ └── resnet50
│ │ ├── mask_rcnn_kw1x_resnet50_adamw_1x_coco.py
│ │ ├── mask_rcnn_kw4x_resnet50_adamw_1x_coco.py
│ │ └── mask_rcnn_resnet50_adamw_1x_coco.py
├── mmcv_custom
│ ├── __init__.py
│ ├── customized_text.py
│ ├── layer_decay_optimizer_constructor.py
│ └── runner
│ │ └── checkpoint.py
└── mmdet
│ └── models
│ └── backbones
│ ├── __init__.py
│ ├── convnext.py
│ ├── kernel_warehouse.py
│ ├── kw_convnext.py
│ ├── kw_mobilenetv2.py
│ ├── kw_resnet.py
│ ├── mobilenetv2.py
│ └── resnet.py
├── engine.py
├── fig
├── Fig_Architecture.pdf
└── Fig_Architecture.png
├── main.py
├── models
├── __init__.py
├── convnext.py
├── kw1d2x_mobilenetv2.py
├── kw_convnext.py
├── kw_mobilenetv2.py
├── kw_resnet.py
├── mobilenetv2.py
└── resnet.py
├── modules
└── kernel_warehouse.py
├── optim_factory.py
└── utils.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License 2 | 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | Copyright (c) 2024 OSVAI/KernelWarehouse 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files.
31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. 
If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 
204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KernelWarehouse: Rethinking the Design of Dynamic Convolution 2 | 3 | By Chao Li and Anbang Yao. 4 | 5 | This repository is an official PyTorch implementation of ["KernelWarehouse: Rethinking the Design of Dynamic Convolution", KW for short](http://arxiv.org/abs/2406.07879), which has been accepted to ICML 2024. 6 | 7 | Dynamic convolution learns a linear mixture of n static kernels weighted with their input-dependent attentions, demonstrating superior performance over normal convolution. However, it increases the number of convolutional parameters by a factor of n, and thus is not parameter efficient. As a result, no prior work has explored the setting n>100 (an order of magnitude larger than the typical setting n<10), which could push forward the performance boundary of dynamic convolution while enjoying parameter efficiency. To fill this gap, in this paper, we propose **KernelWarehouse**, a more general form of dynamic convolution, which redefines the basic concepts of "kernels", "assembling kernels" and "attention function" through the lens of exploiting convolutional parameter dependencies within the same layer and across neighboring layers of a ConvNet. We validate the effectiveness of KernelWarehouse on the [ImageNet](http://www.image-net.org) and [MS-COCO](https://cocodataset.org/#home) datasets using various ConvNet architectures. Intriguingly, KernelWarehouse is also applicable to Vision Transformers, and it can even reduce the model size of a backbone while improving the model accuracy. For instance, KernelWarehouse (n=4) achieves a 5.61%|3.90%|4.38% absolute top-1 accuracy gain on the ResNet18|MobileNetV2|DeiT-Tiny backbone, and KernelWarehouse (n=1/4) with a 65.10% model size reduction still achieves a 2.29% gain on the ResNet18 backbone. 8 | 9 |
![Schematic illustration of KernelWarehouse](fig/Fig_Architecture.png)
10 | 11 | Schematic illustration of KernelWarehouse. Briefly speaking, KernelWarehouse sequentially divides the static kernel $\mathbf{W}$ at any regular convolutional layer of a ConvNet into $m$ disjoint kernel cells $\mathbf{w}_ 1, \dots, \mathbf{w}_ m$ having the same dimensions first, and then computes each kernel cell $\mathbf{w}_ i$ as a linear mixture $\mathbf{w}_ i=\alpha_{i1} \mathbf{e}_ 1+\dots+\alpha_{in}\mathbf{e}_ n$ based on a predefined "warehouse" (consisting of $n$ same dimensioned kernel cells $\mathbf{e}_ 1,\dots,\mathbf{e}_ n$ , e.g., $n=108$) which is shared to all same-stage convolutional layers, and finally replaces the static kernel $\mathbf{W}$ by assembling its corresponding $m$ mixtures in order, yielding a high degree of freedom to fit a desired convolutional parameter budget. The input-dependent scalar attentions $\alpha_{i1},\dots,\alpha_{in}$ are computed with a novel contrasting-driven attention function (CAF). 12 | 13 | ## Dataset 14 | 15 | Following [this repository](https://github.com/pytorch/examples/tree/master/imagenet#requirements), 16 | 17 | - Download the ImageNet dataset from http://www.image-net.org/. 18 | - Then, move and extract the training and validation images to labeled subfolders, using [the following script](https://github.com/pytorch/examples/blob/main/imagenet/extract_ILSVRC.sh). 19 | 20 | ## Requirements 21 | 22 | - python >= 3.7.0 23 | - torch >= 1.8.1, torchvision >= 0.9.1 24 | - timm == 0.3.2, tensorboardX, six 25 | 26 | ## Results and Models 27 | 28 | Results comparison on the ImageNet validation set with the ResNet18, ResNet50 and ConvNeXt-Tiny backbones trained for 300 epochs. 29 | 30 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive | 31 | |:------------|:-------:|:------------:|:------------:|:-------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------:| 32 | | ResNet18 | 11.69M | 70.44 | 89.72 | [model](https://drive.google.com/file/d/1xpAiJyjVycr6b0RyKsDTXbUUz7etA2UV/view?usp=sharing) | [model](https://pan.baidu.com/s/16IRpCY0l_uHhJd-FMWllGg?pwd=w46e) | 33 | | + KW (1/4×) | 4.08M | 72.73 | 90.83 | [model](https://drive.google.com/file/d/1DMcTWMLjiLRpY38041nLlOU0Vb_zmUh6/view?usp=sharing) | [model](https://pan.baidu.com/s/11nv7S5oH7DHkuzCXgQbiXA?pwd=marv) | 34 | | + KW (1/2×) | 7.43M | 73.33 | 91.42 | [model](https://drive.google.com/file/d/1k5tyyDHu9mqsQtiwZu7tTMPPolPOjdbT/view?usp=sharing) | [model](https://pan.baidu.com/s/1bOjjVf5Z2YdKs5d7NlPamQ?pwd=t8ax) | 35 | | + KW (1×) | 11.93M | 74.77 | 92.13 | [model](https://drive.google.com/file/d/1aLaO8QV9QYdfv5GE2tg9iDWLeEJfkrKY/view?usp=sharing) | [model](https://pan.baidu.com/s/1TPcg7iMNdA_7w2FlkZaFWg?pwd=tu3x) | 36 | | + KW (2×) | 23.24M | 75.19 | 92.18 | [model](https://drive.google.com/file/d/1UB7nrqI2mkAIY_p7I5eOcitrmKYbjtsj/view?usp=sharing) | [model](https://pan.baidu.com/s/1WODJhFpgG8IPtZDeiOCFuw?pwd=jhtg) | 37 | | + KW (4×) | 45.86M | 76.05 | 92.68 | [model](https://drive.google.com/file/d/1gl1__aDrJIMs-Zrv3IgH7mfVnwgFCLbr/view?usp=sharing) | [model](https://pan.baidu.com/s/1u1jiDOt8IkE3nakFJPRkzA?pwd=75f1) | 38 | | ResNet50 | 25.56M | 78.44 | 94.24 | [model](https://drive.google.com/file/d/1x57Lip3xt1yFYGz7k6aCTR2yhP56TtKM/view?usp=sharing) | [model](https://pan.baidu.com/s/1O3CiCq1BNssIL1L2jNV_IA?pwd=ufyh) | 39 | | + KW (1/2×) | 17.64M | 79.30 | 94.71 | 
[model](https://drive.google.com/file/d/1lm_TlIUX7nmyjM7HNDWb7SxO65rgyrF9/view?usp=sharing) | [model](https://pan.baidu.com/s/1TeIqjTrUzNobDr07gN1dUw?pwd=7b6j) | 40 | | + KW (1×) | 28.05M | 80.38 | 95.19 | [model](https://drive.google.com/file/d/1lA155EYydHae_m__hk86tX_m9S0i85i8/view?usp=sharing) | [model](https://pan.baidu.com/s/1moFwCD7PhZ8Y0SPHB20hbA?pwd=jp8p) | 41 | | + KW (4×) | 102.02M | 81.05 | 95.21 | [model](https://drive.google.com/file/d/1pxU1oHIXerjVwzK7eFK7AGf1XZD_caMQ/view?usp=sharing) | [model](https://pan.baidu.com/s/1h-AHOozmqF1JIanes0-kiA?pwd=xmq7) | 42 | | ConvNeXt | 28.59M | 82.07 | 95.86 | [model](https://drive.google.com/file/d/1Yz_rooa7PMwE9Bdor00Mivtk61Xas7oh/view?usp=sharing) | [model](https://pan.baidu.com/s/1WCdlXqxpNkPCqvxowQnQfg?pwd=nm6j) | 43 | | + KW (1×) | 39.37M | 82.51 | 96.07 | [model](https://drive.google.com/file/d/13wqfY3jHxcskhHRchs0ynaQyVq0qUvn-/view?usp=sharing) | [model](https://pan.baidu.com/s/1EmTlEPMZnRAeO8QgMOfSPQ?pwd=z22e) | 44 | 45 | Results comparison on the ImageNet validation set with the MobileNetV2(1.0×, 0.5×) backbones trained for 150 epochs. 46 | 47 | 48 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive | 49 | |:-------------------|:------:|:------------:|:------------:|:-------------:|:-------------:| 50 | | MobileNetV2 (1.0×) | 3.50M | 72.04 | 90.42 | [model](https://drive.google.com/file/d/1t97r1FM8hX2AtCjDn7k2TsM7HY6XwQjz/view?usp=sharing) | [model](https://pan.baidu.com/s/1GD_q4gSZowvssJpUdY7wXw?pwd=bks8)| 51 | | + KW (1/2×) | 2.65M | 72.59 | 90.71 | [model](https://drive.google.com/file/d/1I8JI1CtfKtUPMygqEVdD19c3PhSZReKJ/view?usp=sharing) | [model](https://pan.baidu.com/s/1EhHTze4gqcS16UnTzdjekg?pwd=65k8)| 52 | | + KW (1×) | 5.17M | 74.68 | 91.90 | [model](https://drive.google.com/file/d/1EWiUX8qaRj1kTI1ktBNAhWnhauV5eVdk/view?usp=sharing) | [model](https://pan.baidu.com/s/1zyLMX4PpUVAL9gVICFRmiA?pwd=8rcb)| 53 | | + KW (4×) | 11.38M | 75.92 | 92.22 | [model](https://drive.google.com/file/d/1xnzx41_sj3kZbR5Fzsfsb_PK5SEINXZ4/view?usp=sharing) | [model](https://pan.baidu.com/s/1Mb3buGekUCmseHWQNwLnjQ?pwd=ncrm)| 54 | | MobileNetV2 (0.5×) | 1.97M | 64.32 | 85.22 | [model](https://drive.google.com/file/d/1-L4EgH5hFQydocXjjT9oJCFVEItsD_eU/view?usp=sharing) | [model](https://pan.baidu.com/s/1zap9BSnry1WJy0SZDt4SIw?pwd=ueqv)| 55 | | + KW (1/2×) | 1.47M | 65.20 | 85.98 | [model](https://drive.google.com/file/d/1SByM8kJjb7seeYpY8lrSrv-hUOMJWRJE/view?usp=sharing) | [model](https://pan.baidu.com/s/13mzjh203BhRSETJiaJF3cw?pwd=tdck)| 56 | | + KW (1×) | 2.85M | 68.29 | 87.93 | [model](https://drive.google.com/file/d/1KFKy05JhhMnfj-tAz2SKzNRcBVMoJa19/view?usp=sharing) | [model](https://pan.baidu.com/s/1MHW2k5IkX1NPgM1KhQL29A?pwd=dajd)| 57 | | + KW (4×) | 4.65M | 70.26 | 89.19 | [model](https://drive.google.com/file/d/1Jt94_M7JQ6RDViYN3-P-4uoA8a5_bVYE/view?usp=sharing) | [model](https://pan.baidu.com/s/1uLBpreSm9MOtjPRWcM5SjA?pwd=whz2)| 58 | 59 | 60 | ## Training 61 | 62 | To train a model with KernelWarehouse: 63 | 64 | ```shell 65 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \ 66 | --batch_size {batch size per gpu} --update_freq {number of gradient accumulation steps} --data_path {path to dataset} \ 67 | --output_dir {path to output folder} 68 | ``` 69 | 70 | For example, to train ResNet18 + KW (1×) on 8 GPUs with batch size of 4096: 71 | 72 | ```shell 73 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config 
configs/resnet18/kw1x_resnet18.json \ 74 | --batch_size 128 --update_freq 4 --data_path {path to dataset} --output_dir {path to output folder} 75 | ``` 76 | 77 | For example, to train MobileNetV2 + KW (4×) on 8 GPUs with a total batch size of 256: 78 | 79 | ```shell 80 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config configs/mobilenetv2_100/kw4x_mobilenetv2_100.json \ 81 | --batch_size 32 --update_freq 1 --data_path {path to dataset} --output_dir {path to output folder} 82 | ``` 83 | 84 | You can add `--use_amp true` to enable Automatic Mixed Precision, which reduces memory usage and speeds up training. 85 | 86 | More config files for other models can be found in [configs](configs). 87 | 88 | ## Evaluation 89 | 90 | To evaluate a pre-trained model: 91 | 92 | ```shell 93 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \ 94 | --eval true --data_path {path to dataset} --resume {path to model} 95 | ``` 96 | 97 | ## Training and evaluation on object detection and instance segmentation 98 | Please refer to the README.md in the [detection](detection) folder for details. 99 | 100 | ## Citation 101 | If you find our work useful in your research, please consider citing: 102 | ``` 103 | @inproceedings{li2024kernelwarehouse, 104 | title={KernelWarehouse: Rethinking the Design of Dynamic Convolution}, 105 | author={Chao Li and Anbang Yao}, 106 | booktitle={International Conference on Machine Learning}, 107 | year={2024} 108 | } 109 | ``` 110 | 111 | ## License 112 | KernelWarehouse is released under the Apache 2.0 license. We encourage use for both research and commercial purposes, as long as proper attribution is given. 113 | 114 | ## Acknowledgment 115 | This repository is built upon the [ConvNeXt](https://github.com/facebookresearch/ConvNeXt), [mmdetection](https://github.com/open-mmlab/mmdetection), [Dynamic-convolution-Pytorch](https://github.com/kaijieshi7/Dynamic-convolution-Pytorch) and [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) repositories. We thank the authors for releasing their code.
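## Appendix: a toy sketch of the kernel assembling process

For readers who want a concrete feel for the mechanism summarized in the schematic above before reading `modules/kernel_warehouse.py`, here is a deliberately simplified, self-contained PyTorch sketch. It is not the repository's implementation: it splits the kernel along the output-channel axis only, uses a single linear layer as the attention branch, replaces the contrasting-driven attention function with a plain l1 normalization, and omits cross-layer warehouse sharing as well as the temperature schedule (`temp_epoch` in the configs) that anneals the attentions during early training. All names and hyper-parameter values below are illustrative.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyWarehouseConv2d(nn.Module):
    """Toy 3x3 convolution whose kernel is assembled from n shared cells."""

    def __init__(self, in_ch, out_ch, m=4, n=8):
        super().__init__()
        assert out_ch % m == 0
        self.m, self.n = m, n
        # each of the m kernel cells w_i has this shape
        self.cell_shape = (out_ch // m, in_ch, 3, 3)
        # the "warehouse": n same-dimensioned kernel cells e_1, ..., e_n
        self.cells = nn.Parameter(torch.randn(n, *self.cell_shape) * 0.02)
        # attention branch: global average pooling -> one row of n logits per cell
        self.fc = nn.Linear(in_ch, m * n)

    def attention(self, x):
        z = self.fc(x.mean(dim=(2, 3))).view(-1, self.m, self.n)  # (B, m, n)
        # normalize logits by their l1 mass instead of a softmax, so attentions
        # can be negative -- a crude stand-in for the paper's CAF
        return z / z.abs().sum(dim=-1, keepdim=True).clamp_min(1e-6)

    def forward(self, x):
        b = x.shape[0]
        alpha = self.attention(x)                               # (B, m, n)
        # each kernel cell w_i = alpha_i1 * e_1 + ... + alpha_in * e_n
        w = torch.einsum('bmn,nd->bmd', alpha, self.cells.flatten(1))
        # assemble the m mixtures, in order, into one full kernel per sample
        w = w.reshape(b * self.m * self.cell_shape[0], *self.cell_shape[1:])
        # grouped-conv trick: apply a different assembled kernel to every sample
        y = F.conv2d(x.reshape(1, -1, *x.shape[2:]), w, padding=1, groups=b)
        return y.reshape(b, -1, *y.shape[2:])


if __name__ == "__main__":
    layer = ToyWarehouseConv2d(16, 32, m=4, n=8)
    print(layer(torch.randn(2, 16, 56, 56)).shape)  # torch.Size([2, 32, 56, 56])
```

In this picture, the `cell_num_ratio` field of the config files roughly controls how large n is relative to the parameter budget of the static kernels being replaced, which is what the 1/2×, 1×, 2× and 4× labels in the tables above refer to.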
116 | -------------------------------------------------------------------------------- /configs/convnext/convnext_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "convnext_tiny", 3 | "drop_path": 0.1, 4 | "model_ema": true, 5 | "model_ema_eval": true 6 | } -------------------------------------------------------------------------------- /configs/convnext/kw1x_convnext_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_convnext_tiny", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_pwconv", 8 | "drop_path": 0.3, 9 | "model_ema": true, 10 | "model_ema_eval": true 11 | } 12 | -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw1d2x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw1d2x_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true, 16 | "temp_epoch": 15 17 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw1x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "cell_num_ratio": 1, 16 | "cell_inplane_ratio": 1, 17 | "cell_outplane_ratio": 1, 18 | "sharing_range": ["layer", "pwconv"], 19 | "traditional_preprocess": true, 20 | "temp_epoch": 15 21 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw4x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 4, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true 16 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw1d2x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw1d2x_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": 
"sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true, 16 | "temp_epoch": 15 17 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw1x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 1, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer", "pwconv"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw4x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 4, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true 16 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1d2x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.5, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.0, 9 | "dropout": 0.0 10 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1d4x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.25, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.0, 9 | "dropout": 0.0, 10 | "mixup": 0, 11 | "cutmix": 0 12 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/kw2x_resnet18.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 2, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/kw4x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 4, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "resnet18", 3 | "drop_path": 0.0 4 | } -------------------------------------------------------------------------------- /configs/resnet50/kw1d2x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.5, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "nonlocal_basis_ratio": 0.0625, 9 | "drop_path": 0.1 10 | } -------------------------------------------------------------------------------- /configs/resnet50/kw1x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet50/kw4x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 4, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet50/resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "resnet50", 3 | "drop_path": 0.1 4 | } -------------------------------------------------------------------------------- /datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torchvision import datasets, transforms 3 | 4 | from timm.data.constants import \ 5 | IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD 6 | from timm.data import create_transform 7 | 8 | def build_dataset(is_train, args): 9 | if args.traditional_preprocess: 10 | transform = build_transform_tradition(is_train, args) 11 | else: 12 | transform = build_transform(is_train, args) 13 | 14 | print("Transform = ") 15 | if isinstance(transform, tuple): 16 | for trans in transform: 17 | print(" - - - - - - - - - - ") 18 | for t in trans.transforms: 19 | print(t) 20 | else: 21 | for t in transform.transforms: 22 | print(t) 23 | print("---------------------------") 24 | 25 | if args.data_set == 'CIFAR': 26 | dataset = datasets.CIFAR100(args.data_path, train=is_train, transform=transform, download=True) 27 | nb_classes = 100 28 | elif args.data_set == 
'IMNET': 29 | print("reading from datapath", args.data_path) 30 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 31 | dataset = datasets.ImageFolder(root, transform=transform) 32 | nb_classes = 1000 33 | elif args.data_set == "image_folder": 34 | root = args.data_path if is_train else args.eval_data_path 35 | dataset = datasets.ImageFolder(root, transform=transform) 36 | nb_classes = args.nb_classes 37 | assert len(dataset.class_to_idx) == nb_classes 38 | else: 39 | raise NotImplementedError() 40 | print("Number of the class = %d" % nb_classes) 41 | 42 | return dataset, nb_classes 43 | 44 | 45 | def build_transform(is_train, args): 46 | resize_im = args.input_size > 32 47 | imagenet_default_mean_and_std = args.imagenet_default_mean_and_std 48 | mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN 49 | std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD 50 | 51 | if is_train: 52 | # this should always dispatch to transforms_imagenet_train 53 | transform = create_transform( 54 | input_size=args.input_size, 55 | is_training=True, 56 | color_jitter=args.color_jitter, 57 | auto_augment=args.aa, 58 | interpolation=args.train_interpolation, 59 | re_prob=args.reprob, 60 | re_mode=args.remode, 61 | re_count=args.recount, 62 | mean=mean, 63 | std=std, 64 | ) 65 | if not resize_im: 66 | transform.transforms[0] = transforms.RandomCrop( 67 | args.input_size, padding=4) 68 | return transform 69 | 70 | t = [] 71 | if resize_im: 72 | # warping (no cropping) when evaluated at 384 or larger 73 | if args.input_size >= 384: 74 | t.append( 75 | transforms.Resize((args.input_size, args.input_size), 76 | interpolation=transforms.InterpolationMode.BICUBIC), 77 | ) 78 | print(f"Warping {args.input_size} size input images...") 79 | else: 80 | if args.crop_pct is None: 81 | args.crop_pct = 224 / 256 82 | size = int(args.input_size / args.crop_pct) 83 | t.append( 84 | # to maintain same ratio w.r.t. 224 images 85 | transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC), 86 | ) 87 | t.append(transforms.CenterCrop(args.input_size)) 88 | 89 | t.append(transforms.ToTensor()) 90 | t.append(transforms.Normalize(mean, std)) 91 | return transforms.Compose(t) 92 | 93 | 94 | def build_transform_tradition(is_train, args): 95 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 96 | std=[0.229, 0.224, 0.225]) 97 | if is_train: 98 | t = transforms.Compose([ 99 | transforms.RandomResizedCrop(224), 100 | transforms.RandomHorizontalFlip(), 101 | transforms.ToTensor(), 102 | normalize, 103 | ]) 104 | else: 105 | t = transforms.Compose([ 106 | transforms.Resize(256), 107 | transforms.CenterCrop(224), 108 | transforms.ToTensor(), 109 | normalize, 110 | ]) 111 | return t 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /detection/README.md: -------------------------------------------------------------------------------- 1 | # MS-COCO Object Detection with KernelWarehouse 2 | 3 | We use the popular [MMDetection](https://github.com/open-mmlab/mmdetection) toolbox for experiments on the MS-COCO dataset with the pre-trained ResNet50, MobileNetV2 (1.0×) and ConvNeXt-Tiny models as the backbones for the detector. We select the mainstream Faster RCNN and Mask R-CNN detectors with Feature Pyramid Networks as the necks to build the basic object detection systems. 
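All KernelWarehouse backbones used here are registered under `mmdet/models/backbones` (see its `__init__.py`), so turning a baseline config into its KW counterpart essentially means swapping the backbone `type` and adding the warehouse hyper-parameters. The fragment below is a trimmed-down orientation sketch in the spirit of the complete config files shipped in `configs/kernelwarehouse/`, which remain the authoritative reference; the pretrained path is a placeholder.

```python
# Illustrative fragment only -- see configs/kernelwarehouse/resnet50/ for the
# complete configs this is distilled from.
_base_ = [
    '../../_base_/models/mask_rcnn_r50_fpn.py',
    '../../_base_/datasets/coco_instance.py',
    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]

model = dict(
    pretrained='/path/to/imagenet_pretrained/kw1x_resnet50.pth',  # placeholder
    backbone=dict(
        type='KW_ResNet',         # KernelWarehouse ResNet registered by this repo
        depth=50,
        norm_eval=True,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        cell_num_ratio=1,         # 1 for KW (1x); larger values give the nx variants
    ))
```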
4 | 5 | 6 | ## Training 7 | 8 | Please follow [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) on how to prepare the environment and the dataset. Then attach our code to the origin project and modify the config files according to your own path to the pre-trained models and directories to save logs and models. 9 | 10 | To train a detector with pre-trained models as backbone: 11 | 12 | ```shell 13 | bash tools/dist_train.sh {path to config file} {number of gpus} 14 | ``` 15 | 16 | ## Evaluation 17 | 18 | To evaluate a fine-tuned model: 19 | ```shell 20 | bash tools/dist_test.sh {path to config file} {path to fine-tuned model} {number of gpus} --eval bbox segm --show 21 | ``` 22 | 23 | 24 | ## Results and Models 25 | 26 | | Backbones | Detectors | box AP | mask AP | Config | Google Drive | Baidu Drive | 27 | |:------------|:-------:|:------:|:-------:|:-------------:|:-------------:|:-------------:| 28 | | ResNet50 | Mask R-CNN | 39.6 | 36.4 | [config](configs/kernelwarehouse/mask_rcnn_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1j6wSJLett-JeVDTh7CW7CHhC4jQHDzad/view?usp=sharing) | [model](https://pan.baidu.com/s/1U7q2U0jYXjDCAVxqUMWmHw?pwd=4wih) | 29 | | + KW (1×) | Mask R-CNN | 41.8 | 38.4 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1XBXKF8TU0iMFVBt-IF048hAmYTL9-spk/view?usp=sharing) | [model](https://pan.baidu.com/s/1AI01STe9v0KzAKVVPMUhog?pwd=a7ce) | 30 | | + KW (4×) | Mask R-CNN | 42.4 | 38.9 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1GUDEr2jNT0Il8A04g_f8sRQ1WFAycOO8/view?usp=sharing) | [model](https://pan.baidu.com/s/1ZSJkfVy8xr5IB_OfubXzRw?pwd=xig5) | 31 | | MobileNetV2 (1.0×) | Mask R-CNN | 33.8 | 31.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1UJifIqx56cOOm2hx-D3DSHh4MWgFzOVB/view?usp=sharing) | [model](https://pan.baidu.com/s/1S7vo59mzEVL_8ai9Sg1iUQ?pwd=4sh8) | 32 | | + KW (1×) | Mask R-CNN | 36.4 | 33.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1wdzs-Ry6LefgG4Nc9RWUlrDrsyGOWhL5/view?usp=sharing) | [model](https://pan.baidu.com/s/1q3U4Euw2qNCWXipPCn4vtQ?pwd=8g38) | 33 | | + KW (4×) | Mask R-CNN | 38.0 | 34.9 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/14nfWpHUHgH0mA4gbEPX3F_3UqOXPIGK7/view?usp=sharing) | [model](https://pan.baidu.com/s/1HidKe3MgnIEERvvKgdYMHg?pwd=n5uu) | 34 | | ConvNeXt-Tiny | Mask R-CNN | 43.4 | 39.7 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1xarty4OTZOKGo1ltAUcTJCoKHCIOipC6/view?usp=sharing) | [model](https://pan.baidu.com/s/1bouC_aK9C1czPrIYkkS3Ug?pwd=79f4) | 35 | | + KW (4×) | Mask R-CNN | 44.7 | 40.6 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1simtPisVzZo__iSXZwrynWi6TlUwPG3b/view?usp=sharing) | [model](https://pan.baidu.com/s/1iBD4lCrvSTX0Wu7e2I0BKg?pwd=am2w) | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_convnext_fpn.py: 
-------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MaskRCNN', 3 | backbone=dict( 4 | type='ConvNeXt', 5 | in_chans=3, 6 | depths=[3, 3, 9, 3], 7 | dims=[96, 192, 384, 768], 8 | drop_path_rate=0.2, 9 | layer_scale_init_value=1e-6, 10 | out_indices=[0, 1, 2, 3], 11 | ), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 54 | mask_roi_extractor=dict( 55 | type='SingleRoIExtractor', 56 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 57 | out_channels=256, 58 | featmap_strides=[4, 8, 16, 32]), 59 | mask_head=dict( 60 | type='FCNMaskHead', 61 | num_convs=4, 62 | in_channels=256, 63 | conv_out_channels=256, 64 | num_classes=80, 65 | loss_mask=dict( 66 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 67 | # model training and testing settings 68 | train_cfg=dict( 69 | rpn=dict( 70 | assigner=dict( 71 | type='MaxIoUAssigner', 72 | pos_iou_thr=0.7, 73 | neg_iou_thr=0.3, 74 | min_pos_iou=0.3, 75 | match_low_quality=True, 76 | ignore_iof_thr=-1), 77 | sampler=dict( 78 | type='RandomSampler', 79 | num=256, 80 | pos_fraction=0.5, 81 | neg_pos_ub=-1, 82 | add_gt_as_proposals=False), 83 | allowed_border=-1, 84 | pos_weight=-1, 85 | debug=False), 86 | rpn_proposal=dict( 87 | nms_pre=2000, 88 | max_per_img=1000, 89 | nms=dict(type='nms', iou_threshold=0.7), 90 | min_bbox_size=0), 91 | rcnn=dict( 92 | assigner=dict( 93 | type='MaxIoUAssigner', 94 | pos_iou_thr=0.5, 95 | neg_iou_thr=0.5, 96 | min_pos_iou=0.5, 97 | match_low_quality=True, 98 | ignore_iof_thr=-1), 99 | sampler=dict( 100 | type='RandomSampler', 101 | num=512, 102 | pos_fraction=0.25, 103 | neg_pos_ub=-1, 104 | add_gt_as_proposals=True), 105 | mask_size=28, 106 | pos_weight=-1, 107 | debug=False)), 108 | test_cfg=dict( 109 | rpn=dict( 110 | nms_pre=1000, 111 | max_per_img=1000, 112 | nms=dict(type='nms', iou_threshold=0.7), 113 | min_bbox_size=0), 114 | rcnn=dict( 115 | score_thr=0.05, 116 | nms=dict(type='nms', iou_threshold=0.5), 117 | max_per_img=100, 118 | mask_thr_binary=0.5))) -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_mobilenetv2_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model 
= dict( 3 | type='MaskRCNN', 4 | backbone=dict( 5 | type='MobileNetV2', 6 | width_mult=1.0, 7 | norm_eval=True, 8 | out_indices=(1, 2, 3, 4), 9 | frozen_stages=1), 10 | neck=dict( 11 | type='FPN', 12 | in_channels=[24, 32, 96, 1280], 13 | out_channels=256, 14 | num_outs=5), 15 | rpn_head=dict( 16 | type='RPNHead', 17 | in_channels=256, 18 | feat_channels=256, 19 | anchor_generator=dict( 20 | type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict( 25 | type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0]), 28 | loss_cls=dict( 29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict( 32 | type='StandardRoIHead', 33 | bbox_roi_extractor=dict( 34 | type='SingleRoIExtractor', 35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=False, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict( 53 | type='SingleRoIExtractor', 54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 55 | out_channels=256, 56 | featmap_strides=[4, 8, 16, 32]), 57 | mask_head=dict( 58 | type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict( 64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 65 | # model training and testing settings 66 | train_cfg=dict( 67 | rpn=dict( 68 | assigner=dict( 69 | type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict( 76 | type='RandomSampler', 77 | num=256, 78 | pos_fraction=0.5, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=False), 81 | allowed_border=-1, 82 | pos_weight=-1, 83 | debug=False), 84 | rpn_proposal=dict( 85 | nms_pre=2000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict( 90 | assigner=dict( 91 | type='MaxIoUAssigner', 92 | pos_iou_thr=0.5, 93 | neg_iou_thr=0.5, 94 | min_pos_iou=0.5, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict( 98 | type='RandomSampler', 99 | num=512, 100 | pos_fraction=0.25, 101 | neg_pos_ub=-1, 102 | add_gt_as_proposals=True), 103 | mask_size=28, 104 | pos_weight=-1, 105 | debug=False)), 106 | test_cfg=dict( 107 | rpn=dict( 108 | nms_pre=1000, 109 | max_per_img=1000, 110 | nms=dict(type='nms', iou_threshold=0.7), 111 | min_bbox_size=0), 112 | rcnn=dict( 113 | score_thr=0.05, 114 | nms=dict(type='nms', iou_threshold=0.5), 115 | max_per_img=100, 116 | mask_thr_binary=0.5))) 117 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MaskRCNN', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_eval=True,), 10 | neck=dict( 11 
| type='FPN', 12 | in_channels=[256, 512, 1024, 2048], 13 | out_channels=256, 14 | num_outs=5), 15 | rpn_head=dict( 16 | type='RPNHead', 17 | in_channels=256, 18 | feat_channels=256, 19 | anchor_generator=dict( 20 | type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict( 25 | type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0]), 28 | loss_cls=dict( 29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict( 32 | type='StandardRoIHead', 33 | bbox_roi_extractor=dict( 34 | type='SingleRoIExtractor', 35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=False, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict( 53 | type='SingleRoIExtractor', 54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 55 | out_channels=256, 56 | featmap_strides=[4, 8, 16, 32]), 57 | mask_head=dict( 58 | type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict( 64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 65 | # model training and testing settings 66 | train_cfg=dict( 67 | rpn=dict( 68 | assigner=dict( 69 | type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict( 76 | type='RandomSampler', 77 | num=256, 78 | pos_fraction=0.5, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=False), 81 | allowed_border=-1, 82 | pos_weight=-1, 83 | debug=False), 84 | rpn_proposal=dict( 85 | nms_pre=2000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict( 90 | assigner=dict( 91 | type='MaxIoUAssigner', 92 | pos_iou_thr=0.5, 93 | neg_iou_thr=0.5, 94 | min_pos_iou=0.5, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict( 98 | type='RandomSampler', 99 | num=512, 100 | pos_fraction=0.25, 101 | neg_pos_ub=-1, 102 | add_gt_as_proposals=True), 103 | mask_size=28, 104 | pos_weight=-1, 105 | debug=False)), 106 | test_cfg=dict( 107 | rpn=dict( 108 | nms_pre=1000, 109 | max_per_img=1000, 110 | nms=dict(type='nms', iou_threshold=0.7), 111 | min_bbox_size=0), 112 | rcnn=dict( 113 | score_thr=0.05, 114 | nms=dict(type='nms', iou_threshold=0.5), 115 | max_per_img=100, 116 | mask_thr_binary=0.5))) 117 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_convnext_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='ConvNeXt', 11 | in_chans=3, 12 | depths=[3, 
3, 9, 3], 13 | dims=[96, 192, 384, 768], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | out_indices=[0, 1, 2, 3], 17 | ), 18 | neck=dict(in_channels=[96, 192, 384, 768])) 19 | 20 | img_norm_cfg = dict( 21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 22 | 23 | # augmentation strategy originates from DETR / Sparse RCNN 24 | train_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='AutoAugment', 29 | policies=[ 30 | [ 31 | dict(type='Resize', 32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 34 | (736, 1333), (768, 1333), (800, 1333)], 35 | multiscale_mode='value', 36 | keep_ratio=True) 37 | ], 38 | [ 39 | dict(type='Resize', 40 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True), 43 | dict(type='RandomCrop', 44 | crop_type='absolute_range', 45 | crop_size=(384, 600), 46 | allow_negative_crop=True), 47 | dict(type='Resize', 48 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 49 | (576, 1333), (608, 1333), (640, 1333), 50 | (672, 1333), (704, 1333), (736, 1333), 51 | (768, 1333), (800, 1333)], 52 | multiscale_mode='value', 53 | override=True, 54 | keep_ratio=True) 55 | ] 56 | ]), 57 | dict(type='Normalize', **img_norm_cfg), 58 | dict(type='Pad', size_divisor=32), 59 | dict(type='DefaultFormatBundle'), 60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 61 | ] 62 | data = dict(train=dict(pipeline=train_pipeline)) 63 | 64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 66 | paramwise_cfg={'decay_rate': 0.95, 67 | 'decay_type': 'layer_wise', 68 | 'num_layers': 6}) 69 | 70 | lr_config = dict(step=[9, 11]) 71 | runner = dict(type='EpochBasedRunner', max_epochs=12) -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_convnext_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ConvNeXt', 11 | in_chans=3, 12 | depths=[3, 3, 9, 3], 13 | dims=[96, 192, 384, 768], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | out_indices=[0, 1, 2, 3], 17 | ), 18 | neck=dict(in_channels=[96, 192, 384, 768])) 19 | 20 | img_norm_cfg = dict( 21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 22 | 23 | # augmentation strategy originates from DETR / Sparse RCNN 24 | train_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='AutoAugment', 29 | policies=[ 30 | [ 31 | dict(type='Resize', 32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 34 | (736, 1333), (768, 1333), (800, 1333)], 35 | multiscale_mode='value', 36 | keep_ratio=True) 37 | ], 38 | [ 39 | dict(type='Resize', 40 | img_scale=[(400, 1333), (500, 1333), (600, 
1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True), 43 | dict(type='RandomCrop', 44 | crop_type='absolute_range', 45 | crop_size=(384, 600), 46 | allow_negative_crop=True), 47 | dict(type='Resize', 48 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 49 | (576, 1333), (608, 1333), (640, 1333), 50 | (672, 1333), (704, 1333), (736, 1333), 51 | (768, 1333), (800, 1333)], 52 | multiscale_mode='value', 53 | override=True, 54 | keep_ratio=True) 55 | ] 56 | ]), 57 | dict(type='Normalize', **img_norm_cfg), 58 | dict(type='Pad', size_divisor=32), 59 | dict(type='DefaultFormatBundle'), 60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 61 | ] 62 | data = dict(train=dict(pipeline=train_pipeline)) 63 | 64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 66 | paramwise_cfg={'decay_rate': 0.95, 67 | 'decay_type': 'layer_wise', 68 | 'num_layers': 6}) 69 | 70 | lr_config = dict(step=[9, 11]) 71 | runner = dict(type='EpochBasedRunner', max_epochs=12) 72 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | cell_num_ratio=1, 16 | sharing_range=('layer', 'pwconv') 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[24, 32, 96, 1280], 21 | out_channels=256, 22 | num_outs=5 23 | ), 24 | ) 25 | 26 | img_norm_cfg = dict( 27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 28 | 29 | # augmentation strategy originates from DETR / Sparse RCNN 30 | train_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 33 | dict(type='RandomFlip', flip_ratio=0.5), 34 | dict(type='AutoAugment', 35 | policies=[ 36 | [ 37 | dict(type='Resize', 38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 40 | (736, 1333), (768, 1333), (800, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True) 43 | ], 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 47 | multiscale_mode='value', 48 | keep_ratio=True), 49 | dict(type='RandomCrop', 50 | crop_type='absolute_range', 51 | crop_size=(384, 600), 52 | allow_negative_crop=True), 53 | dict(type='Resize', 54 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 55 | (576, 1333), (608, 1333), (640, 1333), 56 | (672, 1333), (704, 1333), (736, 1333), 57 | (768, 1333), (800, 1333)], 58 | multiscale_mode='value', 59 | override=True, 60 | keep_ratio=True) 61 | ] 62 | ]), 63 | dict(type='Normalize', **img_norm_cfg), 64 | dict(type='Pad', size_divisor=32), 65 | dict(type='DefaultFormatBundle'), 66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 67 | ] 68 | data = dict(train=dict(pipeline=train_pipeline)) 69 | 70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 71 | 
paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 72 | 'relative_position_bias_table': dict(decay_mult=0.), 73 | 'norm': dict(decay_mult=0.)})) 74 | 75 | lr_config = dict(step=[8, 11]) 76 | runner = dict(type='EpochBasedRunner', max_epochs=12) 77 | find_unused_parameters = True 78 | 79 | 80 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | cell_num_ratio=4, 16 | sharing_range=('layer',) 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[24, 32, 96, 1280], 21 | out_channels=256, 22 | num_outs=5 23 | ), 24 | ) 25 | 26 | img_norm_cfg = dict( 27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 28 | 29 | # augmentation strategy originates from DETR / Sparse RCNN 30 | train_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 33 | dict(type='RandomFlip', flip_ratio=0.5), 34 | dict(type='AutoAugment', 35 | policies=[ 36 | [ 37 | dict(type='Resize', 38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 40 | (736, 1333), (768, 1333), (800, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True) 43 | ], 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 47 | multiscale_mode='value', 48 | keep_ratio=True), 49 | dict(type='RandomCrop', 50 | crop_type='absolute_range', 51 | crop_size=(384, 600), 52 | allow_negative_crop=True), 53 | dict(type='Resize', 54 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 55 | (576, 1333), (608, 1333), (640, 1333), 56 | (672, 1333), (704, 1333), (736, 1333), 57 | (768, 1333), (800, 1333)], 58 | multiscale_mode='value', 59 | override=True, 60 | keep_ratio=True) 61 | ] 62 | ]), 63 | dict(type='Normalize', **img_norm_cfg), 64 | dict(type='Pad', size_divisor=32), 65 | dict(type='DefaultFormatBundle'), 66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 67 | ] 68 | data = dict(train=dict(pipeline=train_pipeline)) 69 | 70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 71 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 72 | 'relative_position_bias_table': dict(decay_mult=0.), 73 | 'norm': dict(decay_mult=0.)})) 74 | 75 | lr_config = dict(step=[8, 11]) 76 | runner = dict(type='EpochBasedRunner', max_epochs=12) 77 | find_unused_parameters = True 78 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 |
pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | ), 16 | neck=dict( 17 | type='FPN', 18 | in_channels=[24, 32, 96, 1280], 19 | out_channels=256, 20 | num_outs=5 21 | ), 22 | ) 23 | 24 | img_norm_cfg = dict( 25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 26 | 27 | # augmentation strategy originates from DETR / Sparse RCNN 28 | train_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 31 | dict(type='RandomFlip', flip_ratio=0.5), 32 | dict(type='AutoAugment', 33 | policies=[ 34 | [ 35 | dict(type='Resize', 36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 38 | (736, 1333), (768, 1333), (800, 1333)], 39 | multiscale_mode='value', 40 | keep_ratio=True) 41 | ], 42 | [ 43 | dict(type='Resize', 44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 45 | multiscale_mode='value', 46 | keep_ratio=True), 47 | dict(type='RandomCrop', 48 | crop_type='absolute_range', 49 | crop_size=(384, 600), 50 | allow_negative_crop=True), 51 | dict(type='Resize', 52 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 53 | (576, 1333), (608, 1333), (640, 1333), 54 | (672, 1333), (704, 1333), (736, 1333), 55 | (768, 1333), (800, 1333)], 56 | multiscale_mode='value', 57 | override=True, 58 | keep_ratio=True) 59 | ] 60 | ]), 61 | dict(type='Normalize', **img_norm_cfg), 62 | dict(type='Pad', size_divisor=32), 63 | dict(type='DefaultFormatBundle'), 64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 65 | ] 66 | data = dict(train=dict(pipeline=train_pipeline)) 67 | 68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 70 | 'relative_position_bias_table': dict(decay_mult=0.), 71 | 'norm': dict(decay_mult=0.)})) 72 | 73 | lr_config = dict(step=[8, 11]) 74 | runner = dict(type='EpochBasedRunner', max_epochs=12) 75 | find_unused_parameters = True 76 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | cell_num_ratio=1, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=5 22 | ), 23 | ) 24 | 25 | img_norm_cfg = dict( 26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 27 | 28 | # augmentation strategy originates from DETR / Sparse RCNN 29 | train_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 32 | dict(type='RandomFlip', flip_ratio=0.5), 33 | dict(type='AutoAugment', 34 | policies=[ 35 | [ 36 | dict(type='Resize', 37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 38 | (608, 1333), (640, 1333), (672, 1333), 
(704, 1333), 39 | (736, 1333), (768, 1333), (800, 1333)], 40 | multiscale_mode='value', 41 | keep_ratio=True) 42 | ], 43 | [ 44 | dict(type='Resize', 45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 46 | multiscale_mode='value', 47 | keep_ratio=True), 48 | dict(type='RandomCrop', 49 | crop_type='absolute_range', 50 | crop_size=(384, 600), 51 | allow_negative_crop=True), 52 | dict(type='Resize', 53 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 54 | (576, 1333), (608, 1333), (640, 1333), 55 | (672, 1333), (704, 1333), (736, 1333), 56 | (768, 1333), (800, 1333)], 57 | multiscale_mode='value', 58 | override=True, 59 | keep_ratio=True) 60 | ] 61 | ]), 62 | dict(type='Normalize', **img_norm_cfg), 63 | dict(type='Pad', size_divisor=32), 64 | dict(type='DefaultFormatBundle'), 65 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 66 | ] 67 | data = dict(train=dict(pipeline=train_pipeline)) 68 | 69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 71 | 'relative_position_bias_table': dict(decay_mult=0.), 72 | 'norm': dict(decay_mult=0.)})) 73 | 74 | lr_config = dict(step=[8, 11]) 75 | runner = dict(type='EpochBasedRunner', max_epochs=12) 76 | find_unused_parameters = True 77 | 78 | 79 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | cell_num_ratio=4, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=5 22 | ), 23 | ) 24 | 25 | img_norm_cfg = dict( 26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 27 | 28 | # augmentation strategy originates from DETR / Sparse RCNN 29 | train_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 32 | dict(type='RandomFlip', flip_ratio=0.5), 33 | dict(type='AutoAugment', 34 | policies=[ 35 | [ 36 | dict(type='Resize', 37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 38 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 39 | (736, 1333), (768, 1333), (800, 1333)], 40 | multiscale_mode='value', 41 | keep_ratio=True) 42 | ], 43 | [ 44 | dict(type='Resize', 45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 46 | multiscale_mode='value', 47 | keep_ratio=True), 48 | dict(type='RandomCrop', 49 | crop_type='absolute_range', 50 | crop_size=(384, 600), 51 | allow_negative_crop=True), 52 | dict(type='Resize', 53 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 54 | (576, 1333), (608, 1333), (640, 1333), 55 | (672, 1333), (704, 1333), (736, 1333), 56 | (768, 1333), (800, 1333)], 57 | multiscale_mode='value', 58 | override=True, 59 | keep_ratio=True) 60 | ] 61 | ]), 62 | dict(type='Normalize', **img_norm_cfg), 63 | dict(type='Pad', size_divisor=32), 64 | dict(type='DefaultFormatBundle'), 65 | dict(type='Collect', keys=['img', 'gt_bboxes', 
'gt_labels', 'gt_masks']), 66 | ] 67 | data = dict(train=dict(pipeline=train_pipeline)) 68 | 69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 71 | 'relative_position_bias_table': dict(decay_mult=0.), 72 | 'norm': dict(decay_mult=0.)})) 73 | 74 | lr_config = dict(step=[8, 11]) 75 | runner = dict(type='EpochBasedRunner', max_epochs=12) 76 | find_unused_parameters = True 77 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | ), 16 | neck=dict( 17 | type='FPN', 18 | in_channels=[256, 512, 1024, 2048], 19 | out_channels=256, 20 | num_outs=5 21 | ), 22 | ) 23 | 24 | img_norm_cfg = dict( 25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 26 | 27 | # augmentation strategy originates from DETR / Sparse RCNN 28 | train_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 31 | dict(type='RandomFlip', flip_ratio=0.5), 32 | dict(type='AutoAugment', 33 | policies=[ 34 | [ 35 | dict(type='Resize', 36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 38 | (736, 1333), (768, 1333), (800, 1333)], 39 | multiscale_mode='value', 40 | keep_ratio=True) 41 | ], 42 | [ 43 | dict(type='Resize', 44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 45 | multiscale_mode='value', 46 | keep_ratio=True), 47 | dict(type='RandomCrop', 48 | crop_type='absolute_range', 49 | crop_size=(384, 600), 50 | allow_negative_crop=True), 51 | dict(type='Resize', 52 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 53 | (576, 1333), (608, 1333), (640, 1333), 54 | (672, 1333), (704, 1333), (736, 1333), 55 | (768, 1333), (800, 1333)], 56 | multiscale_mode='value', 57 | override=True, 58 | keep_ratio=True) 59 | ] 60 | ]), 61 | dict(type='Normalize', **img_norm_cfg), 62 | dict(type='Pad', size_divisor=32), 63 | dict(type='DefaultFormatBundle'), 64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 65 | ] 66 | data = dict(train=dict(pipeline=train_pipeline)) 67 | 68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 70 | 'relative_position_bias_table': dict(decay_mult=0.), 71 | 'norm': dict(decay_mult=0.)})) 72 | 73 | lr_config = dict(step=[8, 11]) 74 | runner = dict(type='EpochBasedRunner', max_epochs=12) 75 | find_unused_parameters = True 76 | -------------------------------------------------------------------------------- /detection/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 
4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | from .checkpoint import load_checkpoint 12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor 13 | from .customized_text import CustomizedTextLoggerHook 14 | 15 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 16 | -------------------------------------------------------------------------------- /detection/mmcv_custom/customized_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import datetime 10 | from collections import OrderedDict 11 | 12 | import torch 13 | 14 | import mmcv 15 | from mmcv.runner import HOOKS 16 | from mmcv.runner import TextLoggerHook 17 | 18 | 19 | @HOOKS.register_module() 20 | class CustomizedTextLoggerHook(TextLoggerHook): 21 | """Customized Text Logger hook. 22 | 23 | This logger prints out both lr and layer_0_lr. 24 | 25 | """ 26 | 27 | def _log_info(self, log_dict, runner): 28 | # print exp name for users to distinguish experiments 29 | # at every ``interval_exp_name`` iterations and the end of each epoch 30 | if runner.meta is not None and 'exp_name' in runner.meta: 31 | if (self.every_n_iters(runner, self.interval_exp_name)) or ( 32 | self.by_epoch and self.end_of_epoch(runner)): 33 | exp_info = f'Exp name: {runner.meta["exp_name"]}' 34 | runner.logger.info(exp_info) 35 | 36 | if log_dict['mode'] == 'train': 37 | lr_str = {} 38 | for lr_type in ['lr', 'layer_0_lr']: 39 | if isinstance(log_dict[lr_type], dict): 40 | lr_str[lr_type] = [] 41 | for k, val in log_dict[lr_type].items(): 42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}') 43 | lr_str[lr_type] = ' '.join(lr_str[lr_type]) 44 | else: 45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}' 46 | 47 | # by epoch: Epoch [4][100/1000] 48 | # by iter: Iter [100/100000] 49 | if self.by_epoch: 50 | log_str = f'Epoch [{log_dict["epoch"]}]' \ 51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' 52 | else: 53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' 54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, ' 55 | 56 | if 'time' in log_dict.keys(): 57 | self.time_sec_tot += (log_dict['time'] * self.interval) 58 | time_sec_avg = self.time_sec_tot / ( 59 | runner.iter - self.start_iter + 1) 60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) 61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec))) 62 | log_str += f'eta: {eta_str}, ' 63 | log_str += f'time: {log_dict["time"]:.3f}, ' \ 64 | f'data_time: {log_dict["data_time"]:.3f}, ' 65 | # statistic memory 66 | if torch.cuda.is_available(): 67 | log_str += f'memory: {log_dict["memory"]}, ' 68 | else: 69 | # val/test time 70 | # here 1000 is the length of the val dataloader 71 | # by epoch: Epoch[val] [4][1000] 72 | # by iter: Iter[val] [1000] 73 | if self.by_epoch: 74 | log_str = f'Epoch({log_dict["mode"]}) ' \ 75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' 76 | else: 77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' 78 | 79 | log_items = [] 80 | for name, val in log_dict.items(): 81 | # TODO: resolve this hack 82 | # these items have been in log_str 83 | if name in [ 84 |
'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time', 85 | 'memory', 'epoch' 86 | ]: 87 | continue 88 | if isinstance(val, float): 89 | val = f'{val:.4f}' 90 | log_items.append(f'{name}: {val}') 91 | log_str += ', '.join(log_items) 92 | 93 | runner.logger.info(log_str) 94 | 95 | 96 | def log(self, runner): 97 | if 'eval_iter_num' in runner.log_buffer.output: 98 | # this doesn't modify runner.iter and is regardless of by_epoch 99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num') 100 | else: 101 | cur_iter = self.get_iter(runner, inner_iter=True) 102 | 103 | log_dict = OrderedDict( 104 | mode=self.get_mode(runner), 105 | epoch=self.get_epoch(runner), 106 | iter=cur_iter) 107 | 108 | # record lr and layer_0_lr 109 | cur_lr = runner.current_lr() 110 | if isinstance(cur_lr, list): 111 | log_dict['layer_0_lr'] = min(cur_lr) 112 | log_dict['lr'] = max(cur_lr) 113 | else: 114 | assert isinstance(cur_lr, dict) 115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 116 | for k, lr_ in cur_lr.items(): 117 | assert isinstance(lr_, list) 118 | log_dict['layer_0_lr'].update({k: min(lr_)}) 119 | log_dict['lr'].update({k: max(lr_)}) 120 | 121 | if 'time' in runner.log_buffer.output: 122 | # statistic memory 123 | if torch.cuda.is_available(): 124 | log_dict['memory'] = self._get_max_memory(runner) 125 | 126 | log_dict = dict(log_dict, **runner.log_buffer.output) 127 | 128 | self._log_info(log_dict, runner) 129 | self._dump_log(log_dict, runner) 130 | return log_dict 131 | -------------------------------------------------------------------------------- /detection/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
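# (Editor's sketch, not part of the original file.) The constructor below assigns each
# parameter group an lr scale of decay_rate ** (num_layers - layer_id - 1), with
# num_layers = paramwise_cfg['num_layers'] + 2. For the KW ConvNeXt-Tiny config above
# (decay_rate=0.95, num_layers=6, so 8 effective layers), the resulting scales are:
#
#     layer_id 0 (stem, pos_embed, cls_token):  0.95 ** 7 ~= 0.698
#     layer_id 4 (middle blocks of stage 2):    0.95 ** 3 ~= 0.857
#     layer_id 7 (non-backbone params):         0.95 ** 0  = 1.0
#
# The same arithmetic as a minimal standalone check:
#
#     decay_rate, num_layers = 0.95, 6 + 2
#     scales = [decay_rate ** (num_layers - i - 1) for i in range(num_layers)]
#     assert scales[-1] == 1.0 and abs(scales[0] - 0.95 ** 7) < 1e-12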
7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
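# (Editor's note) the branch above exempts 1-D tensors (norm weights), biases, and
# position/class tokens from weight decay; all remaining parameters fall through to
# the "decay" group below and keep the optimizer's base weight decay.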
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 3 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnext import ResNeXt 9 | from .ssd_vgg import SSDVGG 10 | from .trident_resnet import TridentResNet 11 | from .swin_transformer import SwinTransformer 12 | from .resnet import ResNet 13 | from .kw_resnet import KW_ResNet 14 | from .convnext import ConvNeXt 15 | from .kw_convnext import KW_ConvNeXt 16 | from .mobilenetv2 import MobileNetV2 17 | from .kw_mobilenetv2 import KW_MobileNetV2 18 | 19 | __all__ = [ 20 | 'RegNet', 'ResNet', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 21 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 22 | 'TridentResNet', 'SwinTransformer', 'KW_ResNet', 'ConvNeXt', 'KW_ConvNeXt', 'MobileNetV2', 'KW_MobileNetV2' 23 | ] -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/convnext.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from timm.models.layers import trunc_normal_, DropPath 6 | 7 | from mmcv_custom import load_checkpoint 8 | from mmdet.utils import get_root_logger 9 | from ..builder import BACKBONES 10 
| 11 | 12 | class Block(nn.Module): 13 | r""" ConvNeXt Block. There are two equivalent implementations: 14 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 15 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 16 | We use (2) as we find it slightly faster in PyTorch 17 | 18 | Args: 19 | dim (int): Number of input channels. 20 | drop_path (float): Stochastic depth rate. Default: 0.0 21 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 22 | """ 23 | 24 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 25 | super().__init__() 26 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 27 | self.norm = LayerNorm(dim, eps=1e-6) 28 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 29 | self.act = nn.GELU() 30 | self.pwconv2 = nn.Linear(4 * dim, dim) 31 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 32 | requires_grad=True) if layer_scale_init_value > 0 else None 33 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 34 | 35 | def forward(self, x): 36 | input = x 37 | x = self.dwconv(x) 38 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 39 | x = self.norm(x) 40 | x = self.pwconv1(x) 41 | x = self.act(x) 42 | x = self.pwconv2(x) 43 | if self.gamma is not None: 44 | x = self.gamma * x 45 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 46 | 47 | x = input + self.drop_path(x) 48 | return x 49 | 50 | 51 | @BACKBONES.register_module() 52 | class ConvNeXt(nn.Module): 53 | r""" ConvNeXt 54 | A PyTorch impl of : `A ConvNet for the 2020s` - 55 | https://arxiv.org/pdf/2201.03545.pdf 56 | Args: 57 | in_chans (int): Number of input image channels. Default: 3 58 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 59 | dims (tuple(int)): Feature dimension at each stage. Default: [96, 192, 384, 768] 60 | drop_path_rate (float): Stochastic depth rate. Default: 0. 61 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 62 | out_indices (tuple(int)): Indices of stages whose normalized feature maps 63 | are returned to the neck. Default: [0, 1, 2, 3]
64 | """ 65 | 66 | def __init__(self, in_chans=3, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], 67 | drop_path_rate=0., layer_scale_init_value=1e-6, out_indices=[0, 1, 2, 3], 68 | ): 69 | super().__init__() 70 | 71 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 72 | stem = nn.Sequential( 73 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 74 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 75 | ) 76 | self.downsample_layers.append(stem) 77 | for i in range(3): 78 | downsample_layer = nn.Sequential( 79 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 80 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 81 | ) 82 | self.downsample_layers.append(downsample_layer) 83 | 84 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 85 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 86 | cur = 0 87 | for i in range(4): 88 | stage = nn.Sequential( 89 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 90 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 91 | ) 92 | self.stages.append(stage) 93 | cur += depths[i] 94 | 95 | self.out_indices = out_indices 96 | 97 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first") 98 | for i_layer in range(4): 99 | layer = norm_layer(dims[i_layer]) 100 | layer_name = f'norm{i_layer}' 101 | self.add_module(layer_name, layer) 102 | 103 | self.apply(self._init_weights) 104 | 105 | def _init_weights(self, m): 106 | if isinstance(m, (nn.Conv2d, nn.Linear)): 107 | trunc_normal_(m.weight, std=.02) 108 | nn.init.constant_(m.bias, 0) 109 | 110 | def init_weights(self, pretrained=None): 111 | """Initialize the weights in backbone. 112 | Args: 113 | pretrained (str, optional): Path to pre-trained weights. 114 | Defaults to None. 115 | """ 116 | 117 | def _init_weights(m): 118 | if isinstance(m, nn.Linear): 119 | trunc_normal_(m.weight, std=.02) 120 | if isinstance(m, nn.Linear) and m.bias is not None: 121 | nn.init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.LayerNorm): 123 | nn.init.constant_(m.bias, 0) 124 | nn.init.constant_(m.weight, 1.0) 125 | 126 | if isinstance(pretrained, str): 127 | self.apply(_init_weights) 128 | logger = get_root_logger() 129 | load_checkpoint(self, pretrained, strict=False, logger=logger) 130 | elif pretrained is None: 131 | self.apply(_init_weights) 132 | else: 133 | raise TypeError('pretrained must be a str or None') 134 | 135 | def forward_features(self, x): 136 | outs = [] 137 | for i in range(4): 138 | x = self.downsample_layers[i](x) 139 | x = self.stages[i](x) 140 | if i in self.out_indices: 141 | norm_layer = getattr(self, f'norm{i}') 142 | x_out = norm_layer(x) 143 | outs.append(x_out) 144 | 145 | return tuple(outs) 146 | 147 | def forward(self, x): 148 | x = self.forward_features(x) 149 | return x 150 | 151 | 152 | class LayerNorm(nn.Module): 153 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 154 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 155 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 156 | with shape (batch_size, channels, height, width). 
157 | """ 158 | 159 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 160 | super().__init__() 161 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 162 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 163 | self.eps = eps 164 | self.data_format = data_format 165 | if self.data_format not in ["channels_last", "channels_first"]: 166 | raise NotImplementedError 167 | self.normalized_shape = (normalized_shape,) 168 | 169 | def forward(self, x): 170 | if self.data_format == "channels_last": 171 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 172 | elif self.data_format == "channels_first": 173 | u = x.mean(1, keepdim=True) 174 | s = (x - u).pow(2).mean(1, keepdim=True) 175 | x = (x - u) / torch.sqrt(s + self.eps) 176 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 177 | return x -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_convnext.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | from functools import partial 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from timm.models.layers import trunc_normal_, DropPath 13 | from timm.models.registry import register_model 14 | from .kernel_warehouse import Warehouse_Manager 15 | 16 | from ..builder import BACKBONES 17 | from mmcv.runner import load_checkpoint 18 | from mmdet.utils import get_root_logger 19 | 20 | class Block(nn.Module): 21 | r""" ConvNeXt Block. There are two equivalent implementations: 22 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 23 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 24 | We use (2) as we find it slightly faster in PyTorch 25 | 26 | Args: 27 | dim (int): Number of input channels. 28 | drop_path (float): Stochastic depth rate. Default: 0.0 29 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 30 | """ 31 | 32 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, 33 | warehouse_manager=None, stage_idx=-1, layer_idx=-1): 34 | super().__init__() 35 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim, layer_type='conv2d', 36 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv 37 | self.norm = LayerNorm(dim, eps=1e-6) 38 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0, layer_type='conv2d', 39 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers 40 | self.act = nn.GELU() 41 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0, layer_type='conv2d', 42 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2)) 43 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]), 44 | requires_grad=True) if layer_scale_init_value > 0 else None 45 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 46 | 47 | def forward(self, x): 48 | input = x 49 | x = self.dwconv(x) 50 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 51 | x = self.norm(x).permute(0, 3, 1, 2) 52 | x = self.pwconv1(x) 53 | x = self.act(x) 54 | x = self.pwconv2(x) 55 | if self.gamma is not None: 56 | x = self.gamma * x 57 | #x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 58 | 59 | x = input + self.drop_path(x) 60 | return x 61 | 62 | @BACKBONES.register_module() 63 | class KW_ConvNeXt(nn.Module): 64 | r""" ConvNeXt 65 | A PyTorch impl of : `A ConvNet for the 2020s` - 66 | https://arxiv.org/pdf/2201.03545.pdf 67 | 68 | Args: 69 | in_chans (int): Number of input image channels. Default: 3 70 | num_classes (int): Number of classes for classification head. Default: 1000 71 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 72 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 73 | drop_path_rate (float): Stochastic depth rate. Default: 0. 74 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 75 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 76 | """ 77 | 78 | def __init__(self, in_chans=3, num_classes=1000, 79 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 80 | layer_scale_init_value=1e-6, head_init_scale=1., 81 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 82 | sharing_range=('layer', 'pwconv'), out_indices=[0, 1, 2, 3], **kwargs 83 | ): 84 | super().__init__() 85 | 86 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 87 | cell_outplane_ratio, sharing_range, norm_layer=nn.LayerNorm) 88 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 89 | stem = nn.Sequential( 90 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4, layer_type='conv2d', 91 | warehouse_name='stage{}_conv0'.format('stem')), 92 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 93 | ) 94 | self.downsample_layers.append(stem) 95 | for i in range(3): 96 | downsample_layer = nn.Sequential( 97 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 98 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2, layer_type='conv2d', 99 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')), 100 | ) 101 | self.downsample_layers.append(downsample_layer) 102 | 103 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 104 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 105 | cur = 0 106 | for i in range(4): 107 | stage = nn.Sequential( 108 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 109 | layer_scale_init_value=layer_scale_init_value, 110 | warehouse_manager=self.warehouse_manager, 111 | stage_idx=i, layer_idx=j, 112 | ) for j in range(depths[i])] 113 | ) 114 | self.stages.append(stage) 115 | cur += depths[i] 116 | 117 | self.warehouse_manager.store() 118 | self.warehouse_manager.allocate(self) 119 | self.net_update_temperature(0) 120 | 121 | self.out_indices = out_indices 122 | 123 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first") 124 | for i_layer in range(4): 125 | layer = norm_layer(dims[i_layer]) 126 | layer_name = f'norm{i_layer}' 127 | self.add_module(layer_name, layer) 128 | 129 | self.apply(self._init_weights) 130 | 131 | def net_update_temperature(self, temp): 132 | for m in 
self.modules(): 133 | if hasattr(m, "update_temperature"): 134 | m.update_temperature(temp) 135 | 136 | def train(self, mode=True): 137 | """Convert the model into training mode while keeping normalization layers 138 | frozen.""" 139 | super(KW_ConvNeXt, self).train(mode) 140 | if mode: 141 | for m in self.modules(): 142 | # trick: eval() affects BatchNorm layers only 143 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 144 | m.eval() 145 | 146 | def _init_weights(self, m): 147 | if isinstance(m, (nn.Conv2d, nn.Linear)): 148 | trunc_normal_(m.weight, std=.02) 149 | if m.bias is not None: 150 | nn.init.constant_(m.bias, 0) 151 | 152 | def init_weights(self, pretrained=None): 153 | """Initialize the weights in backbone. 154 | Args: 155 | pretrained (str, optional): Path to pre-trained weights. 156 | Defaults to None. 157 | """ 158 | 159 | def _init_weights(m): 160 | if isinstance(m, nn.Linear): 161 | trunc_normal_(m.weight, std=.02) 162 | if isinstance(m, nn.Linear) and m.bias is not None: 163 | nn.init.constant_(m.bias, 0) 164 | elif isinstance(m, nn.LayerNorm): 165 | nn.init.constant_(m.bias, 0) 166 | nn.init.constant_(m.weight, 1.0) 167 | 168 | if isinstance(pretrained, str): 169 | self.apply(_init_weights) 170 | logger = get_root_logger() 171 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 172 | elif pretrained is None: 173 | self.apply(_init_weights) 174 | else: 175 | raise TypeError('pretrained must be a str or None') 176 | 177 | def forward_features(self, x): 178 | outs = [] 179 | for i in range(4): 180 | x = self.downsample_layers[i](x) 181 | x = self.stages[i](x) 182 | if i in self.out_indices: 183 | norm_layer = getattr(self, f'norm{i}') 184 | x_out = norm_layer(x) 185 | outs.append(x_out) 186 | 187 | return tuple(outs) 188 | 189 | def forward(self, x): 190 | x = self.forward_features(x) 191 | return x 192 | 193 | 194 | class LayerNorm(nn.Module): 195 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 196 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 197 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 198 | with shape (batch_size, channels, height, width).
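    (Editor's note) as in convnext.py, the channels_first path is computed manually in
    forward() below because F.layer_norm normalizes only over trailing dimensions.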
199 | """ 200 | 201 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 202 | super().__init__() 203 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 204 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 205 | self.eps = eps 206 | self.data_format = data_format 207 | if self.data_format not in ["channels_last", "channels_first"]: 208 | raise NotImplementedError 209 | self.normalized_shape = (normalized_shape,) 210 | 211 | def forward(self, x): 212 | if self.data_format == "channels_last": 213 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 214 | elif self.data_format == "channels_first": 215 | u = x.mean(1, keepdim=True) 216 | s = (x - u).pow(2).mean(1, keepdim=True) 217 | x = (x - u) / torch.sqrt(s + self.eps) 218 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 219 | return x 220 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from .kernel_warehouse import Warehouse_Manager 3 | 4 | from ..builder import BACKBONES 5 | from mmcv.runner import load_checkpoint 6 | from mmdet.utils import get_root_logger 7 | 8 | 9 | def _make_divisible(v, divisor, min_value=None): 10 | """ 11 | This function is taken from the original tf repo. 12 | It ensures that all layers have a channel number that is divisible by 8 13 | It can be seen here: 14 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 15 | :param v: 16 | :param divisor: 17 | :param min_value: 18 | :return: 19 | """ 20 | if min_value is None: 21 | min_value = divisor 22 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 23 | # Make sure that round down does not go down by more than 10%. 
24 | if new_v < 0.9 * v: 25 | new_v += divisor 26 | return new_v 27 | 28 | 29 | class ConvBNReLU(nn.Sequential): 30 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 31 | warehouse_name=None, warehouse_manager=None, enabled=True): 32 | padding = (kernel_size - 1) // 2 33 | super(ConvBNReLU, self).__init__( 34 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 35 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 36 | norm_layer(out_planes), 37 | nn.ReLU6(inplace=True) 38 | ) 39 | 40 | 41 | class InvertedResidual(nn.Module): 42 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 43 | warehouse_manager=None): 44 | super(InvertedResidual, self).__init__() 45 | self.stride = stride 46 | assert stride in [1, 2] 47 | hidden_dim = int(round(inp * expand_ratio)) 48 | self.use_res_connect = self.stride == 1 and inp == oup 49 | 50 | layers = [] 51 | if expand_ratio != 1: 52 | # pw 53 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 54 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 55 | warehouse_manager=warehouse_manager)) 56 | 57 | layers.extend([ 58 | # dw 59 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 60 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 61 | warehouse_manager=warehouse_manager), 62 | # pw-linear 63 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 64 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)), 65 | norm_layer(oup), 66 | ]) 67 | self.conv = nn.Sequential(*layers) 68 | 69 | def forward(self, x): 70 | if self.use_res_connect: 71 | return x + self.conv(x) 72 | else: 73 | return self.conv(x) 74 | 75 | 76 | @BACKBONES.register_module() 77 | class KW_MobileNetV2(nn.Module): 78 | def __init__(self, 79 | num_classes=1000, 80 | width_mult=1.0, 81 | inverted_residual_setting=None, 82 | round_nearest=8, 83 | block=None, 84 | norm_layer=None, 85 | dropout=0.1, 86 | reduction=0.0625, 87 | cell_num_ratio=1, 88 | cell_inplane_ratio=1, 89 | cell_outplane_ratio=1, 90 | sharing_range=('layer', 'pwconv'), 91 | frozen_stages=0, 92 | out_indices=(0, 1, 2, 3), 93 | norm_eval=True, 94 | **kwargs): 95 | """ 96 | MobileNet V2 main class 97 | 98 | Args: 99 | num_classes (int): Number of classes 100 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 101 | inverted_residual_setting: Network structure 102 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 103 | Set to 1 to turn off rounding 104 | block: Module specifying inverted residual building block for mobilenet 105 | norm_layer: Module specifying the normalization layer to use 106 | 107 | """ 108 | super(KW_MobileNetV2, self).__init__() 109 | if block is None: 110 | block = InvertedResidual 111 | 112 | if norm_layer is None: 113 | norm_layer = nn.BatchNorm2d 114 | 115 | input_channel = 32 116 | last_channel = 1280 117 | self.stage_idx = [1, 3, 6, 13, 18] 118 | self.frozen_stages = frozen_stages 119 | self.out_indices = [self.stage_idx[x] for x in out_indices] 120 | self.norm_eval = norm_eval 121 | 122 | if inverted_residual_setting is None: 123 | inverted_residual_setting = [ 124 | # t, c, n, s 125 | [1, 16, 1, 1], 126 | [6, 24, 2, 2], 127 | [6, 32, 3, 2], 128 | [6, 64, 4, 2], 129 | [6, 96, 3,
1], # 0.3M 130 | [6, 160, 3, 2], # 0.92M 131 | [6, 320, 1, 1], # 1.22M 132 | ] 133 | 134 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6] 135 | 136 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 137 | cell_outplane_ratio, sharing_range) 138 | 139 | # only check the first element, assuming user knows t,c,n,s are required 140 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 141 | raise ValueError("inverted_residual_setting should be a non-empty " 142 | "list of 4-element lists, got {}".format(inverted_residual_setting)) 143 | 144 | # building first layer 145 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 146 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 147 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 148 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 149 | 150 | layer_idx = 0 151 | # building inverted residual blocks 152 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 153 | output_channel = _make_divisible(c * width_mult, round_nearest) 154 | for i in range(n): 155 | stride = s if i == 0 else 1 156 | 157 | if i == 0 and idx > 0: 158 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 159 | else: 160 | handover = False 161 | 162 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 163 | 164 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 165 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 166 | layer_idx=layer_idx)) 167 | 168 | input_channel = output_channel 169 | layer_idx += 1 170 | 171 | if handover: 172 | layer_idx = 0 173 | 174 | # building last several layers 175 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 176 | warehouse_manager=self.warehouse_manager, 177 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 178 | # make it nn.Sequential 179 | self.features = nn.Sequential(*features) 180 | # building classifier 181 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() 182 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True) 183 | 184 | # weight initialization 185 | for m in self.modules(): 186 | if isinstance(m, nn.Conv2d): 187 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 188 | if m.bias is not None: 189 | nn.init.zeros_(m.bias) 190 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 191 | nn.init.ones_(m.weight) 192 | nn.init.zeros_(m.bias) 193 | elif isinstance(m, nn.Linear): 194 | nn.init.normal_(m.weight, 0, 0.01) 195 | nn.init.zeros_(m.bias) 196 | 197 | 198 | self.warehouse_manager.store() 199 | self.warehouse_manager.allocate(self) 200 | self.net_update_temperature(0) 201 | 202 | def net_update_temperature(self, temp): 203 | for m in self.modules(): 204 | if hasattr(m, "update_temperature"): 205 | m.update_temperature(temp) 206 | 207 | def _freeze_stages(self): 208 | if self.frozen_stages >= 0: 209 | for i in range(self.stage_idx[self.frozen_stages] + 1): 210 | m = self.features[i] 211 | m.eval() 212 | for param in m.parameters(): 213 | param.requires_grad = False 214 | 215 | def train(self, mode=True): 216 | """Convert the model into training mode while keeping normalization layers 217 | frozen.""" 218 | super(KW_MobileNetV2, self).train(mode) 219 | self._freeze_stages() 220 | 221 |
if mode and self.norm_eval: 222 | for m in self.modules(): 223 | # trick: eval() affects BatchNorm layers only 224 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 225 | m.eval() 226 | 227 | def init_weights(self, pretrained=None): 228 | """Initialize the weights in backbone. 229 | Args: 230 | pretrained (str, optional): Path to pre-trained weights. 231 | Defaults to None. 232 | """ 233 | 234 | for m in self.modules(): 235 | if isinstance(m, nn.Conv2d): 236 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 237 | if m.bias is not None: 238 | nn.init.zeros_(m.bias) 239 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 240 | nn.init.constant_(m.weight, 1) 241 | nn.init.constant_(m.bias, 0) 242 | elif isinstance(m, nn.Linear): 243 | nn.init.normal_(m.weight, 0, 0.01) 244 | if m.bias is not None: 245 | nn.init.zeros_(m.bias) 246 | 247 | if isinstance(pretrained, str): 248 | logger = get_root_logger() 249 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 250 | elif pretrained is not None: 251 | raise TypeError('pretrained must be a str or None') 252 | 253 | def _forward_impl(self, x): 254 | outs = [] 255 | for idx, layer in enumerate(self.features): 256 | x = layer(x) 257 | if idx in self.out_indices: 258 | outs.append(x) 259 | return outs 260 | 261 | def forward(self, x): 262 | return self._forward_impl(x) -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .kernel_warehouse import Warehouse_Manager 4 | from timm.models.layers import trunc_normal_, DropPath 5 | 6 | from ..builder import BACKBONES 7 | from mmcv.runner import load_checkpoint 8 | from mmdet.utils import get_root_logger 9 | 10 | __all__ = ['KW_ResNet'] 11 | 12 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 13 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, 14 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 15 | 16 | 17 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 18 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, 19 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | expansion = 1 24 | 25 | def __init__(self, inplanes, planes, stride=1, downsample=None, 26 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 27 | super(BasicBlock, self).__init__() 28 | conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0) 29 | self.conv1 = kwconv3x3(inplanes, planes, stride, 30 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 31 | warehouse_manager=warehouse_manager) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.relu = nn.ReLU(inplace=True) 34 | layer_idx = 0 if warehouse_handover else layer_idx 35 | self.conv2 = kwconv3x3(planes, planes, 36 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 37 | warehouse_manager=warehouse_manager) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | self.drop_path = DropPath(drop_path) if drop_path > 0.
else nn.Identity() 42 | 43 | def forward(self, x): 44 | identity = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | identity = self.downsample(x) 55 | 56 | out = identity + self.drop_path(out) 57 | out = self.relu(out) 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None, 65 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 66 | super(Bottleneck, self).__init__() 67 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx 68 | self.conv1 = kwconv1x1(inplanes, planes, 69 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 70 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0)) 71 | self.bn1 = nn.BatchNorm2d(planes) 72 | layer_idx = 0 if warehouse_handover else layer_idx 73 | self.conv2 = kwconv3x3(planes, planes, stride, 74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 75 | warehouse_manager=warehouse_manager) 76 | self.bn2 = nn.BatchNorm2d(planes) 77 | self.conv3 = kwconv1x1(planes, planes * self.expansion, 78 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2), 79 | warehouse_manager=warehouse_manager) 80 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.downsample = downsample 83 | self.stride = stride 84 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 85 | 86 | def forward(self, x): 87 | identity = x 88 | 89 | out = self.conv1(x) 90 | out = self.bn1(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv2(out) 94 | out = self.bn2(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv3(out) 98 | out = self.bn3(out) 99 | 100 | if self.downsample is not None: 101 | identity = self.downsample(x) 102 | 103 | out = identity + self.drop_path(out) 104 | out = self.relu(out) 105 | return out 106 | 107 | 108 | @BACKBONES.register_module() 109 | class KW_ResNet(nn.Module): 110 | arch_settings = { 111 | 18: (BasicBlock, (2, 2, 2, 2)), 112 | 34: (BasicBlock, (3, 4, 6, 3)), 113 | 50: (Bottleneck, (3, 4, 6, 3)), 114 | 101: (Bottleneck, (3, 4, 23, 3)), 115 | 152: (Bottleneck, (3, 8, 36, 3)) 116 | } 117 | 118 | def __init__(self, depth, num_classes=1000, dropout=0.1, reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, 119 | cell_outplane_ratio=1, sharing_range=('layer', 'conv'), drop_path_rate=0.1, frozen_stages=0, 120 | out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs): 121 | super(KW_ResNet, self).__init__() 122 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio, 123 | sharing_range) 124 | block, layers = self.arch_settings[depth] 125 | self.inplanes = 64 126 | self.layer_idx = 0 127 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) 128 | self.bn1 = nn.BatchNorm2d(self.inplanes) 129 | self.relu = nn.ReLU(inplace=True) 130 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 131 | self.layer1 = self._make_layer(block, 64, layers[0], 132 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 133 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 134 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 135 | self.layer3 = 
self._make_layer(block, 256, layers[2], stride=2, 136 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 137 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 138 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 139 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 140 | self.fc = nn.Linear(512 * block.expansion, num_classes) 141 | 142 | for m in self.modules(): 143 | if isinstance(m, nn.Conv2d): 144 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 146 | nn.init.constant_(m.weight, 1) 147 | nn.init.constant_(m.bias, 0) 148 | 149 | self.warehouse_manager.store() 150 | self.warehouse_manager.allocate(self) 151 | 152 | self.frozen_stages = frozen_stages 153 | self.out_indices = out_indices 154 | self.norm_eval = norm_eval 155 | self.net_update_temperature(0) 156 | 157 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.): 158 | downsample = None 159 | if stride != 1 or self.inplanes != planes * block.expansion: 160 | downsample = nn.Sequential( 161 | warehouse_manager.reserve( 162 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0, 163 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx - 1, self.layer_idx + 1, 0), 164 | enabled=(stride != 1), bias=False), 165 | nn.BatchNorm2d(planes * block.expansion), 166 | ) 167 | 168 | layers = [] 169 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx, 170 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path)) 171 | self.layer_idx = 1 172 | self.inplanes = planes * block.expansion 173 | for idx in range(1, blocks): 174 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx, 175 | warehouse_manager=warehouse_manager, drop_path=drop_path)) 176 | self.layer_idx += 1 177 | return nn.Sequential(*layers) 178 | 179 | def net_update_temperature(self, temp): 180 | for m in self.modules(): 181 | if hasattr(m, "update_temperature"): 182 | m.update_temperature(temp) 183 | 184 | def _freeze_stages(self): 185 | if self.frozen_stages >= 0: 186 | self.bn1.eval() 187 | for m in [self.conv1, self.bn1]: 188 | for param in m.parameters(): 189 | param.requires_grad = False 190 | 191 | for i in range(1, self.frozen_stages + 1): 192 | m = getattr(self, f'layer{i}') 193 | m.eval() 194 | for param in m.parameters(): 195 | param.requires_grad = False 196 | 197 | def train(self, mode=True): 198 | """Convert the model into training mode while keeping normalization layers 199 | frozen.""" 200 | super(KW_ResNet, self).train(mode) 201 | self._freeze_stages() 202 | 203 | if mode and self.norm_eval: 204 | for m in self.modules(): 205 | # trick: eval() has an effect on BatchNorm layers only 206 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 207 | m.eval() 208 | 209 | def init_weights(self, pretrained=None): 210 | """Initialize the weights in backbone. 211 | Args: 212 | pretrained (str, optional): Path to pre-trained weights. 213 | Defaults to None.
214 | """ 215 | 216 | for m in self.modules(): 217 | if isinstance(m, nn.Conv2d): 218 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 219 | if m.bias is not None: 220 | nn.init.zeros_(m.bias) 221 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | elif isinstance(m, nn.Linear): 225 | nn.init.normal_(m.weight, 0, 0.01) 226 | if m.bias is not None: 227 | nn.init.zeros_(m.bias) 228 | 229 | if isinstance(pretrained, str): 230 | logger = get_root_logger() 231 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu') 232 | elif pretrained is not None: 233 | raise TypeError('pretrained must be a str or None') 234 | 235 | def _forward_impl(self, x): 236 | x = self.conv1(x) 237 | x = self.bn1(x) 238 | x = self.relu(x) 239 | x = self.maxpool(x) 240 | outs = [] 241 | for idx in range(4): 242 | layer = getattr(self, f'layer{idx + 1}') 243 | x = layer(x) 244 | if idx in self.out_indices: 245 | outs.append(x) 246 | return tuple(outs) 247 | 248 | 249 | def forward(self, x): 250 | return self._forward_impl(x) 251 | 252 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from timm.models.registry import register_model 3 | 4 | from ..builder import BACKBONES 5 | from mmcv.runner import load_checkpoint 6 | from mmdet.utils import get_root_logger 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | """ 10 | This function is taken from the original tf repo. 11 | It ensures that all layers have a channel number that is divisible by 8 12 | It can be seen here: 13 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 14 | :param v: 15 | :param divisor: 16 | :param min_value: 17 | :return: 18 | """ 19 | if min_value is None: 20 | min_value = divisor 21 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 22 | # Make sure that round down does not go down by more than 10%. 
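# Worked example of the rounding above (editor's note): for v = 16 and divisor = 8,
# new_v = max(8, int(16 + 4) // 8 * 8) = 16, and 16 >= 0.9 * 16, so 16 is kept as is.
# For v = 10 and divisor = 8, new_v = int(10 + 4) // 8 * 8 = 8, but 8 < 0.9 * 10 = 9,
# so the guard below bumps the result up to 16 rather than shrinking the layer by more than 10%.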
23 | if new_v < 0.9 * v: 24 | new_v += divisor 25 | return new_v 26 | 27 | 28 | class ConvBNReLU(nn.Sequential): 29 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d): 30 | padding = (kernel_size - 1) // 2 31 | super(ConvBNReLU, self).__init__( 32 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 33 | norm_layer(out_planes), 34 | nn.ReLU6(inplace=True) 35 | ) 36 | 37 | 38 | class InvertedResidual(nn.Module): 39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d): 40 | super(InvertedResidual, self).__init__() 41 | self.stride = stride 42 | hidden_dim = int(round(inp * expand_ratio)) 43 | self.use_res_connect = self.stride == 1 and inp == oup 44 | 45 | layers = [] 46 | if expand_ratio != 1: 47 | # pw 48 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 49 | layers.extend([ 50 | # dw 51 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 52 | # pw-linear 53 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 54 | norm_layer(oup), 55 | ]) 56 | self.conv = nn.Sequential(*layers) 57 | 58 | def forward(self, x): 59 | if self.use_res_connect: 60 | return x + self.conv(x) 61 | else: 62 | return self.conv(x) 63 | 64 | 65 | @BACKBONES.register_module() 66 | class MobileNetV2(nn.Module): 67 | def __init__(self, 68 | num_classes=1000, 69 | width_mult=1.0, 70 | inverted_residual_setting=None, 71 | round_nearest=8, 72 | block=InvertedResidual, 73 | norm_layer=nn.BatchNorm2d, 74 | dropout=0.0, 75 | frozen_stages=0, 76 | out_indices=(0, 1, 2, 3), 77 | norm_eval=True, 78 | **kwargs): 79 | """ 80 | MobileNet V2 main class 81 | 82 | Args: 83 | num_classes (int): Number of classes 84 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 85 | inverted_residual_setting: Network structure 86 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 87 | Set to 1 to turn off rounding 88 | block: Module specifying inverted residual building block for mobilenet 89 | norm_layer: Module specifying the normalization layer to use 90 | 91 | """ 92 | super(MobileNetV2, self).__init__() 93 | 94 | input_channel = 32 95 | last_channel = 1280 96 | self.stage_idx = [1, 3, 6, 13, 18] 97 | self.frozen_stages = frozen_stages 98 | self.out_indices = [self.stage_idx[x] for x in out_indices] 99 | self.norm_eval = norm_eval 100 | 101 | if inverted_residual_setting is None: 102 | inverted_residual_setting = [ 103 | # t, c, n, s 104 | [1, 16, 1, 1], 105 | [6, 24, 2, 2], 106 | [6, 32, 3, 2], 107 | [6, 64, 4, 2], 108 | [6, 96, 3, 1], 109 | [6, 160, 3, 2], 110 | [6, 320, 1, 1], 111 | ] 112 | 113 | # only check the first element, assuming user knows t,c,n,s are required 114 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 115 | raise ValueError("inverted_residual_setting should be non-empty " 116 | "or a 4-element list, got {}".format(inverted_residual_setting)) 117 | 118 | # building first layer 119 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 120 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 121 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 122 | # building inverted residual blocks 123 | for t, c, n, s in inverted_residual_setting: 124 | output_channel = _make_divisible(c * width_mult, round_nearest) 125 | for i in
range(n): 126 | stride = s if i == 0 else 1 127 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 128 | input_channel = output_channel 129 | # building last several layers 130 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer)) 131 | # make it nn.Sequential 132 | self.features = nn.Sequential(*features) 133 | # building classifier 134 | self.classifier = nn.Sequential( 135 | nn.Dropout(dropout), 136 | nn.Linear(self.last_channel, num_classes), 137 | ) 138 | 139 | # weight initialization 140 | for m in self.modules(): 141 | if isinstance(m, nn.Conv2d): 142 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 143 | if m.bias is not None: 144 | nn.init.zeros_(m.bias) 145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 146 | nn.init.ones_(m.weight) 147 | nn.init.zeros_(m.bias) 148 | elif isinstance(m, nn.Linear): 149 | nn.init.normal_(m.weight, 0, 0.01) 150 | nn.init.zeros_(m.bias) 151 | 152 | def _freeze_stages(self): 153 | if self.frozen_stages >= 0: 154 | for i in range(self.stage_idx[self.frozen_stages] + 1): 155 | m = self.features[i] 156 | m.eval() 157 | for param in m.parameters(): 158 | param.requires_grad = False 159 | 160 | def train(self, mode=True): 161 | """Convert the model into training mode while keeping normalization layers 162 | frozen.""" 163 | super(MobileNetV2, self).train(mode) 164 | self._freeze_stages() 165 | 166 | if mode and self.norm_eval: 167 | for m in self.modules(): 168 | # trick: eval() has an effect on BatchNorm layers only 169 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 170 | m.eval() 171 | 172 | def init_weights(self, pretrained=None): 173 | """Initialize the weights in backbone. 174 | Args: 175 | pretrained (str, optional): Path to pre-trained weights. 176 | Defaults to None.
177 | """ 178 | 179 | for m in self.modules(): 180 | if isinstance(m, nn.Conv2d): 181 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 182 | if m.bias is not None: 183 | nn.init.zeros_(m.bias) 184 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 185 | nn.init.constant_(m.weight, 1) 186 | nn.init.constant_(m.bias, 0) 187 | elif isinstance(m, nn.Linear): 188 | nn.init.normal_(m.weight, 0, 0.01) 189 | if m.bias is not None: 190 | nn.init.zeros_(m.bias) 191 | 192 | if isinstance(pretrained, str): 193 | logger = get_root_logger() 194 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 195 | elif pretrained is not None: 196 | raise TypeError('pretrained must be a str or None') 197 | 198 | def _forward_impl(self, x): 199 | outs = [] 200 | for idx, layer in enumerate(self.features): 201 | x = layer(x) 202 | if idx in self.out_indices: 203 | outs.append(x) 204 | return outs 205 | 206 | def forward(self, x): 207 | return self._forward_impl(x) -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from ..builder import BACKBONES 3 | from mmcv.runner import load_checkpoint 4 | from mmdet.utils import get_root_logger 5 | 6 | __all__ = ['ResNet'] 7 | 8 | 9 | def conv3x3(in_planes, out_planes, stride=1): 10 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 11 | padding=1, bias=False) 12 | 13 | 14 | def conv1x1(in_planes, out_planes, stride=1): 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | identity = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | identity = self.downsample(x) 43 | 44 | out += identity 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class Bottleneck(nn.Module): 50 | expansion = 4 51 | 52 | def __init__(self, inplanes, planes, stride=1, downsample=None): 53 | super(Bottleneck, self).__init__() 54 | self.conv1 = conv1x1(inplanes, planes) 55 | self.bn1 = nn.BatchNorm2d(planes) 56 | self.conv2 = conv3x3(planes, planes, stride) 57 | self.bn2 = nn.BatchNorm2d(planes) 58 | self.conv3 = conv1x1(planes, planes * self.expansion) 59 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 60 | self.relu = nn.ReLU(inplace=True) 61 | self.downsample = downsample 62 | self.stride = stride 63 | 64 | def forward(self, x): 65 | identity = x 66 | 67 | out = self.conv1(x) 68 | out = self.bn1(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv2(out) 72 | out = self.bn2(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv3(out) 76 | out = self.bn3(out) 77 | 78 | if self.downsample is not None: 79 | identity = self.downsample(x) 80 | 81 | out += identity 82 | out = self.relu(out) 83 | return out 84 | 85 | 86 | @BACKBONES.register_module() 87 | class ResNet(nn.Module): 88 | 
arch_settings = { 89 | 18: (BasicBlock, (2, 2, 2, 2)), 90 | 34: (BasicBlock, (3, 4, 6, 3)), 91 | 50: (Bottleneck, (3, 4, 6, 3)), 92 | 101: (Bottleneck, (3, 4, 23, 3)), 93 | 152: (Bottleneck, (3, 8, 36, 3)) 94 | } 95 | 96 | def __init__(self, depth, num_classes=1000, 97 | frozen_stages=0, out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs): 98 | super(ResNet, self).__init__() 99 | block, layers = self.arch_settings[depth] 100 | self.inplanes = 64 101 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(self.inplanes) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | self.frozen_stages = frozen_stages 114 | self.out_indices = out_indices 115 | self.norm_eval = norm_eval 116 | 117 | def _make_layer(self, block, planes, blocks, stride=1): 118 | downsample = None 119 | if stride != 1 or self.inplanes != planes * block.expansion: 120 | downsample = nn.Sequential( 121 | conv1x1(self.inplanes, planes * block.expansion, stride), 122 | nn.BatchNorm2d(planes * block.expansion), 123 | ) 124 | 125 | layers = [] 126 | layers.append(block(self.inplanes, planes, stride, downsample)) 127 | self.inplanes = planes * block.expansion 128 | for _ in range(1, blocks): 129 | layers.append(block(self.inplanes, planes)) 130 | 131 | return nn.Sequential(*layers) 132 | 133 | def _freeze_stages(self): 134 | if self.frozen_stages >= 0: 135 | self.bn1.eval() 136 | for m in [self.conv1, self.bn1]: 137 | for param in m.parameters(): 138 | param.requires_grad = False 139 | 140 | for i in range(1, self.frozen_stages + 1): 141 | m = getattr(self, f'layer{i}') 142 | m.eval() 143 | for param in m.parameters(): 144 | param.requires_grad = False 145 | 146 | def train(self, mode=True): 147 | """Convert the model into training mode while keeping normalization layers 148 | frozen.""" 149 | super(ResNet, self).train(mode) 150 | self._freeze_stages() 151 | 152 | if mode and self.norm_eval: 153 | for m in self.modules(): 154 | # trick: eval() has an effect on BatchNorm layers only 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eval() 157 | 158 | def init_weights(self, pretrained=None): 159 | """Initialize the weights in backbone. 160 | Args: 161 | pretrained (str, optional): Path to pre-trained weights. 162 | Defaults to None. 163 | """
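# Note (editor's): the loop below applies the standard random init (Kaiming for convs,
# unit weight / zero bias for norm layers, small normal for linears); when a checkpoint
# path is then passed, load_checkpoint overwrites these values with the pretrained weights.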
164 | for m in self.modules(): 165 | if isinstance(m, nn.Conv2d): 166 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 167 | if m.bias is not None: 168 | nn.init.zeros_(m.bias) 169 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 170 | nn.init.constant_(m.weight, 1) 171 | nn.init.constant_(m.bias, 0) 172 | elif isinstance(m, nn.Linear): 173 | nn.init.normal_(m.weight, 0, 0.01) 174 | nn.init.zeros_(m.bias) 175 | 176 | if isinstance(pretrained, str): 177 | logger = get_root_logger() 178 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu') 179 | elif pretrained is not None: 180 | raise TypeError('pretrained must be a str or None') 181 | 182 | def _forward_impl(self, x): 183 | x = self.conv1(x) 184 | x = self.bn1(x) 185 | x = self.relu(x) 186 | x = self.maxpool(x) 187 | 188 | outs = [] 189 | for idx in range(4): 190 | layer = getattr(self, f'layer{idx + 1}') 191 | x = layer(x) 192 | if idx in self.out_indices: 193 | outs.append(x) 194 | return outs 195 | 196 | def forward(self, x): 197 | return self._forward_impl(x) 198 | -------------------------------------------------------------------------------- /engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Iterable, Optional 3 | import torch 4 | from timm.data import Mixup 5 | from timm.utils import accuracy, ModelEma 6 | 7 | import utils 8 | 9 | 10 | def get_temperature(iteration, epoch, iter_per_epoch, temp_epoch=20, temp_init_value=30.0, temp_end=0.0): 11 | total_iter = iter_per_epoch * temp_epoch 12 | current_iter = iter_per_epoch * epoch + iteration 13 | temperature = temp_end + max(0, (temp_init_value - temp_end) * ((total_iter - current_iter) / max(1.0, total_iter))) 14 | return temperature 15 | 16 | 17 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 18 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 19 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 20 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, log_writer=None, 21 | wandb_logger=None, start_steps=None, lr_schedule_values=None, wd_schedule_values=None, 22 | num_training_steps_per_epoch=None, update_freq=None, use_amp=False, args=None): 23 | model.train(True) 24 | metric_logger = utils.MetricLogger(delimiter=" ") 25 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 26 | metric_logger.add_meter('min_lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 27 | header = 'Epoch: [{}]'.format(epoch) 28 | print_freq = 10 29 | 30 | optimizer.zero_grad() 31 | 32 | for data_iter_step, (samples, targets) in enumerate(metric_logger.log_every(data_loader, print_freq, header)): 33 | step = data_iter_step // update_freq 34 | if step >= num_training_steps_per_epoch: 35 | continue 36 | it = start_steps + step # global training iteration 37 | # Update LR & WD for the first step of each accumulation cycle (note the parentheses: 'and' binds tighter than 'or') 38 | if (lr_schedule_values is not None or wd_schedule_values is not None) and data_iter_step % update_freq == 0: 39 | for i, param_group in enumerate(optimizer.param_groups): 40 | if lr_schedule_values is not None: 41 | param_group["lr"] = lr_schedule_values[it] * param_group["lr_scale"] 42 | if wd_schedule_values is not None and param_group["weight_decay"] > 0: 43 | param_group["weight_decay"] = wd_schedule_values[it] 44 | 45 | if hasattr(model.module, 'net_update_temperature'): 46 | temp = get_temperature(data_iter_step + 1, epoch, len(data_loader), 47 |
temp_epoch=args.temp_epoch, temp_init_value=args.temp_init_value) 48 | model.module.net_update_temperature(temp) 49 | 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | if mixup_fn is not None: 54 | samples, targets = mixup_fn(samples, targets) 55 | 56 | if use_amp: 57 | with torch.cuda.amp.autocast(): 58 | output = model(samples) 59 | loss = criterion(output, targets) 60 | else: # full precision 61 | output = model(samples) 62 | loss = criterion(output, targets) 63 | 64 | loss_value = loss.item() 65 | 66 | if not math.isfinite(loss_value): # this could trigger if using AMP 67 | print("Loss is {}, stopping training".format(loss_value)) 68 | assert math.isfinite(loss_value) 69 | 70 | if use_amp: 71 | # this attribute is added by timm on one optimizer (adahessian) 72 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 73 | loss /= update_freq 74 | grad_norm = loss_scaler(loss, optimizer, clip_grad=max_norm, 75 | parameters=model.parameters(), create_graph=is_second_order, 76 | update_grad=(data_iter_step + 1) % update_freq == 0) 77 | if (data_iter_step + 1) % update_freq == 0: 78 | optimizer.zero_grad() 79 | if model_ema is not None: 80 | model_ema.update(model) 81 | else: # full precision 82 | loss /= update_freq 83 | loss.backward() 84 | if (data_iter_step + 1) % update_freq == 0: 85 | optimizer.step() 86 | optimizer.zero_grad() 87 | if model_ema is not None: 88 | model_ema.update(model) 89 | 90 | torch.cuda.synchronize() 91 | 92 | if mixup_fn is None: 93 | class_acc = (output.max(-1)[-1] == targets).float().mean() 94 | else: 95 | class_acc = None 96 | metric_logger.update(loss=loss_value) 97 | metric_logger.update(class_acc=class_acc) 98 | min_lr = 10. 99 | max_lr = 0. 
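# Note (editor's): under layer-wise lr decay each param group carries its own "lr_scale",
# so different groups can hold different effective learning rates; the scan below records
# the smallest and largest of them for logging.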
100 | for group in optimizer.param_groups: 101 | min_lr = min(min_lr, group["lr"]) 102 | max_lr = max(max_lr, group["lr"]) 103 | 104 | metric_logger.update(lr=max_lr) 105 | metric_logger.update(min_lr=min_lr) 106 | weight_decay_value = None 107 | for group in optimizer.param_groups: 108 | if group["weight_decay"] > 0: 109 | weight_decay_value = group["weight_decay"] 110 | metric_logger.update(weight_decay=weight_decay_value) 111 | if use_amp: 112 | metric_logger.update(grad_norm=grad_norm) 113 | 114 | if log_writer is not None: 115 | log_writer.update(loss=loss_value, head="loss") 116 | log_writer.update(class_acc=class_acc, head="loss") 117 | log_writer.update(lr=max_lr, head="opt") 118 | log_writer.update(min_lr=min_lr, head="opt") 119 | log_writer.update(weight_decay=weight_decay_value, head="opt") 120 | if use_amp: 121 | log_writer.update(grad_norm=grad_norm, head="opt") 122 | log_writer.set_step() 123 | 124 | if wandb_logger: 125 | wandb_logger._wandb.log({ 126 | 'Rank-0 Batch Wise/train_loss': loss_value, 127 | 'Rank-0 Batch Wise/train_max_lr': max_lr, 128 | 'Rank-0 Batch Wise/train_min_lr': min_lr 129 | }, commit=False) 130 | if class_acc is not None: # a 0.0 accuracy tensor is falsy, so test for None explicitly 131 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_class_acc': class_acc}, commit=False) 132 | if use_amp: 133 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_grad_norm': grad_norm}, commit=False) 134 | wandb_logger._wandb.log({'Rank-0 Batch Wise/global_train_step': it}) 135 | 136 | 137 | # gather the stats from all processes 138 | metric_logger.synchronize_between_processes() 139 | print("Averaged stats:", metric_logger) 140 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 141 | 142 | 143 | @torch.no_grad() 144 | def evaluate(data_loader, model, device, use_amp=False): 145 | criterion = torch.nn.CrossEntropyLoss() 146 | 147 | metric_logger = utils.MetricLogger(delimiter=" ") 148 | header = 'Test:' 149 | 150 | # switch to evaluation mode 151 | model.eval() 152 | for batch in metric_logger.log_every(data_loader, 10, header): 153 | images = batch[0] 154 | target = batch[-1] 155 | 156 | images = images.to(device, non_blocking=True) 157 | target = target.to(device, non_blocking=True) 158 | 159 | # compute output 160 | if use_amp: 161 | with torch.cuda.amp.autocast(): 162 | output = model(images) 163 | loss = criterion(output, target) 164 | else: 165 | output = model(images) 166 | loss = criterion(output, target) 167 | 168 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 169 | 170 | batch_size = images.shape[0] 171 | metric_logger.update(loss=loss.item()) 172 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 173 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 174 | # gather the stats from all processes 175 | metric_logger.synchronize_between_processes() 176 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 177 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 178 | 179 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 180 |
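As a quick sanity check of the linear temperature schedule implemented by get_temperature() in engine.py above, the following sketch (editor's addition; it assumes the snippet is run from the repository root so that engine.py is importable) prints a few points of the decay:

from engine import get_temperature

# With 100 iterations per epoch and the defaults (temp_epoch=20, temp_init_value=30.0,
# temp_end=0.0), the temperature decays linearly from 30 to 0 over the first 20 epochs
# and is clamped at 0 afterwards by the max(0, ...) term.
print(get_temperature(0, 0, 100))   # 30.0 at the very start of training
print(get_temperature(0, 10, 100))  # 15.0 halfway through the warm-up
print(get_temperature(0, 20, 100))  # 0.0 once epoch 20 is reached
print(get_temperature(0, 30, 100))  # still 0.0 after the schedule has ended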
-------------------------------------------------------------------------------- /fig/Fig_Architecture.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.pdf -------------------------------------------------------------------------------- /fig/Fig_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import resnet18, resnet50, ResNet 2 | from .kw_resnet import kw_resnet18, kw_resnet50, KW_ResNet 3 | from .convnext import convnext_tiny 4 | from .kw_convnext import kw_convnext_tiny 5 | from .mobilenetv2 import mobilenetv2_100, mobilenetv2_050 6 | from .kw_mobilenetv2 import kw_mobilenetv2_100, kw_mobilenetv2_050 7 | from .kw1d2x_mobilenetv2 import kw1d2x_mobilenetv2_100, kw1d2x_mobilenetv2_050 -------------------------------------------------------------------------------- /models/convnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from timm.models.layers import trunc_normal_, DropPath 5 | from timm.models.registry import register_model 6 | 7 | 8 | class Block(nn.Module): 9 | r""" ConvNeXt Block. There are two equivalent implementations: 10 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 11 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 12 | We use (2) as we find it slightly faster in PyTorch 13 | 14 | Args: 15 | dim (int): Number of input channels. 16 | drop_path (float): Stochastic depth rate. Default: 0.0 17 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 18 | """ 19 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 20 | super().__init__() 21 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 22 | self.norm = LayerNorm(dim, eps=1e-6) 23 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 24 | self.act = nn.GELU() 25 | self.pwconv2 = nn.Linear(4 * dim, dim) 26 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 27 | requires_grad=True) if layer_scale_init_value > 0 else None 28 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 29 | 30 | def forward(self, x): 31 | input = x 32 | x = self.dwconv(x) 33 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 34 | x = self.norm(x) 35 | x = self.pwconv1(x) 36 | x = self.act(x) 37 | x = self.pwconv2(x) 38 | if self.gamma is not None: 39 | x = self.gamma * x 40 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 41 | x = input + self.drop_path(x) 42 | return x 43 | 44 | @register_model 45 | class ConvNeXt(nn.Module): 46 | r""" ConvNeXt 47 | A PyTorch impl of : `A ConvNet for the 2020s` - 48 | https://arxiv.org/pdf/2201.03545.pdf 49 | 50 | Args: 51 | in_chans (int): Number of input image channels. Default: 3 52 | num_classes (int): Number of classes for classification head. Default: 1000 53 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 54 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 55 | drop_path_rate (float): Stochastic depth rate. Default: 0. 56 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 57 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
58 | """ 59 | def __init__(self, in_chans=3, num_classes=1000, 60 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 61 | layer_scale_init_value=1e-6, head_init_scale=1., **kwargs 62 | ): 63 | super().__init__() 64 | 65 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 66 | stem = nn.Sequential( 67 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 68 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 69 | ) 70 | self.downsample_layers.append(stem) 71 | for i in range(3): 72 | downsample_layer = nn.Sequential( 73 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 74 | nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2), 75 | ) 76 | self.downsample_layers.append(downsample_layer) 77 | 78 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 79 | dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 80 | cur = 0 81 | for i in range(4): 82 | stage = nn.Sequential( 83 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 84 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 85 | ) 86 | self.stages.append(stage) 87 | cur += depths[i] 88 | 89 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer 90 | self.head = nn.Linear(dims[-1], num_classes) 91 | 92 | self.apply(self._init_weights) 93 | self.head.weight.data.mul_(head_init_scale) 94 | self.head.bias.data.mul_(head_init_scale) 95 | 96 | def _init_weights(self, m): 97 | if isinstance(m, (nn.Conv2d, nn.Linear)): 98 | trunc_normal_(m.weight, std=.02) 99 | nn.init.constant_(m.bias, 0) 100 | 101 | def forward_features(self, x): 102 | for i in range(4): 103 | x = self.downsample_layers[i](x) 104 | x = self.stages[i](x) 105 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C) 106 | 107 | def forward(self, x): 108 | x = self.forward_features(x) 109 | x = self.head(x) 110 | return x 111 | 112 | 113 | class LayerNorm(nn.Module): 114 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 115 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 116 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 117 | with shape (batch_size, channels, height, width). 
118 | """ 119 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 120 | super().__init__() 121 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 122 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 123 | self.eps = eps 124 | self.data_format = data_format 125 | if self.data_format not in ["channels_last", "channels_first"]: 126 | raise NotImplementedError 127 | self.normalized_shape = (normalized_shape, ) 128 | 129 | def forward(self, x): 130 | if self.data_format == "channels_last": 131 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 132 | elif self.data_format == "channels_first": 133 | u = x.mean(1, keepdim=True) 134 | s = (x - u).pow(2).mean(1, keepdim=True) 135 | x = (x - u) / torch.sqrt(s + self.eps) 136 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 137 | return x 138 | 139 | 140 | @register_model 141 | def convnext_tiny(**kwargs): 142 | model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 143 | return model 144 | 145 | -------------------------------------------------------------------------------- /models/kw1d2x_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from modules.kernel_warehouse import Warehouse_Manager 3 | from timm.models.registry import register_model 4 | 5 | def _make_divisible(v, divisor, min_value=None): 6 | """ 7 | This function is taken from the original tf repo. 8 | It ensures that all layers have a channel number that is divisible by 8 9 | It can be seen here: 10 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 11 | :param v: 12 | :param divisor: 13 | :param min_value: 14 | :return: 15 | """ 16 | if min_value is None: 17 | min_value = divisor 18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 19 | # Make sure that round down does not go down by more than 10%. 
20 | if new_v < 0.9 * v: 21 | new_v += divisor 22 | return new_v 23 | 24 | 25 | class ConvBNReLU(nn.Sequential): 26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 27 | warehouse_name=None, warehouse_manager=None, enabled=True): 28 | padding = (kernel_size - 1) // 2 29 | super(ConvBNReLU, self).__init__( 30 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 31 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 32 | norm_layer(out_planes), 33 | nn.ReLU6(inplace=True) 34 | ) 35 | 36 | 37 | class InvertedResidual(nn.Module): 38 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 39 | warehouse_manager=None, pwlinear_enabled=True): 40 | super(InvertedResidual, self).__init__() 41 | self.stride = stride 42 | assert stride in [1, 2] 43 | hidden_dim = int(round(inp * expand_ratio)) 44 | self.use_res_connect = self.stride == 1 and inp == oup 45 | 46 | layers = [] 47 | if expand_ratio != 1: 48 | # pw 49 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 50 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 51 | warehouse_manager=warehouse_manager, enabled=pwlinear_enabled)) 52 | layers.extend([ 53 | # dw 54 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 55 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 56 | warehouse_manager=warehouse_manager), 57 | # pw-linear 58 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 59 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1), 60 | enabled=pwlinear_enabled), 61 | norm_layer(oup), 62 | ]) 63 | self.conv = nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | if self.use_res_connect: 67 | return x + self.conv(x) 68 | else: 69 | return self.conv(x) 70 | 71 | 72 | class KW1d2x_MobileNetV2(nn.Module): 73 | def __init__(self, 74 | num_classes=1000, 75 | width_mult=1.0, 76 | inverted_residual_setting=None, 77 | round_nearest=8, 78 | block=None, 79 | norm_layer=None, 80 | dropout=0.1, 81 | **kwargs): 82 | """ 83 | MobileNet V2 main class 84 | 85 | Args: 86 | num_classes (int): Number of classes 87 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 88 | inverted_residual_setting: Network structure 89 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 90 | Set to 1 to turn off rounding 91 | block: Module specifying inverted residual building block for mobilenet 92 | norm_layer: Module specifying the normalization layer to use 93 | 94 | """ 95 | 96 | reduction = 0.03125 97 | cell_num_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.5) 98 | cell_inplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.025) 99 | cell_outplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 1) 100 | sharing_range = ('layer',) # fixed: tuple("layer") would split the string into single characters 101 | 102 | super(KW1d2x_MobileNetV2, self).__init__() 103 | if block is None: 104 | block = InvertedResidual 105 | 106 | if norm_layer is None: 107 | norm_layer = nn.BatchNorm2d 108 | 109 | input_channel = 32 110 | last_channel = 1280 111 | 112 | if inverted_residual_setting is None: 113 | inverted_residual_setting = [ 114 | # t, c, n, s 115 | [1, 16, 1, 1], 116 | [6, 24, 2, 2], 117 | [6, 32, 3, 2], 118 | [6, 64, 4, 2], 119 | [6, 96, 3, 1], 120 | [6, 160, 3, 2], 121 | [6, 320, 1, 1], 122 | ] 123 | 124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]
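# Note (editor's): kw_stage_setting maps the seven inverted-residual settings above onto
# six warehouse stages (the last two settings share stage 6), and pwlinear_enabled below
# selects, per stage, whether the pointwise convolutions also draw their kernels from a
# warehouse; the handover logic further down assigns the first block of each new stage to
# the preceding stage's warehouse and resets layer_idx.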
125 | pwlinear_enabled = [False, False, False, False, False, True, True] 126 | 127 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 128 | cell_outplane_ratio, sharing_range) 129 | 130 | # only check the first element, assuming user knows t,c,n,s are required 131 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 132 | raise ValueError("inverted_residual_setting should be non-empty " 133 | "or a 4-element list, got {}".format(inverted_residual_setting)) 134 | 135 | # building first layer 136 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 137 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 138 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 139 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 140 | 141 | layer_idx = 0 142 | # building inverted residual blocks 143 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 144 | output_channel = _make_divisible(c * width_mult, round_nearest) 145 | for i in range(n): 146 | stride = s if i == 0 else 1 147 | 148 | if i == 0 and idx > 0: 149 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 150 | else: 151 | handover = False 152 | 153 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 156 | layer_idx=layer_idx, pwlinear_enabled=pwlinear_enabled[stage_idx - 1])) 157 | 158 | input_channel = output_channel 159 | layer_idx += 1 160 | 161 | if handover: 162 | layer_idx = 0 163 | 164 | # building last several layers 165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 166 | warehouse_manager=self.warehouse_manager, 167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 168 | # make it nn.Sequential 169 | self.features = nn.Sequential(*features) 170 | # building classifier 171 | self.classifier = nn.Sequential( 172 | nn.Dropout(dropout), 173 | self.warehouse_manager.reserve(self.last_channel, num_classes, kernel_size=1, 174 | warehouse_name='classifier', layer_type='linear'), 175 | ) 176 | 177 | # weight initialization 178 | for m in self.modules(): 179 | if isinstance(m, nn.Conv2d): 180 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 181 | if m.bias is not None: 182 | nn.init.zeros_(m.bias) 183 | elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.GroupNorm)): 184 | nn.init.ones_(m.weight) 185 | nn.init.zeros_(m.bias) 186 | elif isinstance(m, nn.Linear): 187 | nn.init.normal_(m.weight, 0, 0.01) 188 | nn.init.zeros_(m.bias) 189 | 190 | self.warehouse_manager.store() 191 | self.warehouse_manager.allocate(self) 192 | 193 | def net_update_temperature(self, temp): 194 | for m in self.modules(): 195 | if hasattr(m, "update_temperature"): 196 | m.update_temperature(temp) 197 | 198 | def _forward_impl(self, x): 199 | # This exists since TorchScript doesn't support inheritance, so the superclass method 200 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 201 | x = self.features(x) 202 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 203 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 204 | x = self.classifier(x) 205 | return x 206 | 207 | def
forward(self, x): 208 | return self._forward_impl(x) 209 | 210 | 211 | def kw1d2x_mobilenetv2(**kwargs): 212 | model = KW1d2x_MobileNetV2(**kwargs) 213 | return model 214 | 215 | 216 | @register_model 217 | def kw1d2x_mobilenetv2_050(**kwargs): 218 | return kw1d2x_mobilenetv2(width_mult=0.5, **kwargs) 219 | 220 | 221 | @register_model 222 | def kw1d2x_mobilenetv2_100(**kwargs): 223 | return kw1d2x_mobilenetv2(width_mult=1.0, **kwargs) 224 | -------------------------------------------------------------------------------- /models/kw_convnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from timm.models.layers import trunc_normal_, DropPath 5 | from timm.models.registry import register_model 6 | from modules.kernel_warehouse import Warehouse_Manager 7 | 8 | 9 | class Block(nn.Module): 10 | r""" ConvNeXt Block. There are two equivalent implementations: 11 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 12 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 13 | We use (2) as we find it slightly faster in PyTorch 14 | 15 | Args: 16 | dim (int): Number of input channels. 17 | drop_path (float): Stochastic depth rate. Default: 0.0 18 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 19 | """ 20 | 21 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, 22 | warehouse_manager=None, stage_idx=-1, layer_idx=-1): 23 | super().__init__() 24 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim, 25 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv 26 | self.norm = LayerNorm(dim, eps=1e-6) 27 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0, 28 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers 29 | self.act = nn.GELU() 30 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0, 31 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2)) 32 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]), 33 | requires_grad=True) if layer_scale_init_value > 0 else None 34 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 35 | 36 | def forward(self, x): 37 | input = x 38 | x = self.dwconv(x) 39 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 40 | x = self.norm(x).permute(0, 3, 1, 2) 41 | x = self.pwconv1(x) 42 | x = self.act(x) 43 | x = self.pwconv2(x) 44 | if self.gamma is not None: 45 | x = self.gamma * x 46 | x = input + self.drop_path(x) 47 | return x 48 | 49 | 50 | @register_model 51 | class KW_ConvNeXt(nn.Module): 52 | r""" ConvNeXt 53 | A PyTorch impl of : `A ConvNet for the 2020s` - 54 | https://arxiv.org/pdf/2201.03545.pdf 55 | 56 | Args: 57 | in_chans (int): Number of input image channels. Default: 3 58 | num_classes (int): Number of classes for classification head. Default: 1000 59 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 60 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 61 | drop_path_rate (float): Stochastic depth rate. Default: 0. 62 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 
63 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 64 | """ 65 | 66 | def __init__(self, in_chans=3, num_classes=1000, 67 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 68 | layer_scale_init_value=1e-6, head_init_scale=1., 69 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 70 | sharing_range=('layer', 'pwconv'), nonlocal_basis_ratio=1, **kwargs 71 | ): 72 | super().__init__() 73 | 74 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 75 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio, 76 | norm_layer=nn.LayerNorm, 77 | ) 78 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 79 | stem = nn.Sequential( 80 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4, 81 | warehouse_name='stage{}_conv0'.format('stem')), 82 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 83 | ) 84 | self.downsample_layers.append(stem) 85 | for i in range(3): 86 | downsample_layer = nn.Sequential( 87 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 88 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2, 89 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')), 90 | ) 91 | self.downsample_layers.append(downsample_layer) 92 | 93 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 94 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 95 | cur = 0 96 | for i in range(4): 97 | stage = nn.Sequential( 98 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 99 | layer_scale_init_value=layer_scale_init_value, 100 | warehouse_manager=self.warehouse_manager, 101 | stage_idx=i, layer_idx=j, 102 | ) for j in range(depths[i])] 103 | ) 104 | self.stages.append(stage) 105 | cur += depths[i] 106 | 107 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer 108 | self.head = nn.Linear(dims[-1], num_classes) 109 | 110 | self.apply(self._init_weights) 111 | self.head.weight.data.mul_(head_init_scale) 112 | self.head.bias.data.mul_(head_init_scale) 113 | 114 | self.warehouse_manager.store() 115 | self.warehouse_manager.allocate(self) 116 | 117 | def _init_weights(self, m): 118 | if isinstance(m, (nn.Conv2d, nn.Linear)): 119 | trunc_normal_(m.weight, std=.02) 120 | nn.init.constant_(m.bias, 0) 121 | 122 | def net_update_temperature(self, temp): 123 | for m in self.modules(): 124 | if hasattr(m, "update_temperature"): 125 | m.update_temperature(temp) 126 | 127 | def forward_features(self, x): 128 | for i in range(4): 129 | x = self.downsample_layers[i](x) 130 | x = self.stages[i](x) 131 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C) 132 | 133 | def forward(self, x): 134 | x = self.forward_features(x) 135 | x = self.head(x) 136 | return x 137 | 138 | 139 | class LayerNorm(nn.Module): 140 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 141 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 142 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 143 | with shape (batch_size, channels, height, width). 
144 | """ 145 | 146 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 147 | super().__init__() 148 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 149 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 150 | self.eps = eps 151 | self.data_format = data_format 152 | if self.data_format not in ["channels_last", "channels_first"]: 153 | raise NotImplementedError 154 | self.normalized_shape = (normalized_shape,) 155 | 156 | def forward(self, x): 157 | if self.data_format == "channels_last": 158 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 159 | elif self.data_format == "channels_first": 160 | u = x.mean(1, keepdim=True) 161 | s = (x - u).pow(2).mean(1, keepdim=True) 162 | x = (x - u) / torch.sqrt(s + self.eps) 163 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 164 | return x 165 | 166 | 167 | @register_model 168 | def kw_convnext_tiny(pretrained=False, in_22k=False, **kwargs): 169 | model = KW_ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 170 | return model -------------------------------------------------------------------------------- /models/kw_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from modules.kernel_warehouse import Warehouse_Manager 3 | from timm.models.registry import register_model 4 | 5 | 6 | def _make_divisible(v, divisor, min_value=None): 7 | """ 8 | This function is taken from the original tf repo. 9 | It ensures that all layers have a channel number that is divisible by 8 10 | It can be seen here: 11 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 12 | :param v: 13 | :param divisor: 14 | :param min_value: 15 | :return: 16 | """ 17 | if min_value is None: 18 | min_value = divisor 19 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 20 | # Make sure that round down does not go down by more than 10%. 
21 | if new_v < 0.9 * v: 22 | new_v += divisor 23 | return new_v 24 | 25 | 26 | class ConvBNReLU(nn.Sequential): 27 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 28 | warehouse_name=None, warehouse_manager=None, enabled=True): 29 | padding = (kernel_size - 1) // 2 30 | super(ConvBNReLU, self).__init__( 31 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 32 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 33 | norm_layer(out_planes), 34 | nn.ReLU6(inplace=True) 35 | ) 36 | 37 | 38 | class InvertedResidual(nn.Module): 39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 40 | warehouse_manager=None): 41 | super(InvertedResidual, self).__init__() 42 | self.stride = stride 43 | assert stride in [1, 2] 44 | hidden_dim = int(round(inp * expand_ratio)) 45 | self.use_res_connect = self.stride == 1 and inp == oup 46 | 47 | layers = [] 48 | if expand_ratio != 1: 49 | # pw 50 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 51 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 52 | warehouse_manager=warehouse_manager)) 53 | 54 | layers.extend([ 55 | # dw 56 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 57 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 58 | warehouse_manager=warehouse_manager), 59 | # pw-linear 60 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 61 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)), 62 | norm_layer(oup), 63 | ]) 64 | self.conv = nn.Sequential(*layers) 65 | 66 | def forward(self, x): 67 | if self.use_res_connect: 68 | return x + self.conv(x) 69 | else: 70 | return self.conv(x) 71 | 72 | 73 | class KW_MobileNetV2(nn.Module): 74 | def __init__(self, 75 | num_classes=1000, 76 | width_mult=1.0, 77 | inverted_residual_setting=None, 78 | round_nearest=8, 79 | block=None, 80 | norm_layer=None, 81 | dropout=0.0, 82 | reduction=0.0625, 83 | cell_num_ratio=1, 84 | cell_inplane_ratio=1, 85 | cell_outplane_ratio=1, 86 | sharing_range=None, 87 | nonlocal_basis_ratio=1, 88 | **kwargs): 89 | """ 90 | MobileNet V2 main class 91 | 92 | Args: 93 | num_classes (int): Number of classes 94 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 95 | inverted_residual_setting: Network structure 96 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 97 | Set to 1 to turn off rounding 98 | block: Module specifying inverted residual building block for mobilenet 99 | norm_layer: Module specifying the normalization layer to use 100 | 101 | """ 102 | super(KW_MobileNetV2, self).__init__() 103 | if block is None: 104 | block = InvertedResidual 105 | 106 | if norm_layer is None: 107 | norm_layer = nn.BatchNorm2d 108 | 109 | input_channel = 32 110 | last_channel = 1280 111 | 112 | if inverted_residual_setting is None: 113 | inverted_residual_setting = [ 114 | # t, c, n, s 115 | [1, 16, 1, 1], 116 | [6, 24, 2, 2], 117 | [6, 32, 3, 2], 118 | [6, 64, 4, 2], 119 | [6, 96, 3, 1], 120 | [6, 160, 3, 2], 121 | [6, 320, 1, 1], 122 | ] 123 | 124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6] 125 | 126 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 127 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio) 128 |
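# How the Warehouse_Manager is used in this file (editor's summary): reserve() is called
# while the network is assembled and returns layers whose kernels will live in a shared
# warehouse keyed by warehouse_name; once construction finishes, store() builds the shared
# kernel cells from all reservations and allocate(self) attaches them to the reserved
# layers. The attention temperature is then annealed during training via
# net_update_temperature().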
129 | # only check the first element, assuming user knows t,c,n,s are required 130 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 131 | raise ValueError("inverted_residual_setting should be non-empty " 132 | "or a 4-element list, got {}".format(inverted_residual_setting)) 133 | 134 | # building first layer 135 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 136 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 137 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 138 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 139 | 140 | layer_idx = 0 141 | # building inverted residual blocks 142 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 143 | output_channel = _make_divisible(c * width_mult, round_nearest) 144 | for i in range(n): 145 | stride = s if i == 0 else 1 146 | 147 | if i == 0 and idx > 0: 148 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 149 | else: 150 | handover = False 151 | 152 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 153 | 154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 156 | layer_idx=layer_idx)) 157 | 158 | input_channel = output_channel 159 | layer_idx += 1 160 | 161 | if handover: 162 | layer_idx = 0 163 | 164 | # building last several layers 165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 166 | warehouse_manager=self.warehouse_manager, 167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 168 | # make it nn.Sequential 169 | self.features = nn.Sequential(*features) 170 | # building classifier 171 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() 172 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True) 173 | 174 | # weight initialization 175 | for m in self.modules(): 176 | if isinstance(m, nn.Conv2d): 177 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 178 | if m.bias is not None: 179 | nn.init.zeros_(m.bias) 180 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 181 | nn.init.ones_(m.weight) 182 | nn.init.zeros_(m.bias) 183 | elif isinstance(m, nn.Linear): 184 | nn.init.normal_(m.weight, 0, 0.01) 185 | nn.init.zeros_(m.bias) 186 | 187 | self.warehouse_manager.store() 188 | self.warehouse_manager.allocate(self) 189 | 190 | def net_update_temperature(self, temp): 191 | for m in self.modules(): 192 | if hasattr(m, "update_temperature"): 193 | m.update_temperature(temp) 194 | 195 | def _forward_impl(self, x): 196 | # This exists since TorchScript doesn't support inheritance, so the superclass method 197 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 198 | x = self.features(x) 199 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 200 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 201 | x = self.dropout(x) 202 | x = self.classifier(x) 203 | return x 204 | 205 | def forward(self, x): 206 | return self._forward_impl(x) 207 | 208 | 209 | def kw_mobilenetv2(**kwargs): 210 | model = KW_MobileNetV2(**kwargs) 211 | return model 212 | 213 | 214 | @register_model 215 | def kw_mobilenetv2_050(**kwargs): 216 | return kw_mobilenetv2(width_mult=0.5, **kwargs) 217 | 218 | 219 | @register_model 220 | def 
kw_mobilenetv2_100(**kwargs): 221 | return kw_mobilenetv2(width_mult=1.0, **kwargs) 222 | -------------------------------------------------------------------------------- /models/kw_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from modules.kernel_warehouse import Warehouse_Manager 4 | from timm.models.layers import DropPath 5 | from timm.models.registry import register_model 6 | 7 | 8 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 9 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, 10 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 11 | 12 | 13 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 14 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, 15 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None, 22 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 23 | super(BasicBlock, self).__init__() 24 | conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0) 25 | self.conv1 = kwconv3x3(inplanes, planes, stride, 26 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 27 | warehouse_manager=warehouse_manager) 28 | self.bn1 = nn.BatchNorm2d(planes) 29 | self.relu = nn.ReLU(inplace=True) 30 | layer_idx = 0 if warehouse_handover else layer_idx 31 | self.conv2 = kwconv3x3(planes, planes, 32 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 33 | warehouse_manager=warehouse_manager) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 38 | 39 | def forward(self, x): 40 | identity = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is not None: 50 | identity = self.downsample(x) 51 | 52 | out = identity + self.drop_path(out) 53 | out = self.relu(out) 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None, 61 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 62 | super(Bottleneck, self).__init__() 63 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx 64 | self.conv1 = kwconv1x1(inplanes, planes, 65 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 66 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0)) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | layer_idx = 0 if warehouse_handover else layer_idx 69 | self.conv2 = kwconv3x3(planes, planes, stride, 70 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 71 | warehouse_manager=warehouse_manager) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = kwconv1x1(planes, planes * self.expansion, 74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2), 75 | warehouse_manager=warehouse_manager) 76 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 81 | 82 | def forward(self, x): 83 | identity = x 84 | 85 | out = self.conv1(x) 86 | out = self.bn1(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv2(out) 90 | out = self.bn2(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv3(out) 94 | out = self.bn3(out) 95 | 96 | if self.downsample is not None: 97 | identity = self.downsample(x) 98 | 99 | out = identity + self.drop_path(out) 100 | out = self.relu(out) 101 | return out 102 | 103 | 104 | class KW_ResNet(nn.Module): 105 | def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduction=0.0625, 106 | cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 107 | sharing_range=('layer', 'conv'), nonlocal_basis_ratio=1, drop_path_rate=0., **kwargs): 108 | super(KW_ResNet, self).__init__() 109 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio, 110 | sharing_range, nonlocal_basis_ratio) 111 | self.inplanes = 64 112 | self.layer_idx = 0 113 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) 114 | self.bn1 = nn.BatchNorm2d(self.inplanes) 115 | self.relu = nn.ReLU(inplace=True) 116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 117 | self.layer1 = self._make_layer(block, 64, layers[0], 118 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 120 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 121 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 122 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 123 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 124 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 125 | self.avgpool = 
nn.AdaptiveAvgPool2d((1, 1)) 126 | self.dropout = nn.Dropout(p=dropout) 127 | self.fc = nn.Linear(512 * block.expansion, num_classes) 128 | 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 132 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 133 | nn.init.constant_(m.weight, 1) 134 | nn.init.constant_(m.bias, 0) 135 | 136 | self.warehouse_manager.store() 137 | self.warehouse_manager.allocate(self) 138 | 139 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.): 140 | downsample = None 141 | if stride != 1 or self.inplanes != planes * block.expansion: 142 | downsample = nn.Sequential( 143 | warehouse_manager.reserve( 144 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0, 145 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx - 1, self.layer_idx + 1, 0), 146 | enabled=(stride != 1), bias=False), 147 | nn.BatchNorm2d(planes * block.expansion), 148 | ) 149 | 150 | layers = [] 151 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx, 152 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path)) 153 | self.layer_idx = 1 154 | self.inplanes = planes * block.expansion 155 | for idx in range(1, blocks): 156 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx, 157 | warehouse_manager=warehouse_manager, drop_path=drop_path)) 158 | self.layer_idx += 1 159 | return nn.Sequential(*layers) 160 | 161 | def net_update_temperature(self, temp): 162 | for m in self.modules(): 163 | if hasattr(m, "update_temperature"): 164 | m.update_temperature(temp) 165 | 166 | def _forward_impl(self, x): 167 | x = self.conv1(x) 168 | x = self.bn1(x) 169 | x = self.relu(x) 170 | x = self.maxpool(x) 171 | 172 | x = self.layer1(x) 173 | x = self.layer2(x) 174 | x = self.layer3(x) 175 | x = self.layer4(x) 176 | 177 | x = self.avgpool(x) 178 | x = torch.flatten(x, 1) 179 | x = self.dropout(x) 180 | x = self.fc(x) 181 | return x 182 | 183 | def forward(self, x): 184 | return self._forward_impl(x) 185 | 186 | 187 | @register_model 188 | def kw_resnet18(**kwargs): 189 | model = KW_ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 190 | return model 191 | 192 | @register_model 193 | def kw_resnet50(**kwargs): 194 | model = KW_ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 195 | return model 196 | 197 | -------------------------------------------------------------------------------- /models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from timm.models.registry import register_model 3 | 4 | 5 | def _make_divisible(v, divisor, min_value=None): 6 | """ 7 | This function is taken from the original tf repo. 8 | It ensures that all layers have a channel number that is divisible by 8 9 | It can be seen here: 10 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 11 | :param v: 12 | :param divisor: 13 | :param min_value: 14 | :return: 15 | """ 16 | if min_value is None: 17 | min_value = divisor 18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 19 | # Make sure that round down does not go down by more than 10%. 
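# A quick worked example of this guard: _make_divisible(10, 8) first rounds to
# int(10 + 4) // 8 * 8 = 8, but 8 < 0.9 * 10, so it is bumped to 16; whereas
# _make_divisible(15, 8) rounds to 16, and 16 >= 0.9 * 15 = 13.5, so 16 is kept.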
20 | if new_v < 0.9 * v: 21 | new_v += divisor 22 | return new_v 23 | 24 | 25 | class ConvBNReLU(nn.Sequential): 26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d): 27 | padding = (kernel_size - 1) // 2 28 | super(ConvBNReLU, self).__init__( 29 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 30 | norm_layer(out_planes), 31 | nn.ReLU6(inplace=True) 32 | ) 33 | 34 | 35 | class InvertedResidual(nn.Module): 36 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d): 37 | super(InvertedResidual, self).__init__() 38 | self.stride = stride 39 | hidden_dim = int(round(inp * expand_ratio)) 40 | self.use_res_connect = self.stride == 1 and inp == oup 41 | 42 | layers = [] 43 | if expand_ratio != 1: 44 | # pw 45 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 46 | layers.extend([ 47 | # dw 48 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 49 | # pw-linear 50 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 51 | norm_layer(oup), 52 | ]) 53 | self.conv = nn.Sequential(*layers) 54 | 55 | def forward(self, x): 56 | if self.use_res_connect: 57 | return x + self.conv(x) 58 | else: 59 | return self.conv(x) 60 | 61 | 62 | class MobileNetV2(nn.Module): 63 | def __init__(self, 64 | num_classes=1000, 65 | width_mult=1.0, 66 | inverted_residual_setting=None, 67 | round_nearest=8, 68 | block=InvertedResidual, 69 | norm_layer=nn.BatchNorm2d, 70 | dropout=0.0, 71 | **kwargs): 72 | """ 73 | MobileNet V2 main class 74 | 75 | Args: 76 | num_classes (int): Number of classes 77 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 78 | inverted_residual_setting: Network structure 79 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 80 | Set to 1 to turn off rounding 81 | block: Module specifying inverted residual building block for mobilenet 82 | norm_layer: Module specifying the normalization layer to use 83 | 84 | """ 85 | super(MobileNetV2, self).__init__() 86 | 87 | input_channel = 32 88 | last_channel = 1280 89 | 90 | if inverted_residual_setting is None: 91 | inverted_residual_setting = [ 92 | # t, c, n, s 93 | [1, 16, 1, 1], 94 | [6, 24, 2, 2], 95 | [6, 32, 3, 2], 96 | [6, 64, 4, 2], 97 | [6, 96, 3, 1], 98 | [6, 160, 3, 2], 99 | [6, 320, 1, 1], 100 | ] 101 | 102 | # only check the first element, assuming user knows t,c,n,s are required 103 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 104 | raise ValueError("inverted_residual_setting should be non-empty " 105 | "and a list of 4-element lists (t, c, n, s), got {}".format(inverted_residual_setting)) 106 | 107 | # building first layer 108 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 109 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 110 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 111 | # building inverted residual blocks 112 | for t, c, n, s in inverted_residual_setting: 113 | output_channel = _make_divisible(c * width_mult, round_nearest) 114 | for i in range(n): 115 | stride = s if i == 0 else 1 116 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 117 | input_channel = output_channel 118 | # building last several layers 119 | features.append(ConvBNReLU(input_channel, self.last_channel, 
kernel_size=1, norm_layer=norm_layer)) 120 | # make it nn.Sequential 121 | self.features = nn.Sequential(*features) 122 | # building classifier 123 | self.classifier = nn.Sequential( 124 | nn.Dropout(dropout), 125 | nn.Linear(self.last_channel, num_classes), 126 | ) 127 | 128 | # weight initialization 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 132 | if m.bias is not None: 133 | nn.init.zeros_(m.bias) 134 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 135 | nn.init.ones_(m.weight) 136 | nn.init.zeros_(m.bias) 137 | elif isinstance(m, nn.Linear): 138 | nn.init.normal_(m.weight, 0, 0.01) 139 | nn.init.zeros_(m.bias) 140 | 141 | def _forward_impl(self, x): 142 | # This exists since TorchScript doesn't support inheritance, so the superclass method 143 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 144 | x = self.features(x) 145 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 146 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 147 | x = self.classifier(x) 148 | return x 149 | 150 | def forward(self, x): 151 | return self._forward_impl(x) 152 | 153 | 154 | def mobilenet_v2(**kwargs): 155 | model = MobileNetV2(**kwargs) 156 | return model 157 | 158 | 159 | @register_model 160 | def mobilenetv2_050(**kwargs): 161 | return mobilenet_v2(width_mult=0.5, **kwargs) 162 | 163 | 164 | @register_model 165 | def mobilenetv2_100(**kwargs): 166 | return mobilenet_v2(width_mult=1.0, **kwargs) 167 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from timm.models.layers import DropPath 4 | from timm.models.registry import register_model 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 9 | padding=1, bias=False) 10 | 11 | 12 | def conv1x1(in_planes, out_planes, stride=1): 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 29 | 30 | def forward(self, x): 31 | identity = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | 40 | if self.downsample is not None: 41 | identity = self.downsample(x) 42 | 43 | out = identity + self.drop_path(out) 44 | out = self.relu(out) 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = nn.BatchNorm2d(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = nn.BatchNorm2d(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 63 | 64 | def forward(self, x): 65 | identity = x 66 | 67 | out = self.conv1(x) 68 | out = self.bn1(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv2(out) 72 | out = self.bn2(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv3(out) 76 | out = self.bn3(out) 77 | 78 | if self.downsample is not None: 79 | identity = self.downsample(x) 80 | 81 | out = identity + self.drop_path(out) 82 | out = self.relu(out) 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | def __init__(self, block, layers, num_classes=1000, drop_path_rate=0., **kwargs): 88 | super(ResNet, self).__init__() 89 | self.inplanes = 64 90 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 91 | bias=False) 92 | self.bn1 = nn.BatchNorm2d(self.inplanes) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0], drop_path=drop_path_rate) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, drop_path=drop_path_rate) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, drop_path=drop_path_rate) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, drop_path=drop_path_rate) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | if m.bias is not None: 106 | nn.init.zeros_(m.bias) 107 | elif isinstance(m, nn.BatchNorm2d): 108 | nn.init.constant_(m.weight, 1) 109 | nn.init.constant_(m.bias, 0) 110 | elif isinstance(m, nn.Linear): 111 | nn.init.normal_(m.weight, 0, 0.01) 112 | nn.init.zeros_(m.bias) 113 | 114 | def _make_layer(self, block, planes, blocks, stride=1, drop_path=None): 115 | downsample = None 116 | if stride != 1 or self.inplanes != planes * block.expansion: 117 | downsample = nn.Sequential( 118 | conv1x1(self.inplanes, planes * block.expansion, stride), 119 | nn.BatchNorm2d(planes * block.expansion), 120 | ) 121 | 122 | layers = [] 123 | layers.append(block(self.inplanes, planes, stride, downsample, drop_path=drop_path)) 124 | self.inplanes = planes * block.expansion 125 | for _ in range(1, blocks): 126 | layers.append(block(self.inplanes, planes, drop_path=drop_path)) 127 | 128 | return nn.Sequential(*layers) 129 | 130 | def _forward_impl(self, x): 131 | x = self.conv1(x) 132 | x = self.bn1(x) 133 | x = 
self.relu(x) 134 | x = self.maxpool(x) 135 | 136 | x = self.layer1(x) 137 | x = self.layer2(x) 138 | x = self.layer3(x) 139 | x = self.layer4(x) 140 | 141 | x = self.avgpool(x) 142 | x = torch.flatten(x, 1) 143 | x = self.fc(x) 144 | return x 145 | 146 | def forward(self, x): 147 | return self._forward_impl(x) 148 | 149 | 150 | @register_model 151 | def resnet18(**kwargs): 152 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 153 | return model 154 | 155 | @register_model 156 | def resnet50(**kwargs): 157 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 158 | return model 159 | -------------------------------------------------------------------------------- /optim_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import optim as optim 3 | 4 | from timm.optim.adafactor import Adafactor 5 | from timm.optim.adahessian import Adahessian 6 | from timm.optim.adamp import AdamP 7 | from timm.optim.lookahead import Lookahead 8 | from timm.optim.nadam import Nadam 9 | from timm.optim.radam import RAdam 10 | from timm.optim.rmsprop_tf import RMSpropTF 11 | from timm.optim.sgdp import SGDP 12 | 13 | import json 14 | 15 | try: 16 | from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD 17 | has_apex = True 18 | except ImportError: 19 | has_apex = False 20 | 21 | 22 | def get_num_layer_for_convnext(var_name): 23 | """ 24 | Divide [3, 3, 27, 3] layers into 12 groups; each group is three 25 | consecutive blocks, including possible neighboring downsample layers; 26 | """ 27 | num_max_layer = 12 28 | if var_name.startswith("downsample_layers"): 29 | stage_id = int(var_name.split('.')[1]) 30 | if stage_id == 0: 31 | layer_id = 0 32 | elif stage_id == 1 or stage_id == 2: 33 | layer_id = stage_id + 1 34 | elif stage_id == 3: 35 | layer_id = 12 36 | return layer_id 37 | 38 | elif var_name.startswith("stages"): 39 | stage_id = int(var_name.split('.')[1]) 40 | block_id = int(var_name.split('.')[2]) 41 | if stage_id == 0 or stage_id == 1: 42 | layer_id = stage_id + 1 43 | elif stage_id == 2: 44 | layer_id = 3 + block_id // 3 45 | elif stage_id == 3: 46 | layer_id = 12 47 | return layer_id 48 | else: 49 | return num_max_layer + 1 50 | 51 | class LayerDecayValueAssigner(object): 52 | def __init__(self, values): 53 | self.values = values 54 | 55 | def get_scale(self, layer_id): 56 | return self.values[layer_id] 57 | 58 | def get_layer_id(self, var_name): 59 | return get_num_layer_for_convnext(var_name) 60 | 61 | 62 | def get_parameter_groups(model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None): 63 | parameter_group_names = {} 64 | parameter_group_vars = {} 65 | 66 | for name, param in model.named_parameters(): 67 | if not param.requires_grad: 68 | continue # frozen weights 69 | if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: 70 | group_name = "no_decay" 71 | this_weight_decay = 0. 72 | else: 73 | group_name = "decay" 74 | this_weight_decay = weight_decay 75 | if get_num_layer is not None: 76 | layer_id = get_num_layer(name) 77 | group_name = "layer_%d_%s" % (layer_id, group_name) 78 | else: 79 | layer_id = None 80 | 81 | if group_name not in parameter_group_names: 82 | if get_layer_scale is not None: 83 | scale = get_layer_scale(layer_id) 84 | else: 85 | scale = 1. 
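# Each distinct group name (e.g. "layer_3_decay" or "no_decay") becomes one
# optimizer parameter group below; "lr_scale" is a per-group multiplier that
# the training loop is presumably expected to apply on top of the base lr.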
86 | 87 | parameter_group_names[group_name] = { 88 | "weight_decay": this_weight_decay, 89 | "params": [], 90 | "lr_scale": scale 91 | } 92 | parameter_group_vars[group_name] = { 93 | "weight_decay": this_weight_decay, 94 | "params": [], 95 | "lr_scale": scale 96 | } 97 | 98 | parameter_group_vars[group_name]["params"].append(param) 99 | parameter_group_names[group_name]["params"].append(name) 100 | print("Param groups = %s" % json.dumps(parameter_group_names, indent=2)) 101 | return list(parameter_group_vars.values()) 102 | 103 | 104 | def create_optimizer(args, model, get_num_layer=None, get_layer_scale=None, filter_bias_and_bn=True, skip_list=None): 105 | opt_lower = args.opt.lower() 106 | weight_decay = args.weight_decay 107 | # if weight_decay and filter_bias_and_bn: 108 | if filter_bias_and_bn: 109 | skip = {} 110 | if skip_list is not None: 111 | skip = skip_list 112 | elif hasattr(model, 'no_weight_decay'): 113 | skip = model.no_weight_decay() 114 | parameters = get_parameter_groups(model, weight_decay, skip, get_num_layer, get_layer_scale) 115 | weight_decay = 0. 116 | else: 117 | parameters = model.parameters() 118 | 119 | if 'fused' in opt_lower: 120 | assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers' 121 | 122 | opt_args = dict(lr=args.lr, weight_decay=weight_decay) 123 | if hasattr(args, 'opt_eps') and args.opt_eps is not None: 124 | opt_args['eps'] = args.opt_eps 125 | if hasattr(args, 'opt_betas') and args.opt_betas is not None: 126 | opt_args['betas'] = args.opt_betas 127 | 128 | opt_split = opt_lower.split('_') 129 | opt_lower = opt_split[-1] 130 | if opt_lower == 'sgd' or opt_lower == 'nesterov': 131 | opt_args.pop('eps', None) 132 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) 133 | elif opt_lower == 'momentum': 134 | opt_args.pop('eps', None) 135 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) 136 | elif opt_lower == 'adam': 137 | optimizer = optim.Adam(parameters, **opt_args) 138 | elif opt_lower == 'adamw': 139 | optimizer = optim.AdamW(parameters, **opt_args) 140 | elif opt_lower == 'nadam': 141 | optimizer = Nadam(parameters, **opt_args) 142 | elif opt_lower == 'radam': 143 | optimizer = RAdam(parameters, **opt_args) 144 | elif opt_lower == 'adamp': 145 | optimizer = AdamP(parameters, wd_ratio=0.01, nesterov=True, **opt_args) 146 | elif opt_lower == 'sgdp': 147 | optimizer = SGDP(parameters, momentum=args.momentum, nesterov=True, **opt_args) 148 | elif opt_lower == 'adadelta': 149 | optimizer = optim.Adadelta(parameters, **opt_args) 150 | elif opt_lower == 'adafactor': 151 | if not args.lr: 152 | opt_args['lr'] = None 153 | optimizer = Adafactor(parameters, **opt_args) 154 | elif opt_lower == 'adahessian': 155 | optimizer = Adahessian(parameters, **opt_args) 156 | elif opt_lower == 'rmsprop': 157 | optimizer = optim.RMSprop(parameters, alpha=0.9, momentum=args.momentum, **opt_args) 158 | elif opt_lower == 'rmsproptf': 159 | optimizer = RMSpropTF(parameters, alpha=0.9, momentum=args.momentum, **opt_args) 160 | elif opt_lower == 'fusedsgd': 161 | opt_args.pop('eps', None) 162 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) 163 | elif opt_lower == 'fusedmomentum': 164 | opt_args.pop('eps', None) 165 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) 166 | elif opt_lower == 'fusedadam': 167 | optimizer = FusedAdam(parameters, adam_w_mode=False, **opt_args) 168 | elif opt_lower 
== 'fusedadamw': 169 | optimizer = FusedAdam(parameters, adam_w_mode=True, **opt_args) 170 | elif opt_lower == 'fusedlamb': 171 | optimizer = FusedLAMB(parameters, **opt_args) 172 | elif opt_lower == 'fusednovograd': 173 | opt_args.setdefault('betas', (0.95, 0.98)) 174 | optimizer = FusedNovoGrad(parameters, **opt_args) 175 | else: 176 | assert False, "Invalid optimizer" 177 | 178 | if len(opt_split) > 1: 179 | if opt_split[0] == 'lookahead': 180 | optimizer = Lookahead(optimizer) 181 | 182 | return optimizer 183 | --------------------------------------------------------------------------------
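A minimal usage sketch of how create_optimizer and LayerDecayValueAssigner fit together (this snippet is not part of the repository; the args fields are assumptions based on what create_optimizer reads, and the decay schedule mirrors the usual ConvNeXt recipe):

    from types import SimpleNamespace
    from optim_factory import create_optimizer, LayerDecayValueAssigner
    from models.resnet import resnet18  # registered in models/resnet.py above

    # Hypothetical hyper-parameters; create_optimizer reads exactly these fields.
    args = SimpleNamespace(opt='adamw', lr=4e-3, weight_decay=0.05,
                           opt_eps=1e-8, opt_betas=(0.9, 0.999), momentum=0.9)

    model = resnet18(num_classes=1000)

    # Twelve layer groups plus stem/head; values[i] scales the lr of group i.
    # Note that get_num_layer_for_convnext assumes ConvNeXt-style parameter
    # names, so for other backbones every parameter lands in the last group
    # (scale 1.0) and the call degenerates to a plain AdamW setup.
    num_layers = 12
    values = [0.9 ** (num_layers + 1 - i) for i in range(num_layers + 2)]
    assigner = LayerDecayValueAssigner(values)

    optimizer = create_optimizer(args, model,
                                 get_num_layer=assigner.get_layer_id,
                                 get_layer_scale=assigner.get_scale)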