├── LICENSE
├── README.md
├── configs
├── convnext
│ ├── convnext_tiny.json
│ └── kw1x_convnext_tiny.json
├── mobilenetv2_050
│ ├── kw1d2x_mobilenetv2_050.json
│ ├── kw1x_mobilenetv2_050.json
│ ├── kw4x_mobilenetv2_050.json
│ └── mobilenetv2_050.json
├── mobilenetv2_100
│ ├── kw1d2x_mobilenetv2_100.json
│ ├── kw1x_mobilenetv2_100.json
│ ├── kw4x_mobilenetv2_100.json
│ └── mobilenetv2_100.json
├── resnet18
│ ├── kw1d2x_resnet18.json
│ ├── kw1d4x_resnet18.json
│ ├── kw1x_resnet18.json
│ ├── kw2x_resnet18.json
│ ├── kw4x_resnet18.json
│ └── resnet18.json
└── resnet50
│ ├── kw1d2x_resnet50.json
│ ├── kw1x_resnet50.json
│ ├── kw4x_resnet50.json
│ └── resnet50.json
├── datasets.py
├── detection
├── README.md
├── configs
│ ├── _base_
│ │ └── models
│ │ │ ├── mask_rcnn_convnext_fpn.py
│ │ │ ├── mask_rcnn_mobilenetv2_fpn.py
│ │ │ └── mask_rcnn_r50_fpn.py
│ └── kernelwarehouse
│ │ ├── convnext_tiny
│ │ ├── mask_rcnn_convnext_tiny_adamw_1x_coco.py
│ │ └── mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py
│ │ ├── mobilenetv2
│ │ ├── mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py
│ │ ├── mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py
│ │ └── mask_rcnn_mobilenetv2_100_adamw_1x_coco.py
│ │ └── resnet50
│ │ ├── mask_rcnn_kw1x_resnet50_adamw_1x_coco.py
│ │ ├── mask_rcnn_kw4x_resnet50_adamw_1x_coco.py
│ │ └── mask_rcnn_resnet50_adamw_1x_coco.py
├── mmcv_custom
│ ├── __init__.py
│ ├── customized_text.py
│ ├── layer_decay_optimizer_constructor.py
│ └── runner
│ │ └── checkpoint.py
└── mmdet
│ └── models
│ └── backbones
│ ├── __init__.py
│ ├── convnext.py
│ ├── kernel_warehouse.py
│ ├── kw_convnext.py
│ ├── kw_mobilenetv2.py
│ ├── kw_resnet.py
│ ├── mobilenetv2.py
│ └── resnet.py
├── engine.py
├── fig
├── Fig_Architecture.pdf
└── Fig_Architecture.png
├── main.py
├── models
├── __init__.py
├── convnext.py
├── kw1d2x_mobilenetv2.py
├── kw_convnext.py
├── kw_mobilenetv2.py
├── kw_resnet.py
├── mobilenetv2.py
└── resnet.py
├── modules
└── kernel_warehouse.py
├── optim_factory.py
└── utils.py
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License 2 | 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | Copyright (c) 2024 OSVAI/KernelWarehouse 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, 12 | and distribution as defined by Sections 1 through 9 of this document. 13 | 14 | "Licensor" shall mean the copyright owner or entity authorized by 15 | the copyright owner that is granting the License. 16 | 17 | "Legal Entity" shall mean the union of the acting entity and all 18 | other entities that control, are controlled by, or are under common 19 | control with that entity. For the purposes of this definition, 20 | "control" means (i) the power, direct or indirect, to cause the 21 | direction or management of such entity, whether by contract or 22 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 23 | outstanding shares, or (iii) beneficial ownership of such entity. 24 | 25 | "You" (or "Your") shall mean an individual or Legal Entity 26 | exercising permissions granted by this License. 27 | 28 | "Source" form shall mean the preferred form for making modifications, 29 | including but not limited to software source code, documentation 30 | source, and configuration files.
31 | 32 | "Object" form shall mean any form resulting from mechanical 33 | transformation or translation of a Source form, including but 34 | not limited to compiled object code, generated documentation, 35 | and conversions to other media types. 36 | 37 | "Work" shall mean the work of authorship, whether in Source or 38 | Object form, made available under the License, as indicated by a 39 | copyright notice that is included in or attached to the work 40 | (an example is provided in the Appendix below). 41 | 42 | "Derivative Works" shall mean any work, whether in Source or Object 43 | form, that is based on (or derived from) the Work and for which the 44 | editorial revisions, annotations, elaborations, or other modifications 45 | represent, as a whole, an original work of authorship. For the purposes 46 | of this License, Derivative Works shall not include works that remain 47 | separable from, or merely link (or bind by name) to the interfaces of, 48 | the Work and Derivative Works thereof. 49 | 50 | "Contribution" shall mean any work of authorship, including 51 | the original version of the Work and any modifications or additions 52 | to that Work or Derivative Works thereof, that is intentionally 53 | submitted to Licensor for inclusion in the Work by the copyright owner 54 | or by an individual or Legal Entity authorized to submit on behalf of 55 | the copyright owner. For the purposes of this definition, "submitted" 56 | means any form of electronic, verbal, or written communication sent 57 | to the Licensor or its representatives, including but not limited to 58 | communication on electronic mailing lists, source code control systems, 59 | and issue tracking systems that are managed by, or on behalf of, the 60 | Licensor for the purpose of discussing and improving the Work, but 61 | excluding communication that is conspicuously marked or otherwise 62 | designated in writing by the copyright owner as "Not a Contribution." 63 | 64 | "Contributor" shall mean Licensor and any individual or Legal Entity 65 | on behalf of whom a Contribution has been received by Licensor and 66 | subsequently incorporated within the Work. 67 | 68 | 2. Grant of Copyright License. Subject to the terms and conditions of 69 | this License, each Contributor hereby grants to You a perpetual, 70 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 71 | copyright license to reproduce, prepare Derivative Works of, 72 | publicly display, publicly perform, sublicense, and distribute the 73 | Work and such Derivative Works in Source or Object form. 74 | 75 | 3. Grant of Patent License. Subject to the terms and conditions of 76 | this License, each Contributor hereby grants to You a perpetual, 77 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 78 | (except as stated in this section) patent license to make, have made, 79 | use, offer to sell, sell, import, and otherwise transfer the Work, 80 | where such license applies only to those patent claims licensable 81 | by such Contributor that are necessarily infringed by their 82 | Contribution(s) alone or by combination of their Contribution(s) 83 | with the Work to which such Contribution(s) was submitted. 
If You 84 | institute patent litigation against any entity (including a 85 | cross-claim or counterclaim in a lawsuit) alleging that the Work 86 | or a Contribution incorporated within the Work constitutes direct 87 | or contributory patent infringement, then any patent licenses 88 | granted to You under this License for that Work shall terminate 89 | as of the date such litigation is filed. 90 | 91 | 4. Redistribution. You may reproduce and distribute copies of the 92 | Work or Derivative Works thereof in any medium, with or without 93 | modifications, and in Source or Object form, provided that You 94 | meet the following conditions: 95 | 96 | (a) You must give any other recipients of the Work or 97 | Derivative Works a copy of this License; and 98 | 99 | (b) You must cause any modified files to carry prominent notices 100 | stating that You changed the files; and 101 | 102 | (c) You must retain, in the Source form of any Derivative Works 103 | that You distribute, all copyright, patent, trademark, and 104 | attribution notices from the Source form of the Work, 105 | excluding those notices that do not pertain to any part of 106 | the Derivative Works; and 107 | 108 | (d) If the Work includes a "NOTICE" text file as part of its 109 | distribution, then any Derivative Works that You distribute must 110 | include a readable copy of the attribution notices contained 111 | within such NOTICE file, excluding those notices that do not 112 | pertain to any part of the Derivative Works, in at least one 113 | of the following places: within a NOTICE text file distributed 114 | as part of the Derivative Works; within the Source form or 115 | documentation, if provided along with the Derivative Works; or, 116 | within a display generated by the Derivative Works, if and 117 | wherever such third-party notices normally appear. The contents 118 | of the NOTICE file are for informational purposes only and 119 | do not modify the License. You may add Your own attribution 120 | notices within Derivative Works that You distribute, alongside 121 | or as an addendum to the NOTICE text from the Work, provided 122 | that such additional attribution notices cannot be construed 123 | as modifying the License. 124 | 125 | You may add Your own copyright statement to Your modifications and 126 | may provide additional or different license terms and conditions 127 | for use, reproduction, or distribution of Your modifications, or 128 | for any such Derivative Works as a whole, provided Your use, 129 | reproduction, and distribution of the Work otherwise complies with 130 | the conditions stated in this License. 131 | 132 | 5. Submission of Contributions. Unless You explicitly state otherwise, 133 | any Contribution intentionally submitted for inclusion in the Work 134 | by You to the Licensor shall be under the terms and conditions of 135 | this License, without any additional terms or conditions. 136 | Notwithstanding the above, nothing herein shall supersede or modify 137 | the terms of any separate license agreement you may have executed 138 | with Licensor regarding such Contributions. 139 | 140 | 6. Trademarks. This License does not grant permission to use the trade 141 | names, trademarks, service marks, or product names of the Licensor, 142 | except as required for reasonable and customary use in describing the 143 | origin of the Work and reproducing the content of the NOTICE file. 144 | 145 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 146 | agreed to in writing, Licensor provides the Work (and each 147 | Contributor provides its Contributions) on an "AS IS" BASIS, 148 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 149 | implied, including, without limitation, any warranties or conditions 150 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 151 | PARTICULAR PURPOSE. You are solely responsible for determining the 152 | appropriateness of using or redistributing the Work and assume any 153 | risks associated with Your exercise of permissions under this License. 154 | 155 | 8. Limitation of Liability. In no event and under no legal theory, 156 | whether in tort (including negligence), contract, or otherwise, 157 | unless required by applicable law (such as deliberate and grossly 158 | negligent acts) or agreed to in writing, shall any Contributor be 159 | liable to You for damages, including any direct, indirect, special, 160 | incidental, or consequential damages of any character arising as a 161 | result of this License or out of the use or inability to use the 162 | Work (including but not limited to damages for loss of goodwill, 163 | work stoppage, computer failure or malfunction, or any and all 164 | other commercial damages or losses), even if such Contributor 165 | has been advised of the possibility of such damages. 166 | 167 | 9. Accepting Warranty or Additional Liability. While redistributing 168 | the Work or Derivative Works thereof, You may choose to offer, 169 | and charge a fee for, acceptance of support, warranty, indemnity, 170 | or other liability obligations and/or rights consistent with this 171 | License. However, in accepting such obligations, You may act only 172 | on Your own behalf and on Your sole responsibility, not on behalf 173 | of any other Contributor, and only if You agree to indemnify, 174 | defend, and hold each Contributor harmless for any liability 175 | incurred by, or claims asserted against, such Contributor by reason 176 | of your accepting any such warranty or additional liability. 177 | 178 | END OF TERMS AND CONDITIONS 179 | 180 | APPENDIX: How to apply the Apache License to your work. 181 | 182 | To apply the Apache License to your work, attach the following 183 | boilerplate notice, with the fields enclosed by brackets "[]" 184 | replaced with your own identifying information. (Don't include 185 | the brackets!) The text should be enclosed in the appropriate 186 | comment syntax for the file format. We also recommend that a 187 | file or class name and description of purpose be included on the 188 | same "printed page" as the copyright notice for easier 189 | identification within third-party archives. 190 | 191 | Copyright [yyyy] [name of copyright owner] 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 
204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KernelWarehouse: Rethinking the Design of Dynamic Convolution 2 | 3 | By Chao Li and Anbang Yao. 4 | 5 | This repository is an official PyTorch implementation of ["KernelWarehouse: Rethinking the Design of Dynamic Convolution", KW for short](http://arxiv.org/abs/2406.07879), which has been accepted to ICML 2024. 6 | 7 | Dynamic convolution learns a linear mixture of n static kernels weighted with their input-dependent attentions, demonstrating superior performance over normal convolution. However, it increases the number of convolutional parameters by a factor of n, and thus is not parameter efficient. As a result, no prior work has explored the setting n>100 (an order of magnitude larger than the typical setting n<10), which could push forward the performance boundary of dynamic convolution while enjoying parameter efficiency. To fill this gap, in this paper, we propose **KernelWarehouse**, a more general form of dynamic convolution, which redefines the basic concepts of "kernels", "assembling kernels" and "attention function" through the lens of exploiting convolutional parameter dependencies within the same layer and across neighboring layers of a ConvNet. We validate the effectiveness of KernelWarehouse on the [ImageNet](http://www.image-net.org) and [MS-COCO](https://cocodataset.org/#home) datasets using various ConvNet architectures. Intriguingly, KernelWarehouse is also applicable to Vision Transformers, and it can even reduce the model size of a backbone while improving the model accuracy. For instance, KernelWarehouse (n=4) achieves a 5.61%|3.90%|4.38% absolute top-1 accuracy gain on the ResNet18|MobileNetV2|DeiT-Tiny backbone, and KernelWarehouse (n=1/4) with a 65.10% model size reduction still achieves a 2.29% gain on the ResNet18 backbone. 8 | 9 |
![Schematic illustration of KernelWarehouse](fig/Fig_Architecture.png)
10 | 11 | Schematic illustration of KernelWarehouse. Briefly speaking, KernelWarehouse sequentially divides the static kernel $\mathbf{W}$ at any regular convolutional layer of a ConvNet into $m$ disjoint kernel cells $\mathbf{w}_ 1, \dots, \mathbf{w}_ m$ having the same dimensions first, and then computes each kernel cell $\mathbf{w}_ i$ as a linear mixture $\mathbf{w}_ i=\alpha_{i1} \mathbf{e}_ 1+\dots+\alpha_{in}\mathbf{e}_ n$ based on a predefined "warehouse" (consisting of $n$ same dimensioned kernel cells $\mathbf{e}_ 1,\dots,\mathbf{e}_ n$ , e.g., $n=108$) which is shared to all same-stage convolutional layers, and finally replaces the static kernel $\mathbf{W}$ by assembling its corresponding $m$ mixtures in order, yielding a high degree of freedom to fit a desired convolutional parameter budget. The input-dependent scalar attentions $\alpha_{i1},\dots,\alpha_{in}$ are computed with a novel contrasting-driven attention function (CAF). 12 | 13 | ## Dataset 14 | 15 | Following [this repository](https://github.com/pytorch/examples/tree/master/imagenet#requirements), 16 | 17 | - Download the ImageNet dataset from http://www.image-net.org/. 18 | - Then, move and extract the training and validation images to labeled subfolders, using [the following script](https://github.com/pytorch/examples/blob/main/imagenet/extract_ILSVRC.sh). 19 | 20 | ## Requirements 21 | 22 | - python >= 3.7.0 23 | - torch >= 1.8.1, torchvision >= 0.9.1 24 | - timm == 0.3.2, tensorboardX, six 25 | 26 | ## Results and Models 27 | 28 | Results comparison on the ImageNet validation set with the ResNet18, ResNet50 and ConvNeXt-Tiny backbones trained for 300 epochs. 29 | 30 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive | 31 | |:------------|:-------:|:------------:|:------------:|:-------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------:| 32 | | ResNet18 | 11.69M | 70.44 | 89.72 | [model](https://drive.google.com/file/d/1xpAiJyjVycr6b0RyKsDTXbUUz7etA2UV/view?usp=sharing) | [model](https://pan.baidu.com/s/16IRpCY0l_uHhJd-FMWllGg?pwd=w46e) | 33 | | + KW (1/4×) | 4.08M | 72.73 | 90.83 | [model](https://drive.google.com/file/d/1DMcTWMLjiLRpY38041nLlOU0Vb_zmUh6/view?usp=sharing) | [model](https://pan.baidu.com/s/11nv7S5oH7DHkuzCXgQbiXA?pwd=marv) | 34 | | + KW (1/2×) | 7.43M | 73.33 | 91.42 | [model](https://drive.google.com/file/d/1k5tyyDHu9mqsQtiwZu7tTMPPolPOjdbT/view?usp=sharing) | [model](https://pan.baidu.com/s/1bOjjVf5Z2YdKs5d7NlPamQ?pwd=t8ax) | 35 | | + KW (1×) | 11.93M | 74.77 | 92.13 | [model](https://drive.google.com/file/d/1aLaO8QV9QYdfv5GE2tg9iDWLeEJfkrKY/view?usp=sharing) | [model](https://pan.baidu.com/s/1TPcg7iMNdA_7w2FlkZaFWg?pwd=tu3x) | 36 | | + KW (2×) | 23.24M | 75.19 | 92.18 | [model](https://drive.google.com/file/d/1UB7nrqI2mkAIY_p7I5eOcitrmKYbjtsj/view?usp=sharing) | [model](https://pan.baidu.com/s/1WODJhFpgG8IPtZDeiOCFuw?pwd=jhtg) | 37 | | + KW (4×) | 45.86M | 76.05 | 92.68 | [model](https://drive.google.com/file/d/1gl1__aDrJIMs-Zrv3IgH7mfVnwgFCLbr/view?usp=sharing) | [model](https://pan.baidu.com/s/1u1jiDOt8IkE3nakFJPRkzA?pwd=75f1) | 38 | | ResNet50 | 25.56M | 78.44 | 94.24 | [model](https://drive.google.com/file/d/1x57Lip3xt1yFYGz7k6aCTR2yhP56TtKM/view?usp=sharing) | [model](https://pan.baidu.com/s/1O3CiCq1BNssIL1L2jNV_IA?pwd=ufyh) | 39 | | + KW (1/2×) | 17.64M | 79.30 | 94.71 | 
[model](https://drive.google.com/file/d/1lm_TlIUX7nmyjM7HNDWb7SxO65rgyrF9/view?usp=sharing) | [model](https://pan.baidu.com/s/1TeIqjTrUzNobDr07gN1dUw?pwd=7b6j) | 40 | | + KW (1×) | 28.05M | 80.38 | 95.19 | [model](https://drive.google.com/file/d/1lA155EYydHae_m__hk86tX_m9S0i85i8/view?usp=sharing) | [model](https://pan.baidu.com/s/1moFwCD7PhZ8Y0SPHB20hbA?pwd=jp8p) | 41 | | + KW (4×) | 102.02M | 81.05 | 95.21 | [model](https://drive.google.com/file/d/1pxU1oHIXerjVwzK7eFK7AGf1XZD_caMQ/view?usp=sharing) | [model](https://pan.baidu.com/s/1h-AHOozmqF1JIanes0-kiA?pwd=xmq7) | 42 | | ConvNeXt | 28.59M | 82.07 | 95.86 | [model](https://drive.google.com/file/d/1Yz_rooa7PMwE9Bdor00Mivtk61Xas7oh/view?usp=sharing) | [model](https://pan.baidu.com/s/1WCdlXqxpNkPCqvxowQnQfg?pwd=nm6j) | 43 | | + KW (1×) | 39.37M | 82.51 | 96.07 | [model](https://drive.google.com/file/d/13wqfY3jHxcskhHRchs0ynaQyVq0qUvn-/view?usp=sharing) | [model](https://pan.baidu.com/s/1EmTlEPMZnRAeO8QgMOfSPQ?pwd=z22e) | 44 | 45 | Results comparison on the ImageNet validation set with the MobileNetV2(1.0×, 0.5×) backbones trained for 150 epochs. 46 | 47 | 48 | | Models | Params | Top-1 Acc(%) | Top-5 Acc(%) | Google Drive | Baidu Drive | 49 | |:-------------------|:------:|:------------:|:------------:|:-------------:|:-------------:| 50 | | MobileNetV2 (1.0×) | 3.50M | 72.04 | 90.42 | [model](https://drive.google.com/file/d/1t97r1FM8hX2AtCjDn7k2TsM7HY6XwQjz/view?usp=sharing) | [model](https://pan.baidu.com/s/1GD_q4gSZowvssJpUdY7wXw?pwd=bks8)| 51 | | + KW (1/2×) | 2.65M | 72.59 | 90.71 | [model](https://drive.google.com/file/d/1I8JI1CtfKtUPMygqEVdD19c3PhSZReKJ/view?usp=sharing) | [model](https://pan.baidu.com/s/1EhHTze4gqcS16UnTzdjekg?pwd=65k8)| 52 | | + KW (1×) | 5.17M | 74.68 | 91.90 | [model](https://drive.google.com/file/d/1EWiUX8qaRj1kTI1ktBNAhWnhauV5eVdk/view?usp=sharing) | [model](https://pan.baidu.com/s/1zyLMX4PpUVAL9gVICFRmiA?pwd=8rcb)| 53 | | + KW (4×) | 11.38M | 75.92 | 92.22 | [model](https://drive.google.com/file/d/1xnzx41_sj3kZbR5Fzsfsb_PK5SEINXZ4/view?usp=sharing) | [model](https://pan.baidu.com/s/1Mb3buGekUCmseHWQNwLnjQ?pwd=ncrm)| 54 | | MobileNetV2 (0.5×) | 1.97M | 64.32 | 85.22 | [model](https://drive.google.com/file/d/1-L4EgH5hFQydocXjjT9oJCFVEItsD_eU/view?usp=sharing) | [model](https://pan.baidu.com/s/1zap9BSnry1WJy0SZDt4SIw?pwd=ueqv)| 55 | | + KW (1/2×) | 1.47M | 65.20 | 85.98 | [model](https://drive.google.com/file/d/1SByM8kJjb7seeYpY8lrSrv-hUOMJWRJE/view?usp=sharing) | [model](https://pan.baidu.com/s/13mzjh203BhRSETJiaJF3cw?pwd=tdck)| 56 | | + KW (1×) | 2.85M | 68.29 | 87.93 | [model](https://drive.google.com/file/d/1KFKy05JhhMnfj-tAz2SKzNRcBVMoJa19/view?usp=sharing) | [model](https://pan.baidu.com/s/1MHW2k5IkX1NPgM1KhQL29A?pwd=dajd)| 57 | | + KW (4×) | 4.65M | 70.26 | 89.19 | [model](https://drive.google.com/file/d/1Jt94_M7JQ6RDViYN3-P-4uoA8a5_bVYE/view?usp=sharing) | [model](https://pan.baidu.com/s/1uLBpreSm9MOtjPRWcM5SjA?pwd=whz2)| 58 | 59 | 60 | ## Training 61 | 62 | To train a model with KernelWarehouse: 63 | 64 | ```shell 65 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \ 66 | --batch_size {batch size per gpu} --update_freq {number of gradient accumulation steps} --data_path {path to dataset} \ 67 | --output_dir {path to output folder} 68 | ``` 69 | 70 | For example, to train ResNet18 + KW (1×) on 8 GPUs with batch size of 4096: 71 | 72 | ```shell 73 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config 
configs/resnet18/kw1x_resnet18.json \ 74 | --batch_size 128 --update_freq 4 --data_path {path to dataset} --output_dir {path to output folder} 75 | ``` 76 | 77 | For example, to train MobileNetV2 + KW (4×) on 8 GPUs with a total batch size of 256: 78 | 79 | ```shell 80 | python -m torch.distributed.launch --nproc_per_node=8 main.py --kw_config configs/mobilenetv2_100/kw4x_mobilenetv2_100.json \ 81 | --batch_size 32 --update_freq 1 --data_path {path to dataset} --output_dir {path to output folder} 82 | ``` 83 | 84 | You can add `--use_amp true` to enable Automatic Mixed Precision, which reduces memory usage and speeds up training. 85 | 86 | More config files for other models can be found in [configs](configs). 87 | 88 | ## Evaluation 89 | 90 | To evaluate a pre-trained model: 91 | 92 | ```shell 93 | python -m torch.distributed.launch --nproc_per_node={number of gpus} main.py --kw_config {path to config json} \ 94 | --eval true --data_path {path to dataset} --resume {path to model} 95 | ``` 96 | 97 | ## Training and evaluation on object detection and instance segmentation 98 | Please refer to the README.md in the [detection](detection) folder for details. 99 | 100 | ## Citation 101 | If you find our work useful in your research, please consider citing: 102 | ``` 103 | @inproceedings{li2024kernelwarehouse, 104 | title={KernelWarehouse: Rethinking the Design of Dynamic Convolution}, 105 | author={Chao Li and Anbang Yao}, 106 | booktitle={International Conference on Machine Learning}, 107 | year={2024} 108 | } 109 | ``` 110 | 111 | ## License 112 | KernelWarehouse is released under the Apache 2.0 license. We encourage use for both research and commercial purposes, as long as proper attribution is given. 113 | 114 | ## Acknowledgment 115 | This repository is built upon the [ConvNeXt](https://github.com/facebookresearch/ConvNeXt), [mmdetection](https://github.com/open-mmlab/mmdetection), [Dynamic-convolution-Pytorch](https://github.com/kaijieshi7/Dynamic-convolution-Pytorch) and [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) repositories. We thank the authors for releasing their code.
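## Appendix: a toy sketch of the kernel assembling process

For readers who want a concrete feel for the mechanism summarized in the schematic above before reading `modules/kernel_warehouse.py`, here is a deliberately simplified, self-contained PyTorch sketch. It is not the repository's implementation: it splits the kernel along the output-channel axis only, uses a single linear layer as the attention branch, replaces the contrasting-driven attention function with a plain l1 normalization, and omits cross-layer warehouse sharing as well as the temperature schedule (`temp_epoch` in the configs) that anneals the attentions during early training. All names and hyper-parameter values below are illustrative.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ToyWarehouseConv2d(nn.Module):
    """Toy 3x3 convolution whose kernel is assembled from n shared cells."""

    def __init__(self, in_ch, out_ch, m=4, n=8):
        super().__init__()
        assert out_ch % m == 0
        self.m, self.n = m, n
        # each of the m kernel cells w_i has this shape
        self.cell_shape = (out_ch // m, in_ch, 3, 3)
        # the "warehouse": n same-dimensioned kernel cells e_1, ..., e_n
        self.cells = nn.Parameter(torch.randn(n, *self.cell_shape) * 0.02)
        # attention branch: global average pooling -> one row of n logits per cell
        self.fc = nn.Linear(in_ch, m * n)

    def attention(self, x):
        z = self.fc(x.mean(dim=(2, 3))).view(-1, self.m, self.n)  # (B, m, n)
        # normalize logits by their l1 mass instead of a softmax, so attentions
        # can be negative -- a crude stand-in for the paper's CAF
        return z / z.abs().sum(dim=-1, keepdim=True).clamp_min(1e-6)

    def forward(self, x):
        b = x.shape[0]
        alpha = self.attention(x)                               # (B, m, n)
        # each kernel cell w_i = alpha_i1 * e_1 + ... + alpha_in * e_n
        w = torch.einsum('bmn,nd->bmd', alpha, self.cells.flatten(1))
        # assemble the m mixtures, in order, into one full kernel per sample
        w = w.reshape(b * self.m * self.cell_shape[0], *self.cell_shape[1:])
        # grouped-conv trick: apply a different assembled kernel to every sample
        y = F.conv2d(x.reshape(1, -1, *x.shape[2:]), w, padding=1, groups=b)
        return y.reshape(b, -1, *y.shape[2:])


if __name__ == "__main__":
    layer = ToyWarehouseConv2d(16, 32, m=4, n=8)
    print(layer(torch.randn(2, 16, 56, 56)).shape)  # torch.Size([2, 32, 56, 56])
```

In this picture, the `cell_num_ratio` field of the config files roughly controls how large n is relative to the parameter budget of the static kernels being replaced, which is what the 1/2×, 1×, 2× and 4× labels in the tables above refer to.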
116 | -------------------------------------------------------------------------------- /configs/convnext/convnext_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "convnext_tiny", 3 | "drop_path": 0.1, 4 | "model_ema": true, 5 | "model_ema_eval": true 6 | } -------------------------------------------------------------------------------- /configs/convnext/kw1x_convnext_tiny.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_convnext_tiny", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_pwconv", 8 | "drop_path": 0.3, 9 | "model_ema": true, 10 | "model_ema_eval": true 11 | } 12 | -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw1d2x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw1d2x_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true, 16 | "temp_epoch": 15 17 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw1x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "cell_num_ratio": 1, 16 | "cell_inplane_ratio": 1, 17 | "cell_outplane_ratio": 1, 18 | "sharing_range": ["layer", "pwconv"], 19 | "traditional_preprocess": true, 20 | "temp_epoch": 15 21 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/kw4x_mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 4, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_050/mobilenetv2_050.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "mobilenetv2_050", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true 16 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw1d2x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw1d2x_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": 
"sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true, 16 | "temp_epoch": 15 17 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw1x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 1, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer", "pwconv"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/kw4x_mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "reprob": 0, 12 | "mixup": 0, 13 | "cutmix": 0, 14 | "cell_num_ratio": 4, 15 | "cell_inplane_ratio": 1, 16 | "cell_outplane_ratio": 1, 17 | "sharing_range": ["layer"], 18 | "traditional_preprocess": true, 19 | "temp_epoch": 15 20 | } -------------------------------------------------------------------------------- /configs/mobilenetv2_100/mobilenetv2_100.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "mobilenetv2_100", 3 | "epochs": 150, 4 | "drop_path": 0.0, 5 | "opt": "sgd", 6 | "weight_decay": 4e-5, 7 | "lr": 0.05, 8 | "warmup_epochs": 0, 9 | "color_jitter": 0, 10 | "aa": null, 11 | "smoothing": 0, 12 | "reprob": 0, 13 | "mixup": 0, 14 | "cutmix": 0, 15 | "traditional_preprocess": true 16 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1d2x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.5, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.0, 9 | "dropout": 0.0 10 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1d4x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.25, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.0, 9 | "dropout": 0.0, 10 | "mixup": 0, 11 | "cutmix": 0 12 | } -------------------------------------------------------------------------------- /configs/resnet18/kw1x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/kw2x_resnet18.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 2, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/kw4x_resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet18", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 4, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet18/resnet18.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "resnet18", 3 | "drop_path": 0.0 4 | } -------------------------------------------------------------------------------- /configs/resnet50/kw1d2x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 0.5, 5 | "cell_inplane_ratio": 0.5, 6 | "cell_outplane_ratio": 0.5, 7 | "sharing_range": "layer_conv", 8 | "nonlocal_basis_ratio": 0.0625, 9 | "drop_path": 0.1 10 | } -------------------------------------------------------------------------------- /configs/resnet50/kw1x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 1, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet50/kw4x_resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "kw_resnet50", 3 | "reduction": 0.0625, 4 | "cell_num_ratio": 4, 5 | "cell_inplane_ratio": 1, 6 | "cell_outplane_ratio": 1, 7 | "sharing_range": "layer_conv", 8 | "drop_path": 0.1 9 | } -------------------------------------------------------------------------------- /configs/resnet50/resnet50.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "resnet50", 3 | "drop_path": 0.1 4 | } -------------------------------------------------------------------------------- /datasets.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torchvision import datasets, transforms 3 | 4 | from timm.data.constants import \ 5 | IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD 6 | from timm.data import create_transform 7 | 8 | def build_dataset(is_train, args): 9 | if args.traditional_preprocess: 10 | transform = build_transform_tradition(is_train, args) 11 | else: 12 | transform = build_transform(is_train, args) 13 | 14 | print("Transform = ") 15 | if isinstance(transform, tuple): 16 | for trans in transform: 17 | print(" - - - - - - - - - - ") 18 | for t in trans.transforms: 19 | print(t) 20 | else: 21 | for t in transform.transforms: 22 | print(t) 23 | print("---------------------------") 24 | 25 | if args.data_set == 'CIFAR': 26 | dataset = datasets.CIFAR100(args.data_path, train=is_train, transform=transform, download=True) 27 | nb_classes = 100 28 | elif args.data_set == 
'IMNET': 29 | print("reading from datapath", args.data_path) 30 | root = os.path.join(args.data_path, 'train' if is_train else 'val') 31 | dataset = datasets.ImageFolder(root, transform=transform) 32 | nb_classes = 1000 33 | elif args.data_set == "image_folder": 34 | root = args.data_path if is_train else args.eval_data_path 35 | dataset = datasets.ImageFolder(root, transform=transform) 36 | nb_classes = args.nb_classes 37 | assert len(dataset.class_to_idx) == nb_classes 38 | else: 39 | raise NotImplementedError() 40 | print("Number of the class = %d" % nb_classes) 41 | 42 | return dataset, nb_classes 43 | 44 | 45 | def build_transform(is_train, args): 46 | resize_im = args.input_size > 32 47 | imagenet_default_mean_and_std = args.imagenet_default_mean_and_std 48 | mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN 49 | std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD 50 | 51 | if is_train: 52 | # this should always dispatch to transforms_imagenet_train 53 | transform = create_transform( 54 | input_size=args.input_size, 55 | is_training=True, 56 | color_jitter=args.color_jitter, 57 | auto_augment=args.aa, 58 | interpolation=args.train_interpolation, 59 | re_prob=args.reprob, 60 | re_mode=args.remode, 61 | re_count=args.recount, 62 | mean=mean, 63 | std=std, 64 | ) 65 | if not resize_im: 66 | transform.transforms[0] = transforms.RandomCrop( 67 | args.input_size, padding=4) 68 | return transform 69 | 70 | t = [] 71 | if resize_im: 72 | # warping (no cropping) when evaluated at 384 or larger 73 | if args.input_size >= 384: 74 | t.append( 75 | transforms.Resize((args.input_size, args.input_size), 76 | interpolation=transforms.InterpolationMode.BICUBIC), 77 | ) 78 | print(f"Warping {args.input_size} size input images...") 79 | else: 80 | if args.crop_pct is None: 81 | args.crop_pct = 224 / 256 82 | size = int(args.input_size / args.crop_pct) 83 | t.append( 84 | # to maintain same ratio w.r.t. 224 images 85 | transforms.Resize(size, interpolation=transforms.InterpolationMode.BICUBIC), 86 | ) 87 | t.append(transforms.CenterCrop(args.input_size)) 88 | 89 | t.append(transforms.ToTensor()) 90 | t.append(transforms.Normalize(mean, std)) 91 | return transforms.Compose(t) 92 | 93 | 94 | def build_transform_tradition(is_train, args): 95 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 96 | std=[0.229, 0.224, 0.225]) 97 | if is_train: 98 | t = transforms.Compose([ 99 | transforms.RandomResizedCrop(224), 100 | transforms.RandomHorizontalFlip(), 101 | transforms.ToTensor(), 102 | normalize, 103 | ]) 104 | else: 105 | t = transforms.Compose([ 106 | transforms.Resize(256), 107 | transforms.CenterCrop(224), 108 | transforms.ToTensor(), 109 | normalize, 110 | ]) 111 | return t 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /detection/README.md: -------------------------------------------------------------------------------- 1 | # MS-COCO Object Detection with KernelWarehouse 2 | 3 | We use the popular [MMDetection](https://github.com/open-mmlab/mmdetection) toolbox for experiments on the MS-COCO dataset with the pre-trained ResNet50, MobileNetV2 (1.0×) and ConvNeXt-Tiny models as the backbones for the detector. We select the mainstream Faster RCNN and Mask R-CNN detectors with Feature Pyramid Networks as the necks to build the basic object detection systems. 
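All KernelWarehouse backbones used here are registered under `mmdet/models/backbones` (see its `__init__.py`), so turning a baseline config into its KW counterpart essentially means swapping the backbone `type` and adding the warehouse hyper-parameters. The fragment below is a trimmed-down orientation sketch in the spirit of the complete config files shipped in `configs/kernelwarehouse/`, which remain the authoritative reference; the pretrained path is a placeholder.

```python
# Illustrative fragment only -- see configs/kernelwarehouse/resnet50/ for the
# complete configs this is distilled from.
_base_ = [
    '../../_base_/models/mask_rcnn_r50_fpn.py',
    '../../_base_/datasets/coco_instance.py',
    '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py'
]

model = dict(
    pretrained='/path/to/imagenet_pretrained/kw1x_resnet50.pth',  # placeholder
    backbone=dict(
        type='KW_ResNet',         # KernelWarehouse ResNet registered by this repo
        depth=50,
        norm_eval=True,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        cell_num_ratio=1,         # 1 for KW (1x); larger values give the nx variants
    ))
```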
4 | 5 | 6 | ## Training 7 | 8 | Please follow [Swin-Transformer-Object-Detection](https://github.com/SwinTransformer/Swin-Transformer-Object-Detection) on how to prepare the environment and the dataset. Then attach our code to the origin project and modify the config files according to your own path to the pre-trained models and directories to save logs and models. 9 | 10 | To train a detector with pre-trained models as backbone: 11 | 12 | ```shell 13 | bash tools/dist_train.sh {path to config file} {number of gpus} 14 | ``` 15 | 16 | ## Evaluation 17 | 18 | To evaluate a fine-tuned model: 19 | ```shell 20 | bash tools/dist_test.sh {path to config file} {path to fine-tuned model} {number of gpus} --eval bbox segm --show 21 | ``` 22 | 23 | 24 | ## Results and Models 25 | 26 | | Backbones | Detectors | box AP | mask AP | Config | Google Drive | Baidu Drive | 27 | |:------------|:-------:|:------:|:-------:|:-------------:|:-------------:|:-------------:| 28 | | ResNet50 | Mask R-CNN | 39.6 | 36.4 | [config](configs/kernelwarehouse/mask_rcnn_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1j6wSJLett-JeVDTh7CW7CHhC4jQHDzad/view?usp=sharing) | [model](https://pan.baidu.com/s/1U7q2U0jYXjDCAVxqUMWmHw?pwd=4wih) | 29 | | + KW (1×) | Mask R-CNN | 41.8 | 38.4 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1XBXKF8TU0iMFVBt-IF048hAmYTL9-spk/view?usp=sharing) | [model](https://pan.baidu.com/s/1AI01STe9v0KzAKVVPMUhog?pwd=a7ce) | 30 | | + KW (4×) | Mask R-CNN | 42.4 | 38.9 | [config](configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1GUDEr2jNT0Il8A04g_f8sRQ1WFAycOO8/view?usp=sharing) | [model](https://pan.baidu.com/s/1ZSJkfVy8xr5IB_OfubXzRw?pwd=xig5) | 31 | | MobileNetV2 (1.0×) | Mask R-CNN | 33.8 | 31.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1UJifIqx56cOOm2hx-D3DSHh4MWgFzOVB/view?usp=sharing) | [model](https://pan.baidu.com/s/1S7vo59mzEVL_8ai9Sg1iUQ?pwd=4sh8) | 32 | | + KW (1×) | Mask R-CNN | 36.4 | 33.7 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1wdzs-Ry6LefgG4Nc9RWUlrDrsyGOWhL5/view?usp=sharing) | [model](https://pan.baidu.com/s/1q3U4Euw2qNCWXipPCn4vtQ?pwd=8g38) | 33 | | + KW (4×) | Mask R-CNN | 38.0 | 34.9 | [config](configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/14nfWpHUHgH0mA4gbEPX3F_3UqOXPIGK7/view?usp=sharing) | [model](https://pan.baidu.com/s/1HidKe3MgnIEERvvKgdYMHg?pwd=n5uu) | 34 | | ConvNeXt-Tiny | Mask R-CNN | 43.4 | 39.7 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1xarty4OTZOKGo1ltAUcTJCoKHCIOipC6/view?usp=sharing) | [model](https://pan.baidu.com/s/1bouC_aK9C1czPrIYkkS3Ug?pwd=79f4) | 35 | | + KW (4×) | Mask R-CNN | 44.7 | 40.6 | [config](configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py) | [model](https://drive.google.com/file/d/1simtPisVzZo__iSXZwrynWi6TlUwPG3b/view?usp=sharing) | [model](https://pan.baidu.com/s/1iBD4lCrvSTX0Wu7e2I0BKg?pwd=am2w) | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_convnext_fpn.py: 
-------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MaskRCNN', 3 | backbone=dict( 4 | type='ConvNeXt', 5 | in_chans=3, 6 | depths=[3, 3, 9, 3], 7 | dims=[96, 192, 384, 768], 8 | drop_path_rate=0.2, 9 | layer_scale_init_value=1e-6, 10 | out_indices=[0, 1, 2, 3], 11 | ), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=80, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 54 | mask_roi_extractor=dict( 55 | type='SingleRoIExtractor', 56 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 57 | out_channels=256, 58 | featmap_strides=[4, 8, 16, 32]), 59 | mask_head=dict( 60 | type='FCNMaskHead', 61 | num_convs=4, 62 | in_channels=256, 63 | conv_out_channels=256, 64 | num_classes=80, 65 | loss_mask=dict( 66 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 67 | # model training and testing settings 68 | train_cfg=dict( 69 | rpn=dict( 70 | assigner=dict( 71 | type='MaxIoUAssigner', 72 | pos_iou_thr=0.7, 73 | neg_iou_thr=0.3, 74 | min_pos_iou=0.3, 75 | match_low_quality=True, 76 | ignore_iof_thr=-1), 77 | sampler=dict( 78 | type='RandomSampler', 79 | num=256, 80 | pos_fraction=0.5, 81 | neg_pos_ub=-1, 82 | add_gt_as_proposals=False), 83 | allowed_border=-1, 84 | pos_weight=-1, 85 | debug=False), 86 | rpn_proposal=dict( 87 | nms_pre=2000, 88 | max_per_img=1000, 89 | nms=dict(type='nms', iou_threshold=0.7), 90 | min_bbox_size=0), 91 | rcnn=dict( 92 | assigner=dict( 93 | type='MaxIoUAssigner', 94 | pos_iou_thr=0.5, 95 | neg_iou_thr=0.5, 96 | min_pos_iou=0.5, 97 | match_low_quality=True, 98 | ignore_iof_thr=-1), 99 | sampler=dict( 100 | type='RandomSampler', 101 | num=512, 102 | pos_fraction=0.25, 103 | neg_pos_ub=-1, 104 | add_gt_as_proposals=True), 105 | mask_size=28, 106 | pos_weight=-1, 107 | debug=False)), 108 | test_cfg=dict( 109 | rpn=dict( 110 | nms_pre=1000, 111 | max_per_img=1000, 112 | nms=dict(type='nms', iou_threshold=0.7), 113 | min_bbox_size=0), 114 | rcnn=dict( 115 | score_thr=0.05, 116 | nms=dict(type='nms', iou_threshold=0.5), 117 | max_per_img=100, 118 | mask_thr_binary=0.5))) -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_mobilenetv2_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model 
= dict( 3 | type='MaskRCNN', 4 | backbone=dict( 5 | type='MobileNetV2', 6 | width_mult=1.0, 7 | norm_eval=True, 8 | out_indices=(1, 2, 3, 4), 9 | frozen_stages=1), 10 | neck=dict( 11 | type='FPN', 12 | in_channels=[24, 32, 96, 1280], 13 | out_channels=256, 14 | num_outs=5), 15 | rpn_head=dict( 16 | type='RPNHead', 17 | in_channels=256, 18 | feat_channels=256, 19 | anchor_generator=dict( 20 | type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict( 25 | type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0]), 28 | loss_cls=dict( 29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict( 32 | type='StandardRoIHead', 33 | bbox_roi_extractor=dict( 34 | type='SingleRoIExtractor', 35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=False, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict( 53 | type='SingleRoIExtractor', 54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 55 | out_channels=256, 56 | featmap_strides=[4, 8, 16, 32]), 57 | mask_head=dict( 58 | type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict( 64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 65 | # model training and testing settings 66 | train_cfg=dict( 67 | rpn=dict( 68 | assigner=dict( 69 | type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict( 76 | type='RandomSampler', 77 | num=256, 78 | pos_fraction=0.5, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=False), 81 | allowed_border=-1, 82 | pos_weight=-1, 83 | debug=False), 84 | rpn_proposal=dict( 85 | nms_pre=2000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict( 90 | assigner=dict( 91 | type='MaxIoUAssigner', 92 | pos_iou_thr=0.5, 93 | neg_iou_thr=0.5, 94 | min_pos_iou=0.5, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict( 98 | type='RandomSampler', 99 | num=512, 100 | pos_fraction=0.25, 101 | neg_pos_ub=-1, 102 | add_gt_as_proposals=True), 103 | mask_size=28, 104 | pos_weight=-1, 105 | debug=False)), 106 | test_cfg=dict( 107 | rpn=dict( 108 | nms_pre=1000, 109 | max_per_img=1000, 110 | nms=dict(type='nms', iou_threshold=0.7), 111 | min_bbox_size=0), 112 | rcnn=dict( 113 | score_thr=0.05, 114 | nms=dict(type='nms', iou_threshold=0.5), 115 | max_per_img=100, 116 | mask_thr_binary=0.5))) 117 | -------------------------------------------------------------------------------- /detection/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='MaskRCNN', 3 | backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_eval=True,), 10 | neck=dict( 11 
| type='FPN', 12 | in_channels=[256, 512, 1024, 2048], 13 | out_channels=256, 14 | num_outs=5), 15 | rpn_head=dict( 16 | type='RPNHead', 17 | in_channels=256, 18 | feat_channels=256, 19 | anchor_generator=dict( 20 | type='AnchorGenerator', 21 | scales=[8], 22 | ratios=[0.5, 1.0, 2.0], 23 | strides=[4, 8, 16, 32, 64]), 24 | bbox_coder=dict( 25 | type='DeltaXYWHBBoxCoder', 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0]), 28 | loss_cls=dict( 29 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 30 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 31 | roi_head=dict( 32 | type='StandardRoIHead', 33 | bbox_roi_extractor=dict( 34 | type='SingleRoIExtractor', 35 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 36 | out_channels=256, 37 | featmap_strides=[4, 8, 16, 32]), 38 | bbox_head=dict( 39 | type='Shared2FCBBoxHead', 40 | in_channels=256, 41 | fc_out_channels=1024, 42 | roi_feat_size=7, 43 | num_classes=80, 44 | bbox_coder=dict( 45 | type='DeltaXYWHBBoxCoder', 46 | target_means=[0., 0., 0., 0.], 47 | target_stds=[0.1, 0.1, 0.2, 0.2]), 48 | reg_class_agnostic=False, 49 | loss_cls=dict( 50 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 51 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 52 | mask_roi_extractor=dict( 53 | type='SingleRoIExtractor', 54 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 55 | out_channels=256, 56 | featmap_strides=[4, 8, 16, 32]), 57 | mask_head=dict( 58 | type='FCNMaskHead', 59 | num_convs=4, 60 | in_channels=256, 61 | conv_out_channels=256, 62 | num_classes=80, 63 | loss_mask=dict( 64 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 65 | # model training and testing settings 66 | train_cfg=dict( 67 | rpn=dict( 68 | assigner=dict( 69 | type='MaxIoUAssigner', 70 | pos_iou_thr=0.7, 71 | neg_iou_thr=0.3, 72 | min_pos_iou=0.3, 73 | match_low_quality=True, 74 | ignore_iof_thr=-1), 75 | sampler=dict( 76 | type='RandomSampler', 77 | num=256, 78 | pos_fraction=0.5, 79 | neg_pos_ub=-1, 80 | add_gt_as_proposals=False), 81 | allowed_border=-1, 82 | pos_weight=-1, 83 | debug=False), 84 | rpn_proposal=dict( 85 | nms_pre=2000, 86 | max_per_img=1000, 87 | nms=dict(type='nms', iou_threshold=0.7), 88 | min_bbox_size=0), 89 | rcnn=dict( 90 | assigner=dict( 91 | type='MaxIoUAssigner', 92 | pos_iou_thr=0.5, 93 | neg_iou_thr=0.5, 94 | min_pos_iou=0.5, 95 | match_low_quality=True, 96 | ignore_iof_thr=-1), 97 | sampler=dict( 98 | type='RandomSampler', 99 | num=512, 100 | pos_fraction=0.25, 101 | neg_pos_ub=-1, 102 | add_gt_as_proposals=True), 103 | mask_size=28, 104 | pos_weight=-1, 105 | debug=False)), 106 | test_cfg=dict( 107 | rpn=dict( 108 | nms_pre=1000, 109 | max_per_img=1000, 110 | nms=dict(type='nms', iou_threshold=0.7), 111 | min_bbox_size=0), 112 | rcnn=dict( 113 | score_thr=0.05, 114 | nms=dict(type='nms', iou_threshold=0.5), 115 | max_per_img=100, 116 | mask_thr_binary=0.5))) 117 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_convnext_tiny_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_convnext_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='ConvNeXt', 11 | in_chans=3, 12 | depths=[3, 
3, 9, 3], 13 | dims=[96, 192, 384, 768], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | out_indices=[0, 1, 2, 3], 17 | ), 18 | neck=dict(in_channels=[96, 192, 384, 768])) 19 | 20 | img_norm_cfg = dict( 21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 22 | 23 | # augmentation strategy originates from DETR / Sparse RCNN 24 | train_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='AutoAugment', 29 | policies=[ 30 | [ 31 | dict(type='Resize', 32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 34 | (736, 1333), (768, 1333), (800, 1333)], 35 | multiscale_mode='value', 36 | keep_ratio=True) 37 | ], 38 | [ 39 | dict(type='Resize', 40 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True), 43 | dict(type='RandomCrop', 44 | crop_type='absolute_range', 45 | crop_size=(384, 600), 46 | allow_negative_crop=True), 47 | dict(type='Resize', 48 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 49 | (576, 1333), (608, 1333), (640, 1333), 50 | (672, 1333), (704, 1333), (736, 1333), 51 | (768, 1333), (800, 1333)], 52 | multiscale_mode='value', 53 | override=True, 54 | keep_ratio=True) 55 | ] 56 | ]), 57 | dict(type='Normalize', **img_norm_cfg), 58 | dict(type='Pad', size_divisor=32), 59 | dict(type='DefaultFormatBundle'), 60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 61 | ] 62 | data = dict(train=dict(pipeline=train_pipeline)) 63 | 64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 66 | paramwise_cfg={'decay_rate': 0.95, 67 | 'decay_type': 'layer_wise', 68 | 'num_layers': 6}) 69 | 70 | lr_config = dict(step=[9, 11]) 71 | runner = dict(type='EpochBasedRunner', max_epochs=12) -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/convnext_tiny/mask_rcnn_kw1x_convnext_tiny_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_convnext_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ConvNeXt', 11 | in_chans=3, 12 | depths=[3, 3, 9, 3], 13 | dims=[96, 192, 384, 768], 14 | drop_path_rate=0.4, 15 | layer_scale_init_value=1.0, 16 | out_indices=[0, 1, 2, 3], 17 | ), 18 | neck=dict(in_channels=[96, 192, 384, 768])) 19 | 20 | img_norm_cfg = dict( 21 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 22 | 23 | # augmentation strategy originates from DETR / Sparse RCNN 24 | train_pipeline = [ 25 | dict(type='LoadImageFromFile'), 26 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 27 | dict(type='RandomFlip', flip_ratio=0.5), 28 | dict(type='AutoAugment', 29 | policies=[ 30 | [ 31 | dict(type='Resize', 32 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 33 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 34 | (736, 1333), (768, 1333), (800, 1333)], 35 | multiscale_mode='value', 36 | keep_ratio=True) 37 | ], 38 | [ 39 | dict(type='Resize', 40 | img_scale=[(400, 1333), (500, 1333), (600, 
1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True), 43 | dict(type='RandomCrop', 44 | crop_type='absolute_range', 45 | crop_size=(384, 600), 46 | allow_negative_crop=True), 47 | dict(type='Resize', 48 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 49 | (576, 1333), (608, 1333), (640, 1333), 50 | (672, 1333), (704, 1333), (736, 1333), 51 | (768, 1333), (800, 1333)], 52 | multiscale_mode='value', 53 | override=True, 54 | keep_ratio=True) 55 | ] 56 | ]), 57 | dict(type='Normalize', **img_norm_cfg), 58 | dict(type='Pad', size_divisor=32), 59 | dict(type='DefaultFormatBundle'), 60 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 61 | ] 62 | data = dict(train=dict(pipeline=train_pipeline)) 63 | 64 | optimizer = dict(constructor='LearningRateDecayOptimizerConstructor', _delete_=True, type='AdamW', 65 | lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 66 | paramwise_cfg={'decay_rate': 0.95, 67 | 'decay_type': 'layer_wise', 68 | 'num_layers': 6}) 69 | 70 | lr_config = dict(step=[9, 11]) 71 | runner = dict(type='EpochBasedRunner', max_epochs=12) 72 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw1x_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | cell_num_ratio=1, 16 | sharing_range=('layer', 'pwconv') 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[24, 32, 96, 1280], 21 | out_channels=256, 22 | num_outs=5 23 | ), 24 | ) 25 | 26 | img_norm_cfg = dict( 27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 28 | 29 | # augmentation strategy originates from DETR / Sparse RCNN 30 | train_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 33 | dict(type='RandomFlip', flip_ratio=0.5), 34 | dict(type='AutoAugment', 35 | policies=[ 36 | [ 37 | dict(type='Resize', 38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 40 | (736, 1333), (768, 1333), (800, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True) 43 | ], 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 47 | multiscale_mode='value', 48 | keep_ratio=True), 49 | dict(type='RandomCrop', 50 | crop_type='absolute_range', 51 | crop_size=(384, 600), 52 | allow_negative_crop=True), 53 | dict(type='Resize', 54 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 55 | (576, 1333), (608, 1333), (640, 1333), 56 | (672, 1333), (704, 1333), (736, 1333), 57 | (768, 1333), (800, 1333)], 58 | multiscale_mode='value', 59 | override=True, 60 | keep_ratio=True) 61 | ] 62 | ]), 63 | dict(type='Normalize', **img_norm_cfg), 64 | dict(type='Pad', size_divisor=32), 65 | dict(type='DefaultFormatBundle'), 66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 67 | ] 68 | data = dict(train=dict(pipeline=train_pipeline)) 69 | 70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 71 | 
paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 72 | 'relative_position_bias_table': dict(decay_mult=0.), 73 | 'norm': dict(decay_mult=0.)})) 74 | 75 | lr_config = dict(step=[8, 11]) 76 | runner = dict(type='EpochBasedRunner', max_epochs=12) 77 | find_unused_parameters = True 78 | 79 | 80 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_kw4x_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | cell_num_ratio=4, 16 | sharing_range=('layer',) 17 | ), 18 | neck=dict( 19 | type='FPN', 20 | in_channels=[24, 32, 96, 1280], 21 | out_channels=256, 22 | num_outs=5 23 | ), 24 | ) 25 | 26 | img_norm_cfg = dict( 27 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 28 | 29 | # augmentation strategy originates from DETR / Sparse RCNN 30 | train_pipeline = [ 31 | dict(type='LoadImageFromFile'), 32 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 33 | dict(type='RandomFlip', flip_ratio=0.5), 34 | dict(type='AutoAugment', 35 | policies=[ 36 | [ 37 | dict(type='Resize', 38 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 39 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 40 | (736, 1333), (768, 1333), (800, 1333)], 41 | multiscale_mode='value', 42 | keep_ratio=True) 43 | ], 44 | [ 45 | dict(type='Resize', 46 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 47 | multiscale_mode='value', 48 | keep_ratio=True), 49 | dict(type='RandomCrop', 50 | crop_type='absolute_range', 51 | crop_size=(384, 600), 52 | allow_negative_crop=True), 53 | dict(type='Resize', 54 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 55 | (576, 1333), (608, 1333), (640, 1333), 56 | (672, 1333), (704, 1333), (736, 1333), 57 | (768, 1333), (800, 1333)], 58 | multiscale_mode='value', 59 | override=True, 60 | keep_ratio=True) 61 | ] 62 | ]), 63 | dict(type='Normalize', **img_norm_cfg), 64 | dict(type='Pad', size_divisor=32), 65 | dict(type='DefaultFormatBundle'), 66 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 67 | ] 68 | data = dict(train=dict(pipeline=train_pipeline)) 69 | 70 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 71 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 72 | 'relative_position_bias_table': dict(decay_mult=0.), 73 | 'norm': dict(decay_mult=0.)})) 74 | 75 | lr_config = dict(step=[8, 11]) 76 | runner = dict(type='EpochBasedRunner', max_epochs=12) 77 | find_unused_parameters = True 78 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/mobilenetv2/mask_rcnn_mobilenetv2_100_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_mobilenetv2_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 |
pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='MobileNetV2', 11 | width_mult=1.0, 12 | norm_eval=True, 13 | out_indices=(1, 2, 3, 4), 14 | frozen_stages=1, 15 | ), 16 | neck=dict( 17 | type='FPN', 18 | in_channels=[24, 32, 96, 1280], 19 | out_channels=256, 20 | num_outs=5 21 | ), 22 | ) 23 | 24 | img_norm_cfg = dict( 25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 26 | 27 | # augmentation strategy originates from DETR / Sparse RCNN 28 | train_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 31 | dict(type='RandomFlip', flip_ratio=0.5), 32 | dict(type='AutoAugment', 33 | policies=[ 34 | [ 35 | dict(type='Resize', 36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 38 | (736, 1333), (768, 1333), (800, 1333)], 39 | multiscale_mode='value', 40 | keep_ratio=True) 41 | ], 42 | [ 43 | dict(type='Resize', 44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 45 | multiscale_mode='value', 46 | keep_ratio=True), 47 | dict(type='RandomCrop', 48 | crop_type='absolute_range', 49 | crop_size=(384, 600), 50 | allow_negative_crop=True), 51 | dict(type='Resize', 52 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 53 | (576, 1333), (608, 1333), (640, 1333), 54 | (672, 1333), (704, 1333), (736, 1333), 55 | (768, 1333), (800, 1333)], 56 | multiscale_mode='value', 57 | override=True, 58 | keep_ratio=True) 59 | ] 60 | ]), 61 | dict(type='Normalize', **img_norm_cfg), 62 | dict(type='Pad', size_divisor=32), 63 | dict(type='DefaultFormatBundle'), 64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 65 | ] 66 | data = dict(train=dict(pipeline=train_pipeline)) 67 | 68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 70 | 'relative_position_bias_table': dict(decay_mult=0.), 71 | 'norm': dict(decay_mult=0.)})) 72 | 73 | lr_config = dict(step=[8, 11]) 74 | runner = dict(type='EpochBasedRunner', max_epochs=12) 75 | find_unused_parameters = True 76 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw1x_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | cell_num_ratio=1, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=5 22 | ), 23 | ) 24 | 25 | img_norm_cfg = dict( 26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 27 | 28 | # augmentation strategy originates from DETR / Sparse RCNN 29 | train_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 32 | dict(type='RandomFlip', flip_ratio=0.5), 33 | dict(type='AutoAugment', 34 | policies=[ 35 | [ 36 | dict(type='Resize', 37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 38 | (608, 1333), (640, 1333), (672, 1333), 
(704, 1333), 39 | (736, 1333), (768, 1333), (800, 1333)], 40 | multiscale_mode='value', 41 | keep_ratio=True) 42 | ], 43 | [ 44 | dict(type='Resize', 45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 46 | multiscale_mode='value', 47 | keep_ratio=True), 48 | dict(type='RandomCrop', 49 | crop_type='absolute_range', 50 | crop_size=(384, 600), 51 | allow_negative_crop=True), 52 | dict(type='Resize', 53 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 54 | (576, 1333), (608, 1333), (640, 1333), 55 | (672, 1333), (704, 1333), (736, 1333), 56 | (768, 1333), (800, 1333)], 57 | multiscale_mode='value', 58 | override=True, 59 | keep_ratio=True) 60 | ] 61 | ]), 62 | dict(type='Normalize', **img_norm_cfg), 63 | dict(type='Pad', size_divisor=32), 64 | dict(type='DefaultFormatBundle'), 65 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 66 | ] 67 | data = dict(train=dict(pipeline=train_pipeline)) 68 | 69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 71 | 'relative_position_bias_table': dict(decay_mult=0.), 72 | 'norm': dict(decay_mult=0.)})) 73 | 74 | lr_config = dict(step=[8, 11]) 75 | runner = dict(type='EpochBasedRunner', max_epochs=12) 76 | find_unused_parameters = True 77 | 78 | 79 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_kw4x_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='KW_ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | cell_num_ratio=4, 16 | ), 17 | neck=dict( 18 | type='FPN', 19 | in_channels=[256, 512, 1024, 2048], 20 | out_channels=256, 21 | num_outs=5 22 | ), 23 | ) 24 | 25 | img_norm_cfg = dict( 26 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 27 | 28 | # augmentation strategy originates from DETR / Sparse RCNN 29 | train_pipeline = [ 30 | dict(type='LoadImageFromFile'), 31 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 32 | dict(type='RandomFlip', flip_ratio=0.5), 33 | dict(type='AutoAugment', 34 | policies=[ 35 | [ 36 | dict(type='Resize', 37 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 38 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 39 | (736, 1333), (768, 1333), (800, 1333)], 40 | multiscale_mode='value', 41 | keep_ratio=True) 42 | ], 43 | [ 44 | dict(type='Resize', 45 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 46 | multiscale_mode='value', 47 | keep_ratio=True), 48 | dict(type='RandomCrop', 49 | crop_type='absolute_range', 50 | crop_size=(384, 600), 51 | allow_negative_crop=True), 52 | dict(type='Resize', 53 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 54 | (576, 1333), (608, 1333), (640, 1333), 55 | (672, 1333), (704, 1333), (736, 1333), 56 | (768, 1333), (800, 1333)], 57 | multiscale_mode='value', 58 | override=True, 59 | keep_ratio=True) 60 | ] 61 | ]), 62 | dict(type='Normalize', **img_norm_cfg), 63 | dict(type='Pad', size_divisor=32), 64 | dict(type='DefaultFormatBundle'), 65 | dict(type='Collect', keys=['img', 'gt_bboxes', 
'gt_labels', 'gt_masks']), 66 | ] 67 | data = dict(train=dict(pipeline=train_pipeline)) 68 | 69 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 70 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 71 | 'relative_position_bias_table': dict(decay_mult=0.), 72 | 'norm': dict(decay_mult=0.)})) 73 | 74 | lr_config = dict(step=[8, 11]) 75 | runner = dict(type='EpochBasedRunner', max_epochs=12) 76 | find_unused_parameters = True 77 | -------------------------------------------------------------------------------- /detection/configs/kernelwarehouse/resnet50/mask_rcnn_resnet50_adamw_1x_coco.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../../_base_/models/mask_rcnn_r50_fpn.py', 3 | '../../_base_/datasets/coco_instance.py', 4 | '../../_base_/schedules/schedule_1x.py', '../../_base_/default_runtime.py' 5 | ] 6 | 7 | model = dict( 8 | pretrained='the path to pre-trained model', 9 | backbone=dict( 10 | type='ResNet', 11 | depth=50, 12 | norm_eval=True, 13 | out_indices=(0, 1, 2, 3), 14 | frozen_stages=1, 15 | ), 16 | neck=dict( 17 | type='FPN', 18 | in_channels=[256, 512, 1024, 2048], 19 | out_channels=256, 20 | num_outs=5 21 | ), 22 | ) 23 | 24 | img_norm_cfg = dict( 25 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 26 | 27 | # augmentation strategy originates from DETR / Sparse RCNN 28 | train_pipeline = [ 29 | dict(type='LoadImageFromFile'), 30 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 31 | dict(type='RandomFlip', flip_ratio=0.5), 32 | dict(type='AutoAugment', 33 | policies=[ 34 | [ 35 | dict(type='Resize', 36 | img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), 37 | (608, 1333), (640, 1333), (672, 1333), (704, 1333), 38 | (736, 1333), (768, 1333), (800, 1333)], 39 | multiscale_mode='value', 40 | keep_ratio=True) 41 | ], 42 | [ 43 | dict(type='Resize', 44 | img_scale=[(400, 1333), (500, 1333), (600, 1333)], 45 | multiscale_mode='value', 46 | keep_ratio=True), 47 | dict(type='RandomCrop', 48 | crop_type='absolute_range', 49 | crop_size=(384, 600), 50 | allow_negative_crop=True), 51 | dict(type='Resize', 52 | img_scale=[(480, 1333), (512, 1333), (544, 1333), 53 | (576, 1333), (608, 1333), (640, 1333), 54 | (672, 1333), (704, 1333), (736, 1333), 55 | (768, 1333), (800, 1333)], 56 | multiscale_mode='value', 57 | override=True, 58 | keep_ratio=True) 59 | ] 60 | ]), 61 | dict(type='Normalize', **img_norm_cfg), 62 | dict(type='Pad', size_divisor=32), 63 | dict(type='DefaultFormatBundle'), 64 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 65 | ] 66 | data = dict(train=dict(pipeline=train_pipeline)) 67 | 68 | optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, betas=(0.9, 0.999), weight_decay=0.05, 69 | paramwise_cfg=dict(custom_keys={'absolute_pos_embed': dict(decay_mult=0.), 70 | 'relative_position_bias_table': dict(decay_mult=0.), 71 | 'norm': dict(decay_mult=0.)})) 72 | 73 | lr_config = dict(step=[8, 11]) 74 | runner = dict(type='EpochBasedRunner', max_epochs=12) 75 | find_unused_parameters = True 76 | -------------------------------------------------------------------------------- /detection/mmcv_custom/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 
4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | # -*- coding: utf-8 -*- 10 | 11 | from .checkpoint import load_checkpoint 12 | from .layer_decay_optimizer_constructor import LearningRateDecayOptimizerConstructor 13 | from .customized_text import CustomizedTextLoggerHook 14 | 15 | __all__ = ['load_checkpoint', 'LearningRateDecayOptimizerConstructor', 'CustomizedTextLoggerHook'] 16 | -------------------------------------------------------------------------------- /detection/mmcv_custom/customized_text.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | 9 | import datetime 10 | from collections import OrderedDict 11 | 12 | import torch 13 | 14 | import mmcv 15 | from mmcv.runner import HOOKS 16 | from mmcv.runner import TextLoggerHook 17 | 18 | 19 | @HOOKS.register_module() 20 | class CustomizedTextLoggerHook(TextLoggerHook): 21 | """Customized Text Logger hook. 22 | 23 | This logger prints out both lr and layer_0_lr. 24 | 25 | """ 26 | 27 | def _log_info(self, log_dict, runner): 28 | # print exp name for users to distinguish experiments 29 | # at every ``interval_exp_name`` iterations and the end of each epoch 30 | if runner.meta is not None and 'exp_name' in runner.meta: 31 | if (self.every_n_iters(runner, self.interval_exp_name)) or ( 32 | self.by_epoch and self.end_of_epoch(runner)): 33 | exp_info = f'Exp name: {runner.meta["exp_name"]}' 34 | runner.logger.info(exp_info) 35 | 36 | if log_dict['mode'] == 'train': 37 | lr_str = {} 38 | for lr_type in ['lr', 'layer_0_lr']: 39 | if isinstance(log_dict[lr_type], dict): 40 | lr_str[lr_type] = [] 41 | for k, val in log_dict[lr_type].items(): 42 | lr_str[lr_type].append(f'{lr_type}_{k}: {val:.3e}') 43 | lr_str[lr_type] = ' '.join(lr_str[lr_type]) 44 | else: 45 | lr_str[lr_type] = f'{lr_type}: {log_dict[lr_type]:.3e}' 46 | 47 | # by epoch: Epoch [4][100/1000] 48 | # by iter: Iter [100/100000] 49 | if self.by_epoch: 50 | log_str = f'Epoch [{log_dict["epoch"]}]' \ 51 | f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t' 52 | else: 53 | log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t' 54 | log_str += f'{lr_str["lr"]}, {lr_str["layer_0_lr"]}, ' 55 | 56 | if 'time' in log_dict.keys(): 57 | self.time_sec_tot += (log_dict['time'] * self.interval) 58 | time_sec_avg = self.time_sec_tot / ( 59 | runner.iter - self.start_iter + 1) 60 | eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1) 61 | eta_str = str(datetime.timedelta(seconds=int(eta_sec))) 62 | log_str += f'eta: {eta_str}, ' 63 | log_str += f'time: {log_dict["time"]:.3f}, ' \ 64 | f'data_time: {log_dict["data_time"]:.3f}, ' 65 | # statistic memory 66 | if torch.cuda.is_available(): 67 | log_str += f'memory: {log_dict["memory"]}, ' 68 | else: 69 | # val/test time 70 | # here 1000 is the length of the val dataloader 71 | # by epoch: Epoch[val] [4][1000] 72 | # by iter: Iter[val] [1000] 73 | if self.by_epoch: 74 | log_str = f'Epoch({log_dict["mode"]}) ' \ 75 | f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t' 76 | else: 77 | log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t' 78 | 79 | log_items = [] 80 | for name, val in log_dict.items(): 81 | # TODO: resolve this hack 82 | # these items have been in log_str 83 | if name in [ 84 |
'Epoch', 'iter', 'lr', 'layer_0_lr', 'time', 'data_time', 85 | 'memory', 'epoch' 86 | ]: 87 | continue 88 | if isinstance(val, float): 89 | val = f'{val:.4f}' 90 | log_items.append(f'{name}: {val}') 91 | log_str += ', '.join(log_items) 92 | 93 | runner.logger.info(log_str) 94 | 95 | 96 | def log(self, runner): 97 | if 'eval_iter_num' in runner.log_buffer.output: 98 | # this doesn't modify runner.iter and is regardless of by_epoch 99 | cur_iter = runner.log_buffer.output.pop('eval_iter_num') 100 | else: 101 | cur_iter = self.get_iter(runner, inner_iter=True) 102 | 103 | log_dict = OrderedDict( 104 | mode=self.get_mode(runner), 105 | epoch=self.get_epoch(runner), 106 | iter=cur_iter) 107 | 108 | # record lr and layer_0_lr 109 | cur_lr = runner.current_lr() 110 | if isinstance(cur_lr, list): 111 | log_dict['layer_0_lr'] = min(cur_lr) 112 | log_dict['lr'] = max(cur_lr) 113 | else: 114 | assert isinstance(cur_lr, dict) 115 | log_dict['lr'], log_dict['layer_0_lr'] = {}, {} 116 | for k, lr_ in cur_lr.items(): 117 | assert isinstance(lr_, list) 118 | log_dict['layer_0_lr'].update({k: min(lr_)}) 119 | log_dict['lr'].update({k: max(lr_)}) 120 | 121 | if 'time' in runner.log_buffer.output: 122 | # statistic memory 123 | if torch.cuda.is_available(): 124 | log_dict['memory'] = self._get_max_memory(runner) 125 | 126 | log_dict = dict(log_dict, **runner.log_buffer.output) 127 | 128 | self._log_info(log_dict, runner) 129 | self._dump_log(log_dict, runner) 130 | return log_dict 131 | -------------------------------------------------------------------------------- /detection/mmcv_custom/layer_decay_optimizer_constructor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 
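# (Editor's sketch, not part of the original file.) The constructor below assigns each
# parameter group an lr scale of decay_rate ** (num_layers - layer_id - 1), with
# num_layers = paramwise_cfg['num_layers'] + 2. For the KW ConvNeXt-Tiny config above
# (decay_rate=0.95, num_layers=6, so 8 effective layers), the resulting scales are:
#
#     layer_id 0 (stem, pos_embed, cls_token):  0.95 ** 7 ~= 0.698
#     layer_id 4 (middle blocks of stage 2):    0.95 ** 3 ~= 0.857
#     layer_id 7 (non-backbone params):         0.95 ** 0  = 1.0
#
# The same arithmetic as a minimal standalone check:
#
#     decay_rate, num_layers = 0.95, 6 + 2
#     scales = [decay_rate ** (num_layers - i - 1) for i in range(num_layers)]
#     assert scales[-1] == 1.0 and abs(scales[0] - 0.95 ** 7) < 1e-12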
7 | 8 | 9 | import json 10 | from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor 11 | from mmcv.runner import get_dist_info 12 | 13 | 14 | def get_num_layer_layer_wise(var_name, num_max_layer=12): 15 | 16 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 17 | return 0 18 | elif var_name.startswith("backbone.downsample_layers"): 19 | stage_id = int(var_name.split('.')[2]) 20 | if stage_id == 0: 21 | layer_id = 0 22 | elif stage_id == 1: 23 | layer_id = 2 24 | elif stage_id == 2: 25 | layer_id = 3 26 | elif stage_id == 3: 27 | layer_id = num_max_layer 28 | return layer_id 29 | elif var_name.startswith("backbone.stages"): 30 | stage_id = int(var_name.split('.')[2]) 31 | block_id = int(var_name.split('.')[3]) 32 | if stage_id == 0: 33 | layer_id = 1 34 | elif stage_id == 1: 35 | layer_id = 2 36 | elif stage_id == 2: 37 | layer_id = 3 + block_id // 3 38 | elif stage_id == 3: 39 | layer_id = num_max_layer 40 | return layer_id 41 | else: 42 | return num_max_layer + 1 43 | 44 | 45 | def get_num_layer_stage_wise(var_name, num_max_layer): 46 | if var_name in ("backbone.cls_token", "backbone.mask_token", "backbone.pos_embed"): 47 | return 0 48 | elif var_name.startswith("backbone.downsample_layers"): 49 | return 0 50 | elif var_name.startswith("backbone.stages"): 51 | stage_id = int(var_name.split('.')[2]) 52 | return stage_id + 1 53 | else: 54 | return num_max_layer - 1 55 | 56 | 57 | @OPTIMIZER_BUILDERS.register_module() 58 | class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): 59 | def add_params(self, params, module, prefix='', is_dcn_module=None): 60 | """Add all parameters of module to the params list. 61 | The parameters of the given module will be added to the list of param 62 | groups, with specific rules defined by paramwise_cfg. 63 | Args: 64 | params (list[dict]): A list of param groups, it will be modified 65 | in place. 66 | module (nn.Module): The module to be added. 67 | prefix (str): The prefix of the module 68 | is_dcn_module (int|float|None): If the current module is a 69 | submodule of DCN, `is_dcn_module` will be passed to 70 | control conv_offset layer's learning rate. Defaults to None. 71 | """ 72 | parameter_groups = {} 73 | print(self.paramwise_cfg) 74 | num_layers = self.paramwise_cfg.get('num_layers') + 2 75 | decay_rate = self.paramwise_cfg.get('decay_rate') 76 | decay_type = self.paramwise_cfg.get('decay_type', "layer_wise") 77 | print("Build LearningRateDecayOptimizerConstructor %s %f - %d" % (decay_type, decay_rate, num_layers)) 78 | weight_decay = self.base_wd 79 | 80 | for name, param in module.named_parameters(): 81 | if not param.requires_grad: 82 | continue # frozen weights 83 | if len(param.shape) == 1 or name.endswith(".bias") or name in ('pos_embed', 'cls_token'): 84 | group_name = "no_decay" 85 | this_weight_decay = 0. 
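# (Editor's note) the branch above exempts 1-D tensors (norm weights), biases, and
# position/class tokens from weight decay; all remaining parameters fall through to
# the "decay" group below and keep the optimizer's base weight decay.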
86 | else: 87 | group_name = "decay" 88 | this_weight_decay = weight_decay 89 | 90 | if decay_type == "layer_wise": 91 | layer_id = get_num_layer_layer_wise(name, self.paramwise_cfg.get('num_layers')) 92 | elif decay_type == "stage_wise": 93 | layer_id = get_num_layer_stage_wise(name, num_layers) 94 | 95 | group_name = "layer_%d_%s" % (layer_id, group_name) 96 | 97 | if group_name not in parameter_groups: 98 | scale = decay_rate ** (num_layers - layer_id - 1) 99 | 100 | parameter_groups[group_name] = { 101 | "weight_decay": this_weight_decay, 102 | "params": [], 103 | "param_names": [], 104 | "lr_scale": scale, 105 | "group_name": group_name, 106 | "lr": scale * self.base_lr, 107 | } 108 | 109 | parameter_groups[group_name]["params"].append(param) 110 | parameter_groups[group_name]["param_names"].append(name) 111 | rank, _ = get_dist_info() 112 | if rank == 0: 113 | to_display = {} 114 | for key in parameter_groups: 115 | to_display[key] = { 116 | "param_names": parameter_groups[key]["param_names"], 117 | "lr_scale": parameter_groups[key]["lr_scale"], 118 | "lr": parameter_groups[key]["lr"], 119 | "weight_decay": parameter_groups[key]["weight_decay"], 120 | } 121 | print("Param groups = %s" % json.dumps(to_display, indent=2)) 122 | 123 | params.extend(parameter_groups.values()) 124 | -------------------------------------------------------------------------------- /detection/mmcv_custom/runner/checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 2 | import os.path as osp 3 | import time 4 | from tempfile import TemporaryDirectory 5 | 6 | import torch 7 | from torch.optim import Optimizer 8 | 9 | import mmcv 10 | from mmcv.parallel import is_module_wrapper 11 | from mmcv.runner.checkpoint import weights_to_cpu, get_state_dict 12 | 13 | try: 14 | import apex 15 | except ImportError: 16 | print('apex is not installed') 17 | 18 | 19 | def save_checkpoint(model, filename, optimizer=None, meta=None): 20 | """Save checkpoint to file. 21 | 22 | The checkpoint will have 3 fields: ``meta``, ``state_dict`` and 23 | ``optimizer``. By default ``meta`` will contain version 24 | and time info. 25 | 26 | Args: 27 | model (Module): Module whose params are to be saved. 28 | filename (str): Checkpoint filename. 29 | optimizer (:obj:`Optimizer`, optional): Optimizer to be saved. 30 | meta (dict, optional): Metadata to be saved in checkpoint.
31 | """ 32 | if meta is None: 33 | meta = {} 34 | elif not isinstance(meta, dict): 35 | raise TypeError(f'meta must be a dict or None, but got {type(meta)}') 36 | meta.update(mmcv_version=mmcv.__version__, time=time.asctime()) 37 | 38 | if is_module_wrapper(model): 39 | model = model.module 40 | 41 | if hasattr(model, 'CLASSES') and model.CLASSES is not None: 42 | # save class name to the meta 43 | meta.update(CLASSES=model.CLASSES) 44 | 45 | checkpoint = { 46 | 'meta': meta, 47 | 'state_dict': weights_to_cpu(get_state_dict(model)) 48 | } 49 | # save optimizer state dict in the checkpoint 50 | if isinstance(optimizer, Optimizer): 51 | checkpoint['optimizer'] = optimizer.state_dict() 52 | elif isinstance(optimizer, dict): 53 | checkpoint['optimizer'] = {} 54 | for name, optim in optimizer.items(): 55 | checkpoint['optimizer'][name] = optim.state_dict() 56 | 57 | # save amp state dict in the checkpoint 58 | # checkpoint['amp'] = apex.amp.state_dict() 59 | 60 | if filename.startswith('pavi://'): 61 | try: 62 | from pavi import modelcloud 63 | from pavi.exception import NodeNotFoundError 64 | except ImportError: 65 | raise ImportError( 66 | 'Please install pavi to load checkpoint from modelcloud.') 67 | model_path = filename[7:] 68 | root = modelcloud.Folder() 69 | model_dir, model_name = osp.split(model_path) 70 | try: 71 | model = modelcloud.get(model_dir) 72 | except NodeNotFoundError: 73 | model = root.create_training_model(model_dir) 74 | with TemporaryDirectory() as tmp_dir: 75 | checkpoint_file = osp.join(tmp_dir, model_name) 76 | with open(checkpoint_file, 'wb') as f: 77 | torch.save(checkpoint, f) 78 | f.flush() 79 | model.create_file(checkpoint_file, name=model_name) 80 | else: 81 | mmcv.mkdir_or_exist(osp.dirname(filename)) 82 | # immediately flush buffer 83 | with open(filename, 'wb') as f: 84 | torch.save(checkpoint, f) 85 | f.flush() 86 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .darknet import Darknet 2 | from .detectors_resnet import DetectoRS_ResNet 3 | from .detectors_resnext import DetectoRS_ResNeXt 4 | from .hourglass import HourglassNet 5 | from .hrnet import HRNet 6 | from .regnet import RegNet 7 | from .res2net import Res2Net 8 | from .resnext import ResNeXt 9 | from .ssd_vgg import SSDVGG 10 | from .trident_resnet import TridentResNet 11 | from .swin_transformer import SwinTransformer 12 | from .resnet import ResNet 13 | from .kw_resnet import KW_ResNet 14 | from .convnext import ConvNeXt 15 | from .kw_convnext import KW_ConvNeXt 16 | from .mobilenetv2 import MobileNetV2 17 | from .kw_mobilenetv2 import KW_MobileNetV2 18 | 19 | __all__ = [ 20 | 'RegNet', 'ResNet', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net', 21 | 'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet', 22 | 'TridentResNet', 'SwinTransformer', 'KW_ResNet', 'ConvNeXt', 'KW_ConvNeXt', 'MobileNetV2', 'KW_MobileNetV2' 23 | ] -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/convnext.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from timm.models.layers import trunc_normal_, DropPath 6 | 7 | from mmcv_custom import load_checkpoint 8 | from mmdet.utils import get_root_logger 9 | from ..builder import BACKBONES 10 
| 11 | 12 | class Block(nn.Module): 13 | r""" ConvNeXt Block. There are two equivalent implementations: 14 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 15 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 16 | We use (2) as we find it slightly faster in PyTorch 17 | 18 | Args: 19 | dim (int): Number of input channels. 20 | drop_path (float): Stochastic depth rate. Default: 0.0 21 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 22 | """ 23 | 24 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 25 | super().__init__() 26 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 27 | self.norm = LayerNorm(dim, eps=1e-6) 28 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 29 | self.act = nn.GELU() 30 | self.pwconv2 = nn.Linear(4 * dim, dim) 31 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 32 | requires_grad=True) if layer_scale_init_value > 0 else None 33 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 34 | 35 | def forward(self, x): 36 | input = x 37 | x = self.dwconv(x) 38 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 39 | x = self.norm(x) 40 | x = self.pwconv1(x) 41 | x = self.act(x) 42 | x = self.pwconv2(x) 43 | if self.gamma is not None: 44 | x = self.gamma * x 45 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 46 | 47 | x = input + self.drop_path(x) 48 | return x 49 | 50 | 51 | @BACKBONES.register_module() 52 | class ConvNeXt(nn.Module): 53 | r""" ConvNeXt 54 | A PyTorch impl of : `A ConvNet for the 2020s` - 55 | https://arxiv.org/pdf/2201.03545.pdf 56 | Args: 57 | in_chans (int): Number of input image channels. Default: 3 58 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 59 | dims (tuple(int)): Feature dimension at each stage. Default: [96, 192, 384, 768] 60 | drop_path_rate (float): Stochastic depth rate. Default: 0. 61 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 62 | out_indices (tuple(int)): Indices of stages whose normalized feature maps 63 | are returned to the neck. Default: [0, 1, 2, 3]
64 | """ 65 | 66 | def __init__(self, in_chans=3, depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], 67 | drop_path_rate=0., layer_scale_init_value=1e-6, out_indices=[0, 1, 2, 3], 68 | ): 69 | super().__init__() 70 | 71 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 72 | stem = nn.Sequential( 73 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 74 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 75 | ) 76 | self.downsample_layers.append(stem) 77 | for i in range(3): 78 | downsample_layer = nn.Sequential( 79 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 80 | nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2), 81 | ) 82 | self.downsample_layers.append(downsample_layer) 83 | 84 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 85 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 86 | cur = 0 87 | for i in range(4): 88 | stage = nn.Sequential( 89 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 90 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 91 | ) 92 | self.stages.append(stage) 93 | cur += depths[i] 94 | 95 | self.out_indices = out_indices 96 | 97 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first") 98 | for i_layer in range(4): 99 | layer = norm_layer(dims[i_layer]) 100 | layer_name = f'norm{i_layer}' 101 | self.add_module(layer_name, layer) 102 | 103 | self.apply(self._init_weights) 104 | 105 | def _init_weights(self, m): 106 | if isinstance(m, (nn.Conv2d, nn.Linear)): 107 | trunc_normal_(m.weight, std=.02) 108 | nn.init.constant_(m.bias, 0) 109 | 110 | def init_weights(self, pretrained=None): 111 | """Initialize the weights in backbone. 112 | Args: 113 | pretrained (str, optional): Path to pre-trained weights. 114 | Defaults to None. 115 | """ 116 | 117 | def _init_weights(m): 118 | if isinstance(m, nn.Linear): 119 | trunc_normal_(m.weight, std=.02) 120 | if isinstance(m, nn.Linear) and m.bias is not None: 121 | nn.init.constant_(m.bias, 0) 122 | elif isinstance(m, nn.LayerNorm): 123 | nn.init.constant_(m.bias, 0) 124 | nn.init.constant_(m.weight, 1.0) 125 | 126 | if isinstance(pretrained, str): 127 | self.apply(_init_weights) 128 | logger = get_root_logger() 129 | load_checkpoint(self, pretrained, strict=False, logger=logger) 130 | elif pretrained is None: 131 | self.apply(_init_weights) 132 | else: 133 | raise TypeError('pretrained must be a str or None') 134 | 135 | def forward_features(self, x): 136 | outs = [] 137 | for i in range(4): 138 | x = self.downsample_layers[i](x) 139 | x = self.stages[i](x) 140 | if i in self.out_indices: 141 | norm_layer = getattr(self, f'norm{i}') 142 | x_out = norm_layer(x) 143 | outs.append(x_out) 144 | 145 | return tuple(outs) 146 | 147 | def forward(self, x): 148 | x = self.forward_features(x) 149 | return x 150 | 151 | 152 | class LayerNorm(nn.Module): 153 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 154 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 155 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 156 | with shape (batch_size, channels, height, width). 
157 | """ 158 | 159 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 160 | super().__init__() 161 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 162 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 163 | self.eps = eps 164 | self.data_format = data_format 165 | if self.data_format not in ["channels_last", "channels_first"]: 166 | raise NotImplementedError 167 | self.normalized_shape = (normalized_shape,) 168 | 169 | def forward(self, x): 170 | if self.data_format == "channels_last": 171 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 172 | elif self.data_format == "channels_first": 173 | u = x.mean(1, keepdim=True) 174 | s = (x - u).pow(2).mean(1, keepdim=True) 175 | x = (x - u) / torch.sqrt(s + self.eps) 176 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 177 | return x -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_convnext.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | 3 | # All rights reserved. 4 | 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | 8 | from functools import partial 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from timm.models.layers import trunc_normal_, DropPath 13 | from timm.models.registry import register_model 14 | from .kernel_warehouse import Warehouse_Manager 15 | 16 | from ..builder import BACKBONES 17 | from mmcv.runner import load_checkpoint 18 | from mmdet.utils import get_root_logger 19 | 20 | class Block(nn.Module): 21 | r""" ConvNeXt Block. There are two equivalent implementations: 22 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 23 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 24 | We use (2) as we find it slightly faster in PyTorch 25 | 26 | Args: 27 | dim (int): Number of input channels. 28 | drop_path (float): Stochastic depth rate. Default: 0.0 29 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 30 | """ 31 | 32 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, 33 | warehouse_manager=None, stage_idx=-1, layer_idx=-1): 34 | super().__init__() 35 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim, layer_type='conv2d', 36 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv 37 | self.norm = LayerNorm(dim, eps=1e-6) 38 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0, layer_type='conv2d', 39 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers 40 | self.act = nn.GELU() 41 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0, layer_type='conv2d', 42 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2)) 43 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]), 44 | requires_grad=True) if layer_scale_init_value > 0 else None 45 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 46 | 47 | def forward(self, x): 48 | input = x 49 | x = self.dwconv(x) 50 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 51 | x = self.norm(x).permute(0, 3, 1, 2) 52 | x = self.pwconv1(x) 53 | x = self.act(x) 54 | x = self.pwconv2(x) 55 | if self.gamma is not None: 56 | x = self.gamma * x 57 | #x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 58 | 59 | x = input + self.drop_path(x) 60 | return x 61 | 62 | @BACKBONES.register_module() 63 | class KW_ConvNeXt(nn.Module): 64 | r""" ConvNeXt 65 | A PyTorch impl of : `A ConvNet for the 2020s` - 66 | https://arxiv.org/pdf/2201.03545.pdf 67 | 68 | Args: 69 | in_chans (int): Number of input image channels. Default: 3 70 | num_classes (int): Number of classes for classification head. Default: 1000 71 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 72 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 73 | drop_path_rate (float): Stochastic depth rate. Default: 0. 74 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 75 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 76 | """ 77 | 78 | def __init__(self, in_chans=3, num_classes=1000, 79 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 80 | layer_scale_init_value=1e-6, head_init_scale=1., 81 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 82 | sharing_range=('layer', 'pwconv'), out_indices=[0, 1, 2, 3], **kwargs 83 | ): 84 | super().__init__() 85 | 86 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 87 | cell_outplane_ratio, sharing_range, norm_layer=nn.LayerNorm) 88 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 89 | stem = nn.Sequential( 90 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4, layer_type='conv2d', 91 | warehouse_name='stage{}_conv0'.format('stem')), 92 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 93 | ) 94 | self.downsample_layers.append(stem) 95 | for i in range(3): 96 | downsample_layer = nn.Sequential( 97 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 98 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2, layer_type='conv2d', 99 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')), 100 | ) 101 | self.downsample_layers.append(downsample_layer) 102 | 103 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 104 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 105 | cur = 0 106 | for i in range(4): 107 | stage = nn.Sequential( 108 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 109 | layer_scale_init_value=layer_scale_init_value, 110 | warehouse_manager=self.warehouse_manager, 111 | stage_idx=i, layer_idx=j, 112 | ) for j in range(depths[i])] 113 | ) 114 | self.stages.append(stage) 115 | cur += depths[i] 116 | 117 | self.warehouse_manager.store() 118 | self.warehouse_manager.allocate(self) 119 | self.net_update_temperature(0) 120 | 121 | self.out_indices = out_indices 122 | 123 | norm_layer = partial(LayerNorm, eps=1e-6, data_format="channels_first") 124 | for i_layer in range(4): 125 | layer = norm_layer(dims[i_layer]) 126 | layer_name = f'norm{i_layer}' 127 | self.add_module(layer_name, layer) 128 | 129 | self.apply(self._init_weights) 130 | 131 | def net_update_temperature(self, temp): 132 | for m in 
self.modules(): 133 | if hasattr(m, "update_temperature"): 134 | m.update_temperature(temp) 135 | 136 | def train(self, mode=True): 137 | """Convert the model into training mode while keeping normalization layers 138 | frozen.""" 139 | super(KW_ConvNeXt, self).train(mode) 140 | if mode: 141 | for m in self.modules(): 142 | # trick: eval() affects BatchNorm layers only 143 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 144 | m.eval() 145 | 146 | def _init_weights(self, m): 147 | if isinstance(m, (nn.Conv2d, nn.Linear)): 148 | trunc_normal_(m.weight, std=.02) 149 | if m.bias is not None: 150 | nn.init.constant_(m.bias, 0) 151 | 152 | def init_weights(self, pretrained=None): 153 | """Initialize the weights in backbone. 154 | Args: 155 | pretrained (str, optional): Path to pre-trained weights. 156 | Defaults to None. 157 | """ 158 | 159 | def _init_weights(m): 160 | if isinstance(m, nn.Linear): 161 | trunc_normal_(m.weight, std=.02) 162 | if isinstance(m, nn.Linear) and m.bias is not None: 163 | nn.init.constant_(m.bias, 0) 164 | elif isinstance(m, nn.LayerNorm): 165 | nn.init.constant_(m.bias, 0) 166 | nn.init.constant_(m.weight, 1.0) 167 | 168 | if isinstance(pretrained, str): 169 | self.apply(_init_weights) 170 | logger = get_root_logger() 171 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 172 | elif pretrained is None: 173 | self.apply(_init_weights) 174 | else: 175 | raise TypeError('pretrained must be a str or None') 176 | 177 | def forward_features(self, x): 178 | outs = [] 179 | for i in range(4): 180 | x = self.downsample_layers[i](x) 181 | x = self.stages[i](x) 182 | if i in self.out_indices: 183 | norm_layer = getattr(self, f'norm{i}') 184 | x_out = norm_layer(x) 185 | outs.append(x_out) 186 | 187 | return tuple(outs) 188 | 189 | def forward(self, x): 190 | x = self.forward_features(x) 191 | return x 192 | 193 | 194 | class LayerNorm(nn.Module): 195 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 196 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 197 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 198 | with shape (batch_size, channels, height, width).
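    (Editor's note) as in convnext.py, the channels_first path is computed manually in
    forward() below because F.layer_norm normalizes only over trailing dimensions.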
199 | """ 200 | 201 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 202 | super().__init__() 203 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 204 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 205 | self.eps = eps 206 | self.data_format = data_format 207 | if self.data_format not in ["channels_last", "channels_first"]: 208 | raise NotImplementedError 209 | self.normalized_shape = (normalized_shape,) 210 | 211 | def forward(self, x): 212 | if self.data_format == "channels_last": 213 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 214 | elif self.data_format == "channels_first": 215 | u = x.mean(1, keepdim=True) 216 | s = (x - u).pow(2).mean(1, keepdim=True) 217 | x = (x - u) / torch.sqrt(s + self.eps) 218 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 219 | return x 220 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from .kernel_warehouse import Warehouse_Manager 3 | 4 | from ..builder import BACKBONES 5 | from mmcv.runner import load_checkpoint 6 | from mmdet.utils import get_root_logger 7 | 8 | 9 | def _make_divisible(v, divisor, min_value=None): 10 | """ 11 | This function is taken from the original tf repo. 12 | It ensures that all layers have a channel number that is divisible by 8 13 | It can be seen here: 14 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 15 | :param v: 16 | :param divisor: 17 | :param min_value: 18 | :return: 19 | """ 20 | if min_value is None: 21 | min_value = divisor 22 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 23 | # Make sure that round down does not go down by more than 10%. 
24 | if new_v < 0.9 * v: 25 | new_v += divisor 26 | return new_v 27 | 28 | 29 | class ConvBNReLU(nn.Sequential): 30 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 31 | warehouse_name=None, warehouse_manager=None, enabled=True): 32 | padding = (kernel_size - 1) // 2 33 | super(ConvBNReLU, self).__init__( 34 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 35 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 36 | norm_layer(out_planes), 37 | nn.ReLU6(inplace=True) 38 | ) 39 | 40 | 41 | class InvertedResidual(nn.Module): 42 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 43 | warehouse_manager=None): 44 | super(InvertedResidual, self).__init__() 45 | self.stride = stride 46 | assert stride in [1, 2] 47 | hidden_dim = int(round(inp * expand_ratio)) 48 | self.use_res_connect = self.stride == 1 and inp == oup 49 | 50 | layers = [] 51 | if expand_ratio != 1: 52 | # pw 53 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 54 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 55 | warehouse_manager=warehouse_manager)) 56 | 57 | layers.extend([ 58 | # dw 59 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 60 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 61 | warehouse_manager=warehouse_manager), 62 | # pw-linear 63 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 64 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)), 65 | norm_layer(oup), 66 | ]) 67 | self.conv = nn.Sequential(*layers) 68 | 69 | def forward(self, x): 70 | if self.use_res_connect: 71 | return x + self.conv(x) 72 | else: 73 | return self.conv(x) 74 | 75 | 76 | @BACKBONES.register_module() 77 | class KW_MobileNetV2(nn.Module): 78 | def __init__(self, 79 | num_classes=1000, 80 | width_mult=1.0, 81 | inverted_residual_setting=None, 82 | round_nearest=8, 83 | block=None, 84 | norm_layer=None, 85 | dropout=0.1, 86 | reduction=0.0625, 87 | cell_num_ratio=1, 88 | cell_inplane_ratio=1, 89 | cell_outplane_ratio=1, 90 | sharing_range=('layer', 'pwconv'), 91 | frozen_stages=0, 92 | out_indices=(0, 1, 2, 3), 93 | norm_eval=True, 94 | **kwargs): 95 | """ 96 | MobileNet V2 main class 97 | 98 | Args: 99 | num_classes (int): Number of classes 100 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 101 | inverted_residual_setting: Network structure 102 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 103 | Set to 1 to turn off rounding 104 | block: Module specifying inverted residual building block for mobilenet 105 | norm_layer: Module specifying the normalization layer to use 106 | 107 | """ 108 | super(KW_MobileNetV2, self).__init__() 109 | if block is None: 110 | block = InvertedResidual 111 | 112 | if norm_layer is None: 113 | norm_layer = nn.BatchNorm2d 114 | 115 | input_channel = 32 116 | last_channel = 1280 117 | self.stage_idx = [1, 3, 6, 13, 18] 118 | self.frozen_stages = frozen_stages 119 | self.out_indices = [self.stage_idx[x] for x in out_indices] 120 | self.norm_eval = norm_eval 121 | 122 | if inverted_residual_setting is None: 123 | inverted_residual_setting = [ 124 | # t, c, n, s 125 | [1, 16, 1, 1], 126 | [6, 24, 2, 2], 127 | [6, 32, 3, 2], 128 | [6, 64, 4, 2], 129 | [6, 96, 3,
1], # 0.3M 130 | [6, 160, 3, 2], # 0.92M 131 | [6, 320, 1, 1], # 1.22M 132 | ] 133 | 134 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6] 135 | 136 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 137 | cell_outplane_ratio, sharing_range) 138 | 139 | # only check the first element, assuming user knows t,c,n,s are required 140 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 141 | raise ValueError("inverted_residual_setting should be a non-empty " 142 | "list of 4-element lists, got {}".format(inverted_residual_setting)) 143 | 144 | # building first layer 145 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 146 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 147 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 148 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 149 | 150 | layer_idx = 0 151 | # building inverted residual blocks 152 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 153 | output_channel = _make_divisible(c * width_mult, round_nearest) 154 | for i in range(n): 155 | stride = s if i == 0 else 1 156 | 157 | if i == 0 and idx > 0: 158 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 159 | else: 160 | handover = False 161 | 162 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 163 | 164 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 165 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 166 | layer_idx=layer_idx)) 167 | 168 | input_channel = output_channel 169 | layer_idx += 1 170 | 171 | if handover: 172 | layer_idx = 0 173 | 174 | # building last several layers 175 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 176 | warehouse_manager=self.warehouse_manager, 177 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 178 | # make it nn.Sequential 179 | self.features = nn.Sequential(*features) 180 | # building classifier 181 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() 182 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True) 183 | 184 | # weight initialization 185 | for m in self.modules(): 186 | if isinstance(m, nn.Conv2d): 187 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 188 | if m.bias is not None: 189 | nn.init.zeros_(m.bias) 190 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 191 | nn.init.ones_(m.weight) 192 | nn.init.zeros_(m.bias) 193 | elif isinstance(m, nn.Linear): 194 | nn.init.normal_(m.weight, 0, 0.01) 195 | nn.init.zeros_(m.bias) 196 | 197 | 198 | self.warehouse_manager.store() 199 | self.warehouse_manager.allocate(self) 200 | self.net_update_temperature(0) 201 | 202 | def net_update_temperature(self, temp): 203 | for m in self.modules(): 204 | if hasattr(m, "update_temperature"): 205 | m.update_temperature(temp) 206 | 207 | def _freeze_stages(self): 208 | if self.frozen_stages >= 0: 209 | for i in range(self.stage_idx[self.frozen_stages] + 1): 210 | m = self.features[i] 211 | m.eval() 212 | for param in m.parameters(): 213 | param.requires_grad = False 214 | 215 | def train(self, mode=True): 216 | """Convert the model into training mode while keeping normalization layers 217 | frozen.""" 218 | super(KW_MobileNetV2, self).train(mode) 219 | self._freeze_stages() 220 | 221 |
if mode and self.norm_eval: 222 | for m in self.modules(): 223 | # trick: eval() affects BatchNorm layers only 224 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 225 | m.eval() 226 | 227 | def init_weights(self, pretrained=None): 228 | """Initialize the weights in backbone. 229 | Args: 230 | pretrained (str, optional): Path to pre-trained weights. 231 | Defaults to None. 232 | """ 233 | 234 | for m in self.modules(): 235 | if isinstance(m, nn.Conv2d): 236 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 237 | if m.bias is not None: 238 | nn.init.zeros_(m.bias) 239 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 240 | nn.init.constant_(m.weight, 1) 241 | nn.init.constant_(m.bias, 0) 242 | elif isinstance(m, nn.Linear): 243 | nn.init.normal_(m.weight, 0, 0.01) 244 | if m.bias is not None: 245 | nn.init.zeros_(m.bias) 246 | 247 | if isinstance(pretrained, str): 248 | logger = get_root_logger() 249 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 250 | elif pretrained is not None: 251 | raise TypeError('pretrained must be a str or None') 252 | 253 | def _forward_impl(self, x): 254 | outs = [] 255 | for idx, layer in enumerate(self.features): 256 | x = layer(x) 257 | if idx in self.out_indices: 258 | outs.append(x) 259 | return outs 260 | 261 | def forward(self, x): 262 | return self._forward_impl(x) -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/kw_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .kernel_warehouse import Warehouse_Manager 4 | from timm.models.layers import trunc_normal_, DropPath 5 | 6 | from ..builder import BACKBONES 7 | from mmcv.runner import load_checkpoint 8 | from mmdet.utils import get_root_logger 9 | 10 | __all__ = ['KW_ResNet'] 11 | 12 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 13 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, 14 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 15 | 16 | 17 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 18 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, 19 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 20 | 21 | 22 | class BasicBlock(nn.Module): 23 | expansion = 1 24 | 25 | def __init__(self, inplanes, planes, stride=1, downsample=None, 26 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 27 | super(BasicBlock, self).__init__() 28 | conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0) 29 | self.conv1 = kwconv3x3(inplanes, planes, stride, 30 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 31 | warehouse_manager=warehouse_manager) 32 | self.bn1 = nn.BatchNorm2d(planes) 33 | self.relu = nn.ReLU(inplace=True) 34 | layer_idx = 0 if warehouse_handover else layer_idx 35 | self.conv2 = kwconv3x3(planes, planes, 36 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 37 | warehouse_manager=warehouse_manager) 38 | self.bn2 = nn.BatchNorm2d(planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | self.drop_path = DropPath(drop_path) if drop_path > 0.
else nn.Identity() 42 | 43 | def forward(self, x): 44 | identity = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | identity = self.downsample(x) 55 | 56 | out = identity + self.drop_path(out) 57 | out = self.relu(out) 58 | return out 59 | 60 | 61 | class Bottleneck(nn.Module): 62 | expansion = 4 63 | 64 | def __init__(self, inplanes, planes, stride=1, downsample=None, 65 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 66 | super(Bottleneck, self).__init__() 67 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx 68 | self.conv1 = kwconv1x1(inplanes, planes, 69 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 70 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0)) 71 | self.bn1 = nn.BatchNorm2d(planes) 72 | layer_idx = 0 if warehouse_handover else layer_idx 73 | self.conv2 = kwconv3x3(planes, planes, stride, 74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 75 | warehouse_manager=warehouse_manager) 76 | self.bn2 = nn.BatchNorm2d(planes) 77 | self.conv3 = kwconv1x1(planes, planes * self.expansion, 78 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2), 79 | warehouse_manager=warehouse_manager) 80 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.downsample = downsample 83 | self.stride = stride 84 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 85 | 86 | def forward(self, x): 87 | identity = x 88 | 89 | out = self.conv1(x) 90 | out = self.bn1(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv2(out) 94 | out = self.bn2(out) 95 | out = self.relu(out) 96 | 97 | out = self.conv3(out) 98 | out = self.bn3(out) 99 | 100 | if self.downsample is not None: 101 | identity = self.downsample(x) 102 | 103 | out = identity + self.drop_path(out) 104 | out = self.relu(out) 105 | return out 106 | 107 | 108 | @BACKBONES.register_module() 109 | class KW_ResNet(nn.Module): 110 | arch_settings = { 111 | 18: (BasicBlock, (2, 2, 2, 2)), 112 | 34: (BasicBlock, (3, 4, 6, 3)), 113 | 50: (Bottleneck, (3, 4, 6, 3)), 114 | 101: (Bottleneck, (3, 4, 23, 3)), 115 | 152: (Bottleneck, (3, 8, 36, 3)) 116 | } 117 | 118 | def __init__(self, depth, num_classes=1000, dropout=0.1, reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, 119 | cell_outplane_ratio=1, sharing_range=('layer', 'conv'), drop_path_rate=0.1, frozen_stages=0, 120 | out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs): 121 | super(KW_ResNet, self).__init__() 122 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio, 123 | sharing_range) 124 | block, layers = self.arch_settings[depth] 125 | self.inplanes = 64 126 | self.layer_idx = 0 127 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) 128 | self.bn1 = nn.BatchNorm2d(self.inplanes) 129 | self.relu = nn.ReLU(inplace=True) 130 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 131 | self.layer1 = self._make_layer(block, 64, layers[0], 132 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 133 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 134 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 135 | self.layer3 = 
self._make_layer(block, 256, layers[2], stride=2, 136 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 137 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 138 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 139 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 140 | self.fc = nn.Linear(512 * block.expansion, num_classes) 141 | 142 | for m in self.modules(): 143 | if isinstance(m, nn.Conv2d): 144 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 146 | nn.init.constant_(m.weight, 1) 147 | nn.init.constant_(m.bias, 0) 148 | 149 | self.warehouse_manager.store() 150 | self.warehouse_manager.allocate(self) 151 | 152 | self.frozen_stages = frozen_stages 153 | self.out_indices = out_indices 154 | self.norm_eval = norm_eval 155 | self.net_update_temperature(0) 156 | 157 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.): 158 | downsample = None 159 | if stride != 1 or self.inplanes != planes * block.expansion: 160 | downsample = nn.Sequential( 161 | warehouse_manager.reserve( 162 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0, 163 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx - 1, self.layer_idx + 1, 0), 164 | enabled=(stride != 1), bias=False), 165 | nn.BatchNorm2d(planes * block.expansion), 166 | ) 167 | 168 | layers = [] 169 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx, 170 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path)) 171 | self.layer_idx = 1 172 | self.inplanes = planes * block.expansion 173 | for idx in range(1, blocks): 174 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx, 175 | warehouse_manager=warehouse_manager, drop_path=drop_path)) 176 | self.layer_idx += 1 177 | return nn.Sequential(*layers) 178 | 179 | def net_update_temperature(self, temp): 180 | for m in self.modules(): 181 | if hasattr(m, "update_temperature"): 182 | m.update_temperature(temp) 183 | 184 | def _freeze_stages(self): 185 | if self.frozen_stages >= 0: 186 | self.bn1.eval() 187 | for m in [self.conv1, self.bn1]: 188 | for param in m.parameters(): 189 | param.requires_grad = False 190 | 191 | for i in range(1, self.frozen_stages + 1): 192 | m = getattr(self, f'layer{i}') 193 | m.eval() 194 | for param in m.parameters(): 195 | param.requires_grad = False 196 | 197 | def train(self, mode=True): 198 | """Convert the model into training mode while keeping normalization layers 199 | frozen.""" 200 | super(KW_ResNet, self).train(mode) 201 | self._freeze_stages() 202 | 203 | if mode and self.norm_eval: 204 | for m in self.modules(): 205 | # trick: eval() has an effect on BatchNorm layers only 206 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 207 | m.eval() 208 | 209 | def init_weights(self, pretrained=None): 210 | """Initialize the weights in backbone. 211 | Args: 212 | pretrained (str, optional): Path to pre-trained weights. 213 | Defaults to None.
214 | """ 215 | 216 | for m in self.modules(): 217 | if isinstance(m, nn.Conv2d): 218 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 219 | if m.bias is not None: 220 | nn.init.zeros_(m.bias) 221 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 222 | nn.init.constant_(m.weight, 1) 223 | nn.init.constant_(m.bias, 0) 224 | elif isinstance(m, nn.Linear): 225 | nn.init.normal_(m.weight, 0, 0.01) 226 | if m.bias is not None: 227 | nn.init.zeros_(m.bias) 228 | 229 | if isinstance(pretrained, str): 230 | logger = get_root_logger() 231 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu') 232 | elif pretrained is not None: 233 | raise TypeError('pretrained must be a str or None') 234 | 235 | def _forward_impl(self, x): 236 | x = self.conv1(x) 237 | x = self.bn1(x) 238 | x = self.relu(x) 239 | x = self.maxpool(x) 240 | outs = [] 241 | for idx in range(4): 242 | layer = getattr(self, f'layer{idx + 1}') 243 | x = layer(x) 244 | if idx in self.out_indices: 245 | outs.append(x) 246 | return tuple(outs) 247 | 248 | 249 | def forward(self, x): 250 | return self._forward_impl(x) 251 | 252 | -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from timm.models.registry import register_model 3 | 4 | from ..builder import BACKBONES 5 | from mmcv.runner import load_checkpoint 6 | from mmdet.utils import get_root_logger 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | """ 10 | This function is taken from the original tf repo. 11 | It ensures that all layers have a channel number that is divisible by 8 12 | It can be seen here: 13 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 14 | :param v: 15 | :param divisor: 16 | :param min_value: 17 | :return: 18 | """ 19 | if min_value is None: 20 | min_value = divisor 21 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 22 | # Make sure that round down does not go down by more than 10%. 
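# Worked example of the rounding above (editor's note): for v = 16 and divisor = 8,
# new_v = max(8, int(16 + 4) // 8 * 8) = 16, and 16 >= 0.9 * 16, so 16 is kept as is.
# For v = 10 and divisor = 8, new_v = int(10 + 4) // 8 * 8 = 8, but 8 < 0.9 * 10 = 9,
# so the guard below bumps the result up to 16 rather than shrinking the layer by more than 10%.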
23 | if new_v < 0.9 * v: 24 | new_v += divisor 25 | return new_v 26 | 27 | 28 | class ConvBNReLU(nn.Sequential): 29 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d): 30 | padding = (kernel_size - 1) // 2 31 | super(ConvBNReLU, self).__init__( 32 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 33 | norm_layer(out_planes), 34 | nn.ReLU6(inplace=True) 35 | ) 36 | 37 | 38 | class InvertedResidual(nn.Module): 39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d): 40 | super(InvertedResidual, self).__init__() 41 | self.stride = stride 42 | hidden_dim = int(round(inp * expand_ratio)) 43 | self.use_res_connect = self.stride == 1 and inp == oup 44 | 45 | layers = [] 46 | if expand_ratio != 1: 47 | # pw 48 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 49 | layers.extend([ 50 | # dw 51 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 52 | # pw-linear 53 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 54 | norm_layer(oup), 55 | ]) 56 | self.conv = nn.Sequential(*layers) 57 | 58 | def forward(self, x): 59 | if self.use_res_connect: 60 | return x + self.conv(x) 61 | else: 62 | return self.conv(x) 63 | 64 | 65 | @BACKBONES.register_module() 66 | class MobileNetV2(nn.Module): 67 | def __init__(self, 68 | num_classes=1000, 69 | width_mult=1.0, 70 | inverted_residual_setting=None, 71 | round_nearest=8, 72 | block=InvertedResidual, 73 | norm_layer=nn.BatchNorm2d, 74 | dropout=0.0, 75 | frozen_stages=0, 76 | out_indices=(0, 1, 2, 3), 77 | norm_eval=True, 78 | **kwargs): 79 | """ 80 | MobileNet V2 main class 81 | 82 | Args: 83 | num_classes (int): Number of classes 84 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 85 | inverted_residual_setting: Network structure 86 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 87 | Set to 1 to turn off rounding 88 | block: Module specifying inverted residual building block for mobilenet 89 | norm_layer: Module specifying the normalization layer to use 90 | 91 | """ 92 | super(MobileNetV2, self).__init__() 93 | 94 | input_channel = 32 95 | last_channel = 1280 96 | self.stage_idx = [1, 3, 6, 13, 18] 97 | self.frozen_stages = frozen_stages 98 | self.out_indices = [self.stage_idx[x] for x in out_indices] 99 | self.norm_eval = norm_eval 100 | 101 | if inverted_residual_setting is None: 102 | inverted_residual_setting = [ 103 | # t, c, n, s 104 | [1, 16, 1, 1], 105 | [6, 24, 2, 2], 106 | [6, 32, 3, 2], 107 | [6, 64, 4, 2], 108 | [6, 96, 3, 1], 109 | [6, 160, 3, 2], 110 | [6, 320, 1, 1], 111 | ] 112 | 113 | # only check the first element, assuming user knows t,c,n,s are required 114 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 115 | raise ValueError("inverted_residual_setting should be non-empty " 116 | "or a 4-element list, got {}".format(inverted_residual_setting)) 117 | 118 | # building first layer 119 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 120 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 121 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 122 | # building inverted residual blocks 123 | for t, c, n, s in inverted_residual_setting: 124 | output_channel = _make_divisible(c * width_mult, round_nearest) 125 | for i in
range(n): 126 | stride = s if i == 0 else 1 127 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 128 | input_channel = output_channel 129 | # building last several layers 130 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer)) 131 | # make it nn.Sequential 132 | self.features = nn.Sequential(*features) 133 | # building classifier 134 | self.classifier = nn.Sequential( 135 | nn.Dropout(dropout), 136 | nn.Linear(self.last_channel, num_classes), 137 | ) 138 | 139 | # weight initialization 140 | for m in self.modules(): 141 | if isinstance(m, nn.Conv2d): 142 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 143 | if m.bias is not None: 144 | nn.init.zeros_(m.bias) 145 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 146 | nn.init.ones_(m.weight) 147 | nn.init.zeros_(m.bias) 148 | elif isinstance(m, nn.Linear): 149 | nn.init.normal_(m.weight, 0, 0.01) 150 | nn.init.zeros_(m.bias) 151 | 152 | def _freeze_stages(self): 153 | if self.frozen_stages >= 0: 154 | for i in range(self.stage_idx[self.frozen_stages] + 1): 155 | m = self.features[i] 156 | m.eval() 157 | for param in m.parameters(): 158 | param.requires_grad = False 159 | 160 | def train(self, mode=True): 161 | """Convert the model into training mode while keeping normalization layers 162 | frozen.""" 163 | super(MobileNetV2, self).train(mode) 164 | self._freeze_stages() 165 | 166 | if mode and self.norm_eval: 167 | for m in self.modules(): 168 | # trick: eval() has an effect on BatchNorm layers only 169 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d)): 170 | m.eval() 171 | 172 | def init_weights(self, pretrained=None): 173 | """Initialize the weights in backbone. 174 | Args: 175 | pretrained (str, optional): Path to pre-trained weights. 176 | Defaults to None.
177 | """ 178 | 179 | for m in self.modules(): 180 | if isinstance(m, nn.Conv2d): 181 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 182 | if m.bias is not None: 183 | nn.init.zeros_(m.bias) 184 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 185 | nn.init.constant_(m.weight, 1) 186 | nn.init.constant_(m.bias, 0) 187 | elif isinstance(m, nn.Linear): 188 | nn.init.normal_(m.weight, 0, 0.01) 189 | if m.bias is not None: 190 | nn.init.zeros_(m.bias) 191 | 192 | if isinstance(pretrained, str): 193 | logger = get_root_logger() 194 | load_checkpoint(self, pretrained, strict=False, logger=logger, map_location='cpu') 195 | elif pretrained is not None: 196 | raise TypeError('pretrained must be a str or None') 197 | 198 | def _forward_impl(self, x): 199 | outs = [] 200 | for idx, layer in enumerate(self.features): 201 | x = layer(x) 202 | if idx in self.out_indices: 203 | outs.append(x) 204 | return outs 205 | 206 | def forward(self, x): 207 | return self._forward_impl(x) -------------------------------------------------------------------------------- /detection/mmdet/models/backbones/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from ..builder import BACKBONES 3 | from mmcv.runner import load_checkpoint 4 | from mmdet.utils import get_root_logger 5 | 6 | __all__ = ['ResNet'] 7 | 8 | 9 | def conv3x3(in_planes, out_planes, stride=1): 10 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 11 | padding=1, bias=False) 12 | 13 | 14 | def conv1x1(in_planes, out_planes, stride=1): 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | identity = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | identity = self.downsample(x) 43 | 44 | out += identity 45 | out = self.relu(out) 46 | return out 47 | 48 | 49 | class Bottleneck(nn.Module): 50 | expansion = 4 51 | 52 | def __init__(self, inplanes, planes, stride=1, downsample=None): 53 | super(Bottleneck, self).__init__() 54 | self.conv1 = conv1x1(inplanes, planes) 55 | self.bn1 = nn.BatchNorm2d(planes) 56 | self.conv2 = conv3x3(planes, planes, stride) 57 | self.bn2 = nn.BatchNorm2d(planes) 58 | self.conv3 = conv1x1(planes, planes * self.expansion) 59 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 60 | self.relu = nn.ReLU(inplace=True) 61 | self.downsample = downsample 62 | self.stride = stride 63 | 64 | def forward(self, x): 65 | identity = x 66 | 67 | out = self.conv1(x) 68 | out = self.bn1(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv2(out) 72 | out = self.bn2(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv3(out) 76 | out = self.bn3(out) 77 | 78 | if self.downsample is not None: 79 | identity = self.downsample(x) 80 | 81 | out += identity 82 | out = self.relu(out) 83 | return out 84 | 85 | 86 | @BACKBONES.register_module() 87 | class ResNet(nn.Module): 88 | 
arch_settings = { 89 | 18: (BasicBlock, (2, 2, 2, 2)), 90 | 34: (BasicBlock, (3, 4, 6, 3)), 91 | 50: (Bottleneck, (3, 4, 6, 3)), 92 | 101: (Bottleneck, (3, 4, 23, 3)), 93 | 152: (Bottleneck, (3, 8, 36, 3)) 94 | } 95 | 96 | def __init__(self, depth, num_classes=1000, 97 | frozen_stages=0, out_indices=(0, 1, 2, 3), norm_eval=True, **kwargs): 98 | super(ResNet, self).__init__() 99 | block, layers = self.arch_settings[depth] 100 | self.inplanes = 64 101 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 102 | bias=False) 103 | self.bn1 = nn.BatchNorm2d(self.inplanes) 104 | self.relu = nn.ReLU(inplace=True) 105 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 106 | self.layer1 = self._make_layer(block, 64, layers[0]) 107 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 108 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 109 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 110 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 111 | self.fc = nn.Linear(512 * block.expansion, num_classes) 112 | 113 | self.frozen_stages = frozen_stages 114 | self.out_indices = out_indices 115 | self.norm_eval = norm_eval 116 | 117 | def _make_layer(self, block, planes, blocks, stride=1): 118 | downsample = None 119 | if stride != 1 or self.inplanes != planes * block.expansion: 120 | downsample = nn.Sequential( 121 | conv1x1(self.inplanes, planes * block.expansion, stride), 122 | nn.BatchNorm2d(planes * block.expansion), 123 | ) 124 | 125 | layers = [] 126 | layers.append(block(self.inplanes, planes, stride, downsample)) 127 | self.inplanes = planes * block.expansion 128 | for _ in range(1, blocks): 129 | layers.append(block(self.inplanes, planes)) 130 | 131 | return nn.Sequential(*layers) 132 | 133 | def _freeze_stages(self): 134 | if self.frozen_stages >= 0: 135 | self.bn1.eval() 136 | for m in [self.conv1, self.bn1]: 137 | for param in m.parameters(): 138 | param.requires_grad = False 139 | 140 | for i in range(1, self.frozen_stages + 1): 141 | m = getattr(self, f'layer{i}') 142 | m.eval() 143 | for param in m.parameters(): 144 | param.requires_grad = False 145 | 146 | def train(self, mode=True): 147 | """Convert the model into training mode while keeping normalization layers 148 | frozen.""" 149 | super(ResNet, self).train(mode) 150 | self._freeze_stages() 151 | 152 | if mode and self.norm_eval: 153 | for m in self.modules(): 154 | # trick: eval() has an effect on BatchNorm layers only 155 | if isinstance(m, nn.BatchNorm2d): 156 | m.eval() 157 | 158 | def init_weights(self, pretrained=None): 159 | """Initialize the weights in backbone. 160 | Args: 161 | pretrained (str, optional): Path to pre-trained weights. 162 | Defaults to None. 163 | """
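# Note (editor's): the loop below applies the standard random init (Kaiming for convs,
# unit weight / zero bias for norm layers, small normal for linears); when a checkpoint
# path is then passed, load_checkpoint overwrites these values with the pretrained weights.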
164 | for m in self.modules(): 165 | if isinstance(m, nn.Conv2d): 166 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 167 | if m.bias is not None: 168 | nn.init.zeros_(m.bias) 169 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 170 | nn.init.constant_(m.weight, 1) 171 | nn.init.constant_(m.bias, 0) 172 | elif isinstance(m, nn.Linear): 173 | nn.init.normal_(m.weight, 0, 0.01) 174 | nn.init.zeros_(m.bias) 175 | 176 | if isinstance(pretrained, str): 177 | logger = get_root_logger() 178 | load_checkpoint(self, pretrained, strict=True, logger=logger, map_location='cpu') 179 | elif pretrained is not None: 180 | raise TypeError('pretrained must be a str or None') 181 | 182 | def _forward_impl(self, x): 183 | x = self.conv1(x) 184 | x = self.bn1(x) 185 | x = self.relu(x) 186 | x = self.maxpool(x) 187 | 188 | outs = [] 189 | for idx in range(4): 190 | layer = getattr(self, f'layer{idx + 1}') 191 | x = layer(x) 192 | if idx in self.out_indices: 193 | outs.append(x) 194 | return outs 195 | 196 | def forward(self, x): 197 | return self._forward_impl(x) 198 | -------------------------------------------------------------------------------- /engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Iterable, Optional 3 | import torch 4 | from timm.data import Mixup 5 | from timm.utils import accuracy, ModelEma 6 | 7 | import utils 8 | 9 | 10 | def get_temperature(iteration, epoch, iter_per_epoch, temp_epoch=20, temp_init_value=30.0, temp_end=0.0): 11 | total_iter = iter_per_epoch * temp_epoch 12 | current_iter = iter_per_epoch * epoch + iteration 13 | temperature = temp_end + max(0, (temp_init_value - temp_end) * ((total_iter - current_iter) / max(1.0, total_iter))) 14 | return temperature 15 | 16 | 17 | def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, 18 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 19 | device: torch.device, epoch: int, loss_scaler, max_norm: float = 0, 20 | model_ema: Optional[ModelEma] = None, mixup_fn: Optional[Mixup] = None, log_writer=None, 21 | wandb_logger=None, start_steps=None, lr_schedule_values=None, wd_schedule_values=None, 22 | num_training_steps_per_epoch=None, update_freq=None, use_amp=False, args=None): 23 | model.train(True) 24 | metric_logger = utils.MetricLogger(delimiter=" ") 25 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 26 | metric_logger.add_meter('min_lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 27 | header = 'Epoch: [{}]'.format(epoch) 28 | print_freq = 10 29 | 30 | optimizer.zero_grad() 31 | 32 | for data_iter_step, (samples, targets) in enumerate(metric_logger.log_every(data_loader, print_freq, header)): 33 | step = data_iter_step // update_freq 34 | if step >= num_training_steps_per_epoch: 35 | continue 36 | it = start_steps + step # global training iteration 37 | # Update LR & WD for the first step of each accumulation cycle (note the parentheses: 'and' binds tighter than 'or') 38 | if (lr_schedule_values is not None or wd_schedule_values is not None) and data_iter_step % update_freq == 0: 39 | for i, param_group in enumerate(optimizer.param_groups): 40 | if lr_schedule_values is not None: 41 | param_group["lr"] = lr_schedule_values[it] * param_group["lr_scale"] 42 | if wd_schedule_values is not None and param_group["weight_decay"] > 0: 43 | param_group["weight_decay"] = wd_schedule_values[it] 44 | 45 | if hasattr(model.module, 'net_update_temperature'): 46 | temp = get_temperature(data_iter_step + 1, epoch, len(data_loader), 47 |
temp_epoch=args.temp_epoch, temp_init_value=args.temp_init_value) 48 | model.module.net_update_temperature(temp) 49 | 50 | samples = samples.to(device, non_blocking=True) 51 | targets = targets.to(device, non_blocking=True) 52 | 53 | if mixup_fn is not None: 54 | samples, targets = mixup_fn(samples, targets) 55 | 56 | if use_amp: 57 | with torch.cuda.amp.autocast(): 58 | output = model(samples) 59 | loss = criterion(output, targets) 60 | else: # full precision 61 | output = model(samples) 62 | loss = criterion(output, targets) 63 | 64 | loss_value = loss.item() 65 | 66 | if not math.isfinite(loss_value): # this could trigger if using AMP 67 | print("Loss is {}, stopping training".format(loss_value)) 68 | assert math.isfinite(loss_value) 69 | 70 | if use_amp: 71 | # this attribute is added by timm on one optimizer (adahessian) 72 | is_second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order 73 | loss /= update_freq 74 | grad_norm = loss_scaler(loss, optimizer, clip_grad=max_norm, 75 | parameters=model.parameters(), create_graph=is_second_order, 76 | update_grad=(data_iter_step + 1) % update_freq == 0) 77 | if (data_iter_step + 1) % update_freq == 0: 78 | optimizer.zero_grad() 79 | if model_ema is not None: 80 | model_ema.update(model) 81 | else: # full precision 82 | loss /= update_freq 83 | loss.backward() 84 | if (data_iter_step + 1) % update_freq == 0: 85 | optimizer.step() 86 | optimizer.zero_grad() 87 | if model_ema is not None: 88 | model_ema.update(model) 89 | 90 | torch.cuda.synchronize() 91 | 92 | if mixup_fn is None: 93 | class_acc = (output.max(-1)[-1] == targets).float().mean() 94 | else: 95 | class_acc = None 96 | metric_logger.update(loss=loss_value) 97 | metric_logger.update(class_acc=class_acc) 98 | min_lr = 10. 99 | max_lr = 0. 
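# Note (editor's): under layer-wise lr decay each param group carries its own "lr_scale",
# so different groups can hold different effective learning rates; the scan below records
# the smallest and largest of them for logging.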
100 | for group in optimizer.param_groups: 101 | min_lr = min(min_lr, group["lr"]) 102 | max_lr = max(max_lr, group["lr"]) 103 | 104 | metric_logger.update(lr=max_lr) 105 | metric_logger.update(min_lr=min_lr) 106 | weight_decay_value = None 107 | for group in optimizer.param_groups: 108 | if group["weight_decay"] > 0: 109 | weight_decay_value = group["weight_decay"] 110 | metric_logger.update(weight_decay=weight_decay_value) 111 | if use_amp: 112 | metric_logger.update(grad_norm=grad_norm) 113 | 114 | if log_writer is not None: 115 | log_writer.update(loss=loss_value, head="loss") 116 | log_writer.update(class_acc=class_acc, head="loss") 117 | log_writer.update(lr=max_lr, head="opt") 118 | log_writer.update(min_lr=min_lr, head="opt") 119 | log_writer.update(weight_decay=weight_decay_value, head="opt") 120 | if use_amp: 121 | log_writer.update(grad_norm=grad_norm, head="opt") 122 | log_writer.set_step() 123 | 124 | if wandb_logger: 125 | wandb_logger._wandb.log({ 126 | 'Rank-0 Batch Wise/train_loss': loss_value, 127 | 'Rank-0 Batch Wise/train_max_lr': max_lr, 128 | 'Rank-0 Batch Wise/train_min_lr': min_lr 129 | }, commit=False) 130 | if class_acc is not None: # a 0.0 accuracy tensor is falsy, so test for None explicitly 131 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_class_acc': class_acc}, commit=False) 132 | if use_amp: 133 | wandb_logger._wandb.log({'Rank-0 Batch Wise/train_grad_norm': grad_norm}, commit=False) 134 | wandb_logger._wandb.log({'Rank-0 Batch Wise/global_train_step': it}) 135 | 136 | 137 | # gather the stats from all processes 138 | metric_logger.synchronize_between_processes() 139 | print("Averaged stats:", metric_logger) 140 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 141 | 142 | 143 | @torch.no_grad() 144 | def evaluate(data_loader, model, device, use_amp=False): 145 | criterion = torch.nn.CrossEntropyLoss() 146 | 147 | metric_logger = utils.MetricLogger(delimiter=" ") 148 | header = 'Test:' 149 | 150 | # switch to evaluation mode 151 | model.eval() 152 | for batch in metric_logger.log_every(data_loader, 10, header): 153 | images = batch[0] 154 | target = batch[-1] 155 | 156 | images = images.to(device, non_blocking=True) 157 | target = target.to(device, non_blocking=True) 158 | 159 | # compute output 160 | if use_amp: 161 | with torch.cuda.amp.autocast(): 162 | output = model(images) 163 | loss = criterion(output, target) 164 | else: 165 | output = model(images) 166 | loss = criterion(output, target) 167 | 168 | acc1, acc5 = accuracy(output, target, topk=(1, 5)) 169 | 170 | batch_size = images.shape[0] 171 | metric_logger.update(loss=loss.item()) 172 | metric_logger.meters['acc1'].update(acc1.item(), n=batch_size) 173 | metric_logger.meters['acc5'].update(acc5.item(), n=batch_size) 174 | # gather the stats from all processes 175 | metric_logger.synchronize_between_processes() 176 | print('* Acc@1 {top1.global_avg:.3f} Acc@5 {top5.global_avg:.3f} loss {losses.global_avg:.3f}' 177 | .format(top1=metric_logger.acc1, top5=metric_logger.acc5, losses=metric_logger.loss)) 178 | 179 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 180 |
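As a quick sanity check of the linear temperature schedule implemented by get_temperature() in engine.py above, the following sketch (editor's addition; it assumes the snippet is run from the repository root so that engine.py is importable) prints a few points of the decay:

from engine import get_temperature

# With 100 iterations per epoch and the defaults (temp_epoch=20, temp_init_value=30.0,
# temp_end=0.0), the temperature decays linearly from 30 to 0 over the first 20 epochs
# and is clamped at 0 afterwards by the max(0, ...) term.
print(get_temperature(0, 0, 100))   # 30.0 at the very start of training
print(get_temperature(0, 10, 100))  # 15.0 halfway through the warm-up
print(get_temperature(0, 20, 100))  # 0.0 once epoch 20 is reached
print(get_temperature(0, 30, 100))  # still 0.0 after the schedule has ended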
-------------------------------------------------------------------------------- /fig/Fig_Architecture.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.pdf -------------------------------------------------------------------------------- /fig/Fig_Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSVAI/KernelWarehouse/a4ac17d88aae6f29c9f43635c8d2f3bd41187980/fig/Fig_Architecture.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import resnet18, resnet50, ResNet 2 | from .kw_resnet import kw_resnet18, kw_resnet50, KW_ResNet 3 | from .convnext import convnext_tiny 4 | from .kw_convnext import kw_convnext_tiny 5 | from .mobilenetv2 import mobilenetv2_100, mobilenetv2_050 6 | from .kw_mobilenetv2 import kw_mobilenetv2_100, kw_mobilenetv2_050 7 | from .kw1d2x_mobilenetv2 import kw1d2x_mobilenetv2_100, kw1d2x_mobilenetv2_050 -------------------------------------------------------------------------------- /models/convnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from timm.models.layers import trunc_normal_, DropPath 5 | from timm.models.registry import register_model 6 | 7 | 8 | class Block(nn.Module): 9 | r""" ConvNeXt Block. There are two equivalent implementations: 10 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 11 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 12 | We use (2) as we find it slightly faster in PyTorch 13 | 14 | Args: 15 | dim (int): Number of input channels. 16 | drop_path (float): Stochastic depth rate. Default: 0.0 17 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 18 | """ 19 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6): 20 | super().__init__() 21 | self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv 22 | self.norm = LayerNorm(dim, eps=1e-6) 23 | self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers 24 | self.act = nn.GELU() 25 | self.pwconv2 = nn.Linear(4 * dim, dim) 26 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), 27 | requires_grad=True) if layer_scale_init_value > 0 else None 28 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 29 | 30 | def forward(self, x): 31 | input = x 32 | x = self.dwconv(x) 33 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 34 | x = self.norm(x) 35 | x = self.pwconv1(x) 36 | x = self.act(x) 37 | x = self.pwconv2(x) 38 | if self.gamma is not None: 39 | x = self.gamma * x 40 | x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) 41 | x = input + self.drop_path(x) 42 | return x 43 | 44 | @register_model 45 | class ConvNeXt(nn.Module): 46 | r""" ConvNeXt 47 | A PyTorch impl of : `A ConvNet for the 2020s` - 48 | https://arxiv.org/pdf/2201.03545.pdf 49 | 50 | Args: 51 | in_chans (int): Number of input image channels. Default: 3 52 | num_classes (int): Number of classes for classification head. Default: 1000 53 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 54 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 55 | drop_path_rate (float): Stochastic depth rate. Default: 0. 56 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 57 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
58 | """ 59 | def __init__(self, in_chans=3, num_classes=1000, 60 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 61 | layer_scale_init_value=1e-6, head_init_scale=1., **kwargs 62 | ): 63 | super().__init__() 64 | 65 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 66 | stem = nn.Sequential( 67 | nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4), 68 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 69 | ) 70 | self.downsample_layers.append(stem) 71 | for i in range(3): 72 | downsample_layer = nn.Sequential( 73 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 74 | nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2), 75 | ) 76 | self.downsample_layers.append(downsample_layer) 77 | 78 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 79 | dp_rates=[x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 80 | cur = 0 81 | for i in range(4): 82 | stage = nn.Sequential( 83 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 84 | layer_scale_init_value=layer_scale_init_value) for j in range(depths[i])] 85 | ) 86 | self.stages.append(stage) 87 | cur += depths[i] 88 | 89 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer 90 | self.head = nn.Linear(dims[-1], num_classes) 91 | 92 | self.apply(self._init_weights) 93 | self.head.weight.data.mul_(head_init_scale) 94 | self.head.bias.data.mul_(head_init_scale) 95 | 96 | def _init_weights(self, m): 97 | if isinstance(m, (nn.Conv2d, nn.Linear)): 98 | trunc_normal_(m.weight, std=.02) 99 | nn.init.constant_(m.bias, 0) 100 | 101 | def forward_features(self, x): 102 | for i in range(4): 103 | x = self.downsample_layers[i](x) 104 | x = self.stages[i](x) 105 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C) 106 | 107 | def forward(self, x): 108 | x = self.forward_features(x) 109 | x = self.head(x) 110 | return x 111 | 112 | 113 | class LayerNorm(nn.Module): 114 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 115 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 116 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 117 | with shape (batch_size, channels, height, width). 
118 | """ 119 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 120 | super().__init__() 121 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 122 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 123 | self.eps = eps 124 | self.data_format = data_format 125 | if self.data_format not in ["channels_last", "channels_first"]: 126 | raise NotImplementedError 127 | self.normalized_shape = (normalized_shape, ) 128 | 129 | def forward(self, x): 130 | if self.data_format == "channels_last": 131 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 132 | elif self.data_format == "channels_first": 133 | u = x.mean(1, keepdim=True) 134 | s = (x - u).pow(2).mean(1, keepdim=True) 135 | x = (x - u) / torch.sqrt(s + self.eps) 136 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 137 | return x 138 | 139 | 140 | @register_model 141 | def convnext_tiny(**kwargs): 142 | model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 143 | return model 144 | 145 | -------------------------------------------------------------------------------- /models/kw1d2x_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from modules.kernel_warehouse import Warehouse_Manager 3 | from timm.models.registry import register_model 4 | 5 | def _make_divisible(v, divisor, min_value=None): 6 | """ 7 | This function is taken from the original tf repo. 8 | It ensures that all layers have a channel number that is divisible by 8 9 | It can be seen here: 10 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 11 | :param v: 12 | :param divisor: 13 | :param min_value: 14 | :return: 15 | """ 16 | if min_value is None: 17 | min_value = divisor 18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 19 | # Make sure that round down does not go down by more than 10%. 
20 | if new_v < 0.9 * v: 21 | new_v += divisor 22 | return new_v 23 | 24 | 25 | class ConvBNReLU(nn.Sequential): 26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 27 | warehouse_name=None, warehouse_manager=None, enabled=True): 28 | padding = (kernel_size - 1) // 2 29 | super(ConvBNReLU, self).__init__( 30 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 31 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 32 | norm_layer(out_planes), 33 | nn.ReLU6(inplace=True) 34 | ) 35 | 36 | 37 | class InvertedResidual(nn.Module): 38 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 39 | warehouse_manager=None, pwlinear_enabled=True): 40 | super(InvertedResidual, self).__init__() 41 | self.stride = stride 42 | assert stride in [1, 2] 43 | hidden_dim = int(round(inp * expand_ratio)) 44 | self.use_res_connect = self.stride == 1 and inp == oup 45 | 46 | layers = [] 47 | if expand_ratio != 1: 48 | # pw 49 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 50 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 51 | warehouse_manager=warehouse_manager, enabled=pwlinear_enabled)) 52 | layers.extend([ 53 | # dw 54 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 55 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 56 | warehouse_manager=warehouse_manager), 57 | # pw-linear 58 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 59 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1), 60 | enabled=pwlinear_enabled), 61 | norm_layer(oup), 62 | ]) 63 | self.conv = nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | if self.use_res_connect: 67 | return x + self.conv(x) 68 | else: 69 | return self.conv(x) 70 | 71 | 72 | class KW1d2x_MobileNetV2(nn.Module): 73 | def __init__(self, 74 | num_classes=1000, 75 | width_mult=1.0, 76 | inverted_residual_setting=None, 77 | round_nearest=8, 78 | block=None, 79 | norm_layer=None, 80 | dropout=0.1, 81 | **kwargs): 82 | """ 83 | MobileNet V2 main class 84 | 85 | Args: 86 | num_classes (int): Number of classes 87 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 88 | inverted_residual_setting: Network structure 89 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 90 | Set to 1 to turn off rounding 91 | block: Module specifying inverted residual building block for mobilenet 92 | norm_layer: Module specifying the normalization layer to use 93 | 94 | """ 95 | 96 | reduction = 0.03125 97 | cell_num_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.5) 98 | cell_inplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 0.025) 99 | cell_outplane_ratio = (1, 1, 1, 1, 1, 1, 0.5, 1, 0.5, 1) 100 | sharing_range = ('layer',) # fixed: tuple("layer") would split the string into single characters 101 | 102 | super(KW1d2x_MobileNetV2, self).__init__() 103 | if block is None: 104 | block = InvertedResidual 105 | 106 | if norm_layer is None: 107 | norm_layer = nn.BatchNorm2d 108 | 109 | input_channel = 32 110 | last_channel = 1280 111 | 112 | if inverted_residual_setting is None: 113 | inverted_residual_setting = [ 114 | # t, c, n, s 115 | [1, 16, 1, 1], 116 | [6, 24, 2, 2], 117 | [6, 32, 3, 2], 118 | [6, 64, 4, 2], 119 | [6, 96, 3, 1], 120 | [6, 160, 3, 2], 121 | [6, 320, 1, 1], 122 | ] 123 | 124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6]
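# Note (editor's): kw_stage_setting maps the seven inverted-residual settings above onto
# six warehouse stages (the last two settings share stage 6), and pwlinear_enabled below
# selects, per stage, whether the pointwise convolutions also draw their kernels from a
# warehouse; the handover logic further down assigns the first block of each new stage to
# the preceding stage's warehouse and resets layer_idx.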
125 | pwlinear_enabled = [False, False, False, False, False, True, True] 126 | 127 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 128 | cell_outplane_ratio, sharing_range) 129 | 130 | # only check the first element, assuming user knows t,c,n,s are required 131 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 132 | raise ValueError("inverted_residual_setting should be non-empty " 133 | "or a 4-element list, got {}".format(inverted_residual_setting)) 134 | 135 | # building first layer 136 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 137 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 138 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 139 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 140 | 141 | layer_idx = 0 142 | # building inverted residual blocks 143 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 144 | output_channel = _make_divisible(c * width_mult, round_nearest) 145 | for i in range(n): 146 | stride = s if i == 0 else 1 147 | 148 | if i == 0 and idx > 0: 149 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 150 | else: 151 | handover = False 152 | 153 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 156 | layer_idx=layer_idx, pwlinear_enabled=pwlinear_enabled[stage_idx - 1])) 157 | 158 | input_channel = output_channel 159 | layer_idx += 1 160 | 161 | if handover: 162 | layer_idx = 0 163 | 164 | # building last several layers 165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 166 | warehouse_manager=self.warehouse_manager, 167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 168 | # make it nn.Sequential 169 | self.features = nn.Sequential(*features) 170 | # building classifier 171 | self.classifier = nn.Sequential( 172 | nn.Dropout(dropout), 173 | self.warehouse_manager.reserve(self.last_channel, num_classes, kernel_size=1, 174 | warehouse_name='classifier', layer_type='linear'), 175 | ) 176 | 177 | # weight initialization 178 | for m in self.modules(): 179 | if isinstance(m, nn.Conv2d): 180 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 181 | if m.bias is not None: 182 | nn.init.zeros_(m.bias) 183 | elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.GroupNorm)): 184 | nn.init.ones_(m.weight) 185 | nn.init.zeros_(m.bias) 186 | elif isinstance(m, nn.Linear): 187 | nn.init.normal_(m.weight, 0, 0.01) 188 | nn.init.zeros_(m.bias) 189 | 190 | self.warehouse_manager.store() 191 | self.warehouse_manager.allocate(self) 192 | 193 | def net_update_temperature(self, temp): 194 | for m in self.modules(): 195 | if hasattr(m, "update_temperature"): 196 | m.update_temperature(temp) 197 | 198 | def _forward_impl(self, x): 199 | # This exists since TorchScript doesn't support inheritance, so the superclass method 200 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 201 | x = self.features(x) 202 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 203 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 204 | x = self.classifier(x) 205 | return x 206 | 207 | def
forward(self, x): 208 | return self._forward_impl(x) 209 | 210 | 211 | def kw1d2x_mobilenetv2(**kwargs): 212 | model = KW1d2x_MobileNetV2(**kwargs) 213 | return model 214 | 215 | 216 | @register_model 217 | def kw1d2x_mobilenetv2_050(**kwargs): 218 | return kw1d2x_mobilenetv2(width_mult=0.5, **kwargs) 219 | 220 | 221 | @register_model 222 | def kw1d2x_mobilenetv2_100(**kwargs): 223 | return kw1d2x_mobilenetv2(width_mult=1.0, **kwargs) 224 | -------------------------------------------------------------------------------- /models/kw_convnext.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from timm.models.layers import trunc_normal_, DropPath 5 | from timm.models.registry import register_model 6 | from modules.kernel_warehouse import Warehouse_Manager 7 | 8 | 9 | class Block(nn.Module): 10 | r""" ConvNeXt Block. There are two equivalent implementations: 11 | (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) 12 | (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back 13 | We use (2) as we find it slightly faster in PyTorch 14 | 15 | Args: 16 | dim (int): Number of input channels. 17 | drop_path (float): Stochastic depth rate. Default: 0.0 18 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 19 | """ 20 | 21 | def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6, 22 | warehouse_manager=None, stage_idx=-1, layer_idx=-1): 23 | super().__init__() 24 | self.dwconv = warehouse_manager.reserve(dim, dim, kernel_size=7, padding=3, groups=dim, 25 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0)) # depthwise conv 26 | self.norm = LayerNorm(dim, eps=1e-6) 27 | self.pwconv1 = warehouse_manager.reserve(dim, 4 * dim, kernel_size=1, padding=0, 28 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)) # pointwise/1x1 convs, implemented with linear layers 29 | self.act = nn.GELU() 30 | self.pwconv2 = warehouse_manager.reserve(4 * dim, dim, kernel_size=1, padding=0, 31 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 2)) 32 | self.gamma = nn.Parameter(layer_scale_init_value * torch.ones([1, dim, 1, 1]), 33 | requires_grad=True) if layer_scale_init_value > 0 else None 34 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 35 | 36 | def forward(self, x): 37 | input = x 38 | x = self.dwconv(x) 39 | x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) 40 | x = self.norm(x).permute(0, 3, 1, 2) 41 | x = self.pwconv1(x) 42 | x = self.act(x) 43 | x = self.pwconv2(x) 44 | if self.gamma is not None: 45 | x = self.gamma * x 46 | x = input + self.drop_path(x) 47 | return x 48 | 49 | 50 | @register_model 51 | class KW_ConvNeXt(nn.Module): 52 | r""" ConvNeXt 53 | A PyTorch impl of : `A ConvNet for the 2020s` - 54 | https://arxiv.org/pdf/2201.03545.pdf 55 | 56 | Args: 57 | in_chans (int): Number of input image channels. Default: 3 58 | num_classes (int): Number of classes for classification head. Default: 1000 59 | depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3] 60 | dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768] 61 | drop_path_rate (float): Stochastic depth rate. Default: 0. 62 | layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. 
63 | head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1. 64 | """ 65 | 66 | def __init__(self, in_chans=3, num_classes=1000, 67 | depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], drop_path_rate=0., 68 | layer_scale_init_value=1e-6, head_init_scale=1., 69 | reduction=0.0625, cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 70 | sharing_range=('layer', 'pwconv'), nonlocal_basis_ratio=1, **kwargs 71 | ): 72 | super().__init__() 73 | 74 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 75 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio, 76 | norm_layer=nn.LayerNorm, 77 | ) 78 | self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers 79 | stem = nn.Sequential( 80 | self.warehouse_manager.reserve(in_chans, dims[0], kernel_size=4, stride=4, 81 | warehouse_name='stage{}_conv0'.format('stem')), 82 | LayerNorm(dims[0], eps=1e-6, data_format="channels_first") 83 | ) 84 | self.downsample_layers.append(stem) 85 | for i in range(3): 86 | downsample_layer = nn.Sequential( 87 | LayerNorm(dims[i], eps=1e-6, data_format="channels_first"), 88 | self.warehouse_manager.reserve(dims[i], dims[i + 1], kernel_size=2, stride=2, 89 | warehouse_name='stage{}_layer{}_conv0'.format(i, 'ds')), 90 | ) 91 | self.downsample_layers.append(downsample_layer) 92 | 93 | self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks 94 | dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] 95 | cur = 0 96 | for i in range(4): 97 | stage = nn.Sequential( 98 | *[Block(dim=dims[i], drop_path=dp_rates[cur + j], 99 | layer_scale_init_value=layer_scale_init_value, 100 | warehouse_manager=self.warehouse_manager, 101 | stage_idx=i, layer_idx=j, 102 | ) for j in range(depths[i])] 103 | ) 104 | self.stages.append(stage) 105 | cur += depths[i] 106 | 107 | self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer 108 | self.head = nn.Linear(dims[-1], num_classes) 109 | 110 | self.apply(self._init_weights) 111 | self.head.weight.data.mul_(head_init_scale) 112 | self.head.bias.data.mul_(head_init_scale) 113 | 114 | self.warehouse_manager.store() 115 | self.warehouse_manager.allocate(self) 116 | 117 | def _init_weights(self, m): 118 | if isinstance(m, (nn.Conv2d, nn.Linear)): 119 | trunc_normal_(m.weight, std=.02) 120 | nn.init.constant_(m.bias, 0) 121 | 122 | def net_update_temperature(self, temp): 123 | for m in self.modules(): 124 | if hasattr(m, "update_temperature"): 125 | m.update_temperature(temp) 126 | 127 | def forward_features(self, x): 128 | for i in range(4): 129 | x = self.downsample_layers[i](x) 130 | x = self.stages[i](x) 131 | return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C) 132 | 133 | def forward(self, x): 134 | x = self.forward_features(x) 135 | x = self.head(x) 136 | return x 137 | 138 | 139 | class LayerNorm(nn.Module): 140 | r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. 141 | The ordering of the dimensions in the inputs. channels_last corresponds to inputs with 142 | shape (batch_size, height, width, channels) while channels_first corresponds to inputs 143 | with shape (batch_size, channels, height, width). 
144 | """ 145 | 146 | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): 147 | super().__init__() 148 | self.weight = nn.Parameter(torch.ones(normalized_shape)) 149 | self.bias = nn.Parameter(torch.zeros(normalized_shape)) 150 | self.eps = eps 151 | self.data_format = data_format 152 | if self.data_format not in ["channels_last", "channels_first"]: 153 | raise NotImplementedError 154 | self.normalized_shape = (normalized_shape,) 155 | 156 | def forward(self, x): 157 | if self.data_format == "channels_last": 158 | return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) 159 | elif self.data_format == "channels_first": 160 | u = x.mean(1, keepdim=True) 161 | s = (x - u).pow(2).mean(1, keepdim=True) 162 | x = (x - u) / torch.sqrt(s + self.eps) 163 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 164 | return x 165 | 166 | 167 | @register_model 168 | def kw_convnext_tiny(pretrained=False, in_22k=False, **kwargs): 169 | model = KW_ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs) 170 | return model -------------------------------------------------------------------------------- /models/kw_mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from modules.kernel_warehouse import Warehouse_Manager 3 | from timm.models.registry import register_model 4 | 5 | 6 | def _make_divisible(v, divisor, min_value=None): 7 | """ 8 | This function is taken from the original tf repo. 9 | It ensures that all layers have a channel number that is divisible by 8 10 | It can be seen here: 11 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 12 | :param v: 13 | :param divisor: 14 | :param min_value: 15 | :return: 16 | """ 17 | if min_value is None: 18 | min_value = divisor 19 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 20 | # Make sure that round down does not go down by more than 10%. 
21 | if new_v < 0.9 * v: 22 | new_v += divisor 23 | return new_v 24 | 25 | 26 | class ConvBNReLU(nn.Sequential): 27 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d, 28 | warehouse_name=None, warehouse_manager=None, enabled=True): 29 | padding = (kernel_size - 1) // 2 30 | super(ConvBNReLU, self).__init__( 31 | warehouse_manager.reserve(in_planes, out_planes, kernel_size, stride, padding=padding, 32 | groups=groups, bias=False, warehouse_name=warehouse_name, enabled=enabled), 33 | norm_layer(out_planes), 34 | nn.ReLU6(inplace=True) 35 | ) 36 | 37 | 38 | class InvertedResidual(nn.Module): 39 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d, stage_idx=None, layer_idx=None, 40 | warehouse_manager=None): 41 | super(InvertedResidual, self).__init__() 42 | self.stride = stride 43 | assert stride in [1, 2] 44 | hidden_dim = int(round(inp * expand_ratio)) 45 | self.use_res_connect = self.stride == 1 and inp == oup 46 | 47 | layers = [] 48 | if expand_ratio != 1: 49 | # pw 50 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer, 51 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 0), 52 | warehouse_manager=warehouse_manager)) 53 | 54 | layers.extend([ 55 | # dw 56 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer, 57 | warehouse_name='stage{}_layer{}_dwconv{}'.format(stage_idx, layer_idx, 0), 58 | warehouse_manager=warehouse_manager), 59 | # pw-linear 60 | warehouse_manager.reserve(hidden_dim, oup, 1, 1, 0, bias=False, 61 | warehouse_name='stage{}_layer{}_pwconv{}'.format(stage_idx, layer_idx, 1)), 62 | norm_layer(oup), 63 | ]) 64 | self.conv = nn.Sequential(*layers) 65 | 66 | def forward(self, x): 67 | if self.use_res_connect: 68 | return x + self.conv(x) 69 | else: 70 | return self.conv(x) 71 | 72 | 73 | class KW_MobileNetV2(nn.Module): 74 | def __init__(self, 75 | num_classes=1000, 76 | width_mult=1.0, 77 | inverted_residual_setting=None, 78 | round_nearest=8, 79 | block=None, 80 | norm_layer=None, 81 | dropout=0.0, 82 | reduction=0.0625, 83 | cell_num_ratio=1, 84 | cell_inplane_ratio=1, 85 | cell_outplane_ratio=1, 86 | sharing_range=None, 87 | nonlocal_basis_ratio=1, 88 | **kwargs): 89 | """ 90 | MobileNet V2 main class 91 | 92 | Args: 93 | num_classes (int): Number of classes 94 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 95 | inverted_residual_setting: Network structure 96 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 97 | Set to 1 to turn off rounding 98 | block: Module specifying inverted residual building block for mobilenet 99 | norm_layer: Module specifying the normalization layer to use 100 | 101 | """ 102 | super(KW_MobileNetV2, self).__init__() 103 | if block is None: 104 | block = InvertedResidual 105 | 106 | if norm_layer is None: 107 | norm_layer = nn.BatchNorm2d 108 | 109 | input_channel = 32 110 | last_channel = 1280 111 | 112 | if inverted_residual_setting is None: 113 | inverted_residual_setting = [ 114 | # t, c, n, s 115 | [1, 16, 1, 1], 116 | [6, 24, 2, 2], 117 | [6, 32, 3, 2], 118 | [6, 64, 4, 2], 119 | [6, 96, 3, 1], 120 | [6, 160, 3, 2], 121 | [6, 320, 1, 1], 122 | ] 123 | 124 | kw_stage_setting = [1, 2, 3, 4, 5, 6, 6] 125 | 126 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, 127 | cell_outplane_ratio, sharing_range, nonlocal_basis_ratio) 128 |
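# How the Warehouse_Manager is used in this file (editor's summary): reserve() is called
# while the network is assembled and returns layers whose kernels will live in a shared
# warehouse keyed by warehouse_name; once construction finishes, store() builds the shared
# kernel cells from all reservations and allocate(self) attaches them to the reserved
# layers. The attention temperature is then annealed during training via
# net_update_temperature().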
129 | # only check the first element, assuming user knows t,c,n,s are required 130 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 131 | raise ValueError("inverted_residual_setting should be non-empty " 132 | "or a 4-element list, got {}".format(inverted_residual_setting)) 133 | 134 | # building first layer 135 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 136 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 137 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer, 138 | warehouse_manager=self.warehouse_manager, warehouse_name='stage0_conv0')] 139 | 140 | layer_idx = 0 141 | # building inverted residual blocks 142 | for idx, (t, c, n, s) in enumerate(inverted_residual_setting): 143 | output_channel = _make_divisible(c * width_mult, round_nearest) 144 | for i in range(n): 145 | stride = s if i == 0 else 1 146 | 147 | if i == 0 and idx > 0: 148 | handover = kw_stage_setting[idx] != kw_stage_setting[idx - 1] 149 | else: 150 | handover = False 151 | 152 | stage_idx = (kw_stage_setting[idx] - 1) if handover else kw_stage_setting[idx] 153 | 154 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer, 155 | warehouse_manager=self.warehouse_manager, stage_idx=stage_idx, 156 | layer_idx=layer_idx)) 157 | 158 | input_channel = output_channel 159 | layer_idx += 1 160 | 161 | if handover: 162 | layer_idx = 0 163 | 164 | # building last several layers 165 | features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1, norm_layer=norm_layer, 166 | warehouse_manager=self.warehouse_manager, 167 | warehouse_name='stage{}_layer{}_pwconv1'.format(kw_stage_setting[-1], layer_idx))) 168 | # make it nn.Sequential 169 | self.features = nn.Sequential(*features) 170 | # building classifier 171 | self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity() 172 | self.classifier = nn.Linear(self.last_channel, num_classes, bias=True) 173 | 174 | # weight initialization 175 | for m in self.modules(): 176 | if isinstance(m, nn.Conv2d): 177 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 178 | if m.bias is not None: 179 | nn.init.zeros_(m.bias) 180 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 181 | nn.init.ones_(m.weight) 182 | nn.init.zeros_(m.bias) 183 | elif isinstance(m, nn.Linear): 184 | nn.init.normal_(m.weight, 0, 0.01) 185 | nn.init.zeros_(m.bias) 186 | 187 | self.warehouse_manager.store() 188 | self.warehouse_manager.allocate(self) 189 | 190 | def net_update_temperature(self, temp): 191 | for m in self.modules(): 192 | if hasattr(m, "update_temperature"): 193 | m.update_temperature(temp) 194 | 195 | def _forward_impl(self, x): 196 | # This exists since TorchScript doesn't support inheritance, so the superclass method 197 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 198 | x = self.features(x) 199 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 200 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 201 | x = self.dropout(x) 202 | x = self.classifier(x) 203 | return x 204 | 205 | def forward(self, x): 206 | return self._forward_impl(x) 207 | 208 | 209 | def kw_mobilenetv2(**kwargs): 210 | model = KW_MobileNetV2(**kwargs) 211 | return model 212 | 213 | 214 | @register_model 215 | def kw_mobilenetv2_050(**kwargs): 216 | return kw_mobilenetv2(width_mult=0.5, **kwargs) 217 | 218 | 219 | @register_model 220 | def 
kw_mobilenetv2_100(**kwargs): 221 | return kw_mobilenetv2(width_mult=1.0, **kwargs) 222 | -------------------------------------------------------------------------------- /models/kw_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from modules.kernel_warehouse import Warehouse_Manager 4 | from timm.models.layers import DropPath 5 | from timm.models.registry import register_model 6 | 7 | 8 | def kwconv3x3(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 9 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, 10 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 11 | 12 | 13 | def kwconv1x1(in_planes, out_planes, stride=1, warehouse_name=None, warehouse_manager=None, enabled=True): 14 | return warehouse_manager.reserve(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, 15 | warehouse_name=warehouse_name, enabled=enabled, bias=False) 16 | 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None, 22 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 23 | super(BasicBlock, self).__init__() 24 | conv1_stage_idx = max(stage_idx - 1 if warehouse_handover else stage_idx, 0) 25 | self.conv1 = kwconv3x3(inplanes, planes, stride, 26 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 27 | warehouse_manager=warehouse_manager) 28 | self.bn1 = nn.BatchNorm2d(planes) 29 | self.relu = nn.ReLU(inplace=True) 30 | layer_idx = 0 if warehouse_handover else layer_idx 31 | self.conv2 = kwconv3x3(planes, planes, 32 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 33 | warehouse_manager=warehouse_manager) 34 | self.bn2 = nn.BatchNorm2d(planes) 35 | self.downsample = downsample 36 | self.stride = stride 37 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 38 | 39 | def forward(self, x): 40 | identity = x 41 | 42 | out = self.conv1(x) 43 | out = self.bn1(out) 44 | out = self.relu(out) 45 | 46 | out = self.conv2(out) 47 | out = self.bn2(out) 48 | 49 | if self.downsample is not None: 50 | identity = self.downsample(x) 51 | 52 | out = identity + self.drop_path(out) 53 | out = self.relu(out) 54 | return out 55 | 56 | 57 | class Bottleneck(nn.Module): 58 | expansion = 4 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None, 61 | stage_idx=None, layer_idx=None, warehouse_manager=None, warehouse_handover=False, drop_path=0.): 62 | super(Bottleneck, self).__init__() 63 | conv1_stage_idx = stage_idx - 1 if warehouse_handover else stage_idx 64 | self.conv1 = kwconv1x1(inplanes, planes, 65 | warehouse_name='stage{}_layer{}_conv{}'.format(conv1_stage_idx, layer_idx, 0), 66 | warehouse_manager=warehouse_manager, enabled=(conv1_stage_idx >= 0)) 67 | self.bn1 = nn.BatchNorm2d(planes) 68 | layer_idx = 0 if warehouse_handover else layer_idx 69 | self.conv2 = kwconv3x3(planes, planes, stride, 70 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 1), 71 | warehouse_manager=warehouse_manager) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = kwconv1x1(planes, planes * self.expansion, 74 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx, layer_idx, 2), 75 | warehouse_manager=warehouse_manager) 76 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 77 | self.relu = nn.ReLU(inplace=True) 78 | self.downsample = downsample 79 | self.stride = stride 80 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 81 | 82 | def forward(self, x): 83 | identity = x 84 | 85 | out = self.conv1(x) 86 | out = self.bn1(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv2(out) 90 | out = self.bn2(out) 91 | out = self.relu(out) 92 | 93 | out = self.conv3(out) 94 | out = self.bn3(out) 95 | 96 | if self.downsample is not None: 97 | identity = self.downsample(x) 98 | 99 | out = identity + self.drop_path(out) 100 | out = self.relu(out) 101 | return out 102 | 103 | 104 | class KW_ResNet(nn.Module): 105 | def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduction=0.0625, 106 | cell_num_ratio=1, cell_inplane_ratio=1, cell_outplane_ratio=1, 107 | sharing_range=('layer', 'conv'), nonlocal_basis_ratio=1, drop_path_rate=0., **kwargs): 108 | super(KW_ResNet, self).__init__() 109 | self.warehouse_manager = Warehouse_Manager(reduction, cell_num_ratio, cell_inplane_ratio, cell_outplane_ratio, 110 | sharing_range, nonlocal_basis_ratio) 111 | self.inplanes = 64 112 | self.layer_idx = 0 113 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) 114 | self.bn1 = nn.BatchNorm2d(self.inplanes) 115 | self.relu = nn.ReLU(inplace=True) 116 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 117 | self.layer1 = self._make_layer(block, 64, layers[0], 118 | stage_idx=0, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 119 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 120 | stage_idx=1, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 121 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 122 | stage_idx=2, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 123 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 124 | stage_idx=3, warehouse_manager=self.warehouse_manager, drop_path=drop_path_rate) 125 | self.avgpool = 
nn.AdaptiveAvgPool2d((1, 1)) 126 | self.dropout = nn.Dropout(p=dropout) 127 | self.fc = nn.Linear(512 * block.expansion, num_classes) 128 | 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 132 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 133 | nn.init.constant_(m.weight, 1) 134 | nn.init.constant_(m.bias, 0) 135 | 136 | self.warehouse_manager.store() 137 | self.warehouse_manager.allocate(self) 138 | 139 | def _make_layer(self, block, planes, blocks, stride=1, stage_idx=-1, warehouse_manager=None, drop_path=0.): 140 | downsample = None 141 | if stride != 1 or self.inplanes != planes * block.expansion: 142 | downsample = nn.Sequential( 143 | warehouse_manager.reserve( 144 | self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, padding=0, 145 | warehouse_name='stage{}_layer{}_conv{}'.format(stage_idx - 1, self.layer_idx + 1, 0), 146 | enabled=(stride != 1), bias=False), 147 | nn.BatchNorm2d(planes * block.expansion), 148 | ) 149 | 150 | layers = [] 151 | layers.append(block(self.inplanes, planes, stride, downsample, stage_idx=stage_idx, layer_idx=self.layer_idx, 152 | warehouse_manager=warehouse_manager, warehouse_handover=True, drop_path=drop_path)) 153 | self.layer_idx = 1 154 | self.inplanes = planes * block.expansion 155 | for idx in range(1, blocks): 156 | layers.append(block(self.inplanes, planes, stage_idx=stage_idx, layer_idx=self.layer_idx, 157 | warehouse_manager=warehouse_manager, drop_path=drop_path)) 158 | self.layer_idx += 1 159 | return nn.Sequential(*layers) 160 | 161 | def net_update_temperature(self, temp): 162 | for m in self.modules(): 163 | if hasattr(m, "update_temperature"): 164 | m.update_temperature(temp) 165 | 166 | def _forward_impl(self, x): 167 | x = self.conv1(x) 168 | x = self.bn1(x) 169 | x = self.relu(x) 170 | x = self.maxpool(x) 171 | 172 | x = self.layer1(x) 173 | x = self.layer2(x) 174 | x = self.layer3(x) 175 | x = self.layer4(x) 176 | 177 | x = self.avgpool(x) 178 | x = torch.flatten(x, 1) 179 | x = self.dropout(x) 180 | x = self.fc(x) 181 | return x 182 | 183 | def forward(self, x): 184 | return self._forward_impl(x) 185 | 186 | 187 | @register_model 188 | def kw_resnet18(**kwargs): 189 | model = KW_ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 190 | return model 191 | 192 | @register_model 193 | def kw_resnet50(**kwargs): 194 | model = KW_ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 195 | return model 196 | 197 | -------------------------------------------------------------------------------- /models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from timm.models.registry import register_model 3 | 4 | 5 | def _make_divisible(v, divisor, min_value=None): 6 | """ 7 | This function is taken from the original tf repo. 8 | It ensures that all layers have a channel number that is divisible by 8 9 | It can be seen here: 10 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 11 | :param v: 12 | :param divisor: 13 | :param min_value: 14 | :return: 15 | """ 16 | if min_value is None: 17 | min_value = divisor 18 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 19 | # Make sure that round down does not go down by more than 10%. 
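# A quick worked example of this guard: _make_divisible(10, 8) first rounds to
# int(10 + 4) // 8 * 8 = 8, but 8 < 0.9 * 10, so it is bumped to 16; whereas
# _make_divisible(15, 8) rounds to 16, and 16 >= 0.9 * 15 = 13.5, so 16 is kept.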
20 | if new_v < 0.9 * v: 21 | new_v += divisor 22 | return new_v 23 | 24 | 25 | class ConvBNReLU(nn.Sequential): 26 | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1, norm_layer=nn.BatchNorm2d): 27 | padding = (kernel_size - 1) // 2 28 | super(ConvBNReLU, self).__init__( 29 | nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), 30 | norm_layer(out_planes), 31 | nn.ReLU6(inplace=True) 32 | ) 33 | 34 | 35 | class InvertedResidual(nn.Module): 36 | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.BatchNorm2d): 37 | super(InvertedResidual, self).__init__() 38 | self.stride = stride 39 | hidden_dim = int(round(inp * expand_ratio)) 40 | self.use_res_connect = self.stride == 1 and inp == oup 41 | 42 | layers = [] 43 | if expand_ratio != 1: 44 | # pw 45 | layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) 46 | layers.extend([ 47 | # dw 48 | ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim, norm_layer=norm_layer), 49 | # pw-linear 50 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 51 | norm_layer(oup), 52 | ]) 53 | self.conv = nn.Sequential(*layers) 54 | 55 | def forward(self, x): 56 | if self.use_res_connect: 57 | return x + self.conv(x) 58 | else: 59 | return self.conv(x) 60 | 61 | 62 | class MobileNetV2(nn.Module): 63 | def __init__(self, 64 | num_classes=1000, 65 | width_mult=1.0, 66 | inverted_residual_setting=None, 67 | round_nearest=8, 68 | block=InvertedResidual, 69 | norm_layer=nn.BatchNorm2d, 70 | dropout=0.0, 71 | **kwargs): 72 | """ 73 | MobileNet V2 main class 74 | 75 | Args: 76 | num_classes (int): Number of classes 77 | width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount 78 | inverted_residual_setting: Network structure 79 | round_nearest (int): Round the number of channels in each layer to be a multiple of this number 80 | Set to 1 to turn off rounding 81 | block: Module specifying inverted residual building block for mobilenet 82 | norm_layer: Module specifying the normalization layer to use 83 | 84 | """ 85 | super(MobileNetV2, self).__init__() 86 | 87 | input_channel = 32 88 | last_channel = 1280 89 | 90 | if inverted_residual_setting is None: 91 | inverted_residual_setting = [ 92 | # t, c, n, s 93 | [1, 16, 1, 1], 94 | [6, 24, 2, 2], 95 | [6, 32, 3, 2], 96 | [6, 64, 4, 2], 97 | [6, 96, 3, 1], 98 | [6, 160, 3, 2], 99 | [6, 320, 1, 1], 100 | ] 101 | 102 | # only check the first element, assuming user knows t,c,n,s are required 103 | if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: 104 | raise ValueError("inverted_residual_setting should be non-empty " 105 | "and a list of 4-element lists (t, c, n, s), got {}".format(inverted_residual_setting)) 106 | 107 | # building first layer 108 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 109 | self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) 110 | features = [ConvBNReLU(3, input_channel, stride=2, norm_layer=norm_layer)] 111 | # building inverted residual blocks 112 | for t, c, n, s in inverted_residual_setting: 113 | output_channel = _make_divisible(c * width_mult, round_nearest) 114 | for i in range(n): 115 | stride = s if i == 0 else 1 116 | features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm_layer=norm_layer)) 117 | input_channel = output_channel 118 | # building last several layers 119 | features.append(ConvBNReLU(input_channel, self.last_channel, 
kernel_size=1, norm_layer=norm_layer)) 120 | # make it nn.Sequential 121 | self.features = nn.Sequential(*features) 122 | # building classifier 123 | self.classifier = nn.Sequential( 124 | nn.Dropout(dropout), 125 | nn.Linear(self.last_channel, num_classes), 126 | ) 127 | 128 | # weight initialization 129 | for m in self.modules(): 130 | if isinstance(m, nn.Conv2d): 131 | nn.init.kaiming_normal_(m.weight, mode='fan_out') 132 | if m.bias is not None: 133 | nn.init.zeros_(m.bias) 134 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 135 | nn.init.ones_(m.weight) 136 | nn.init.zeros_(m.bias) 137 | elif isinstance(m, nn.Linear): 138 | nn.init.normal_(m.weight, 0, 0.01) 139 | nn.init.zeros_(m.bias) 140 | 141 | def _forward_impl(self, x): 142 | # This exists since TorchScript doesn't support inheritance, so the superclass method 143 | # (this one) needs to have a name other than `forward` that can be accessed in a subclass 144 | x = self.features(x) 145 | # Cannot use "squeeze" as batch-size can be 1 => must use reshape with x.shape[0] 146 | x = nn.functional.adaptive_avg_pool2d(x, 1).reshape(x.shape[0], -1) 147 | x = self.classifier(x) 148 | return x 149 | 150 | def forward(self, x): 151 | return self._forward_impl(x) 152 | 153 | 154 | def mobilenet_v2(**kwargs): 155 | model = MobileNetV2(**kwargs) 156 | return model 157 | 158 | 159 | @register_model 160 | def mobilenetv2_050(**kwargs): 161 | return mobilenet_v2(width_mult=0.5, **kwargs) 162 | 163 | 164 | @register_model 165 | def mobilenetv2_100(**kwargs): 166 | return mobilenet_v2(width_mult=1.0, **kwargs) 167 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from timm.models.layers import DropPath 4 | from timm.models.registry import register_model 5 | 6 | 7 | def conv3x3(in_planes, out_planes, stride=1): 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 9 | padding=1, bias=False) 10 | 11 | 12 | def conv1x1(in_planes, out_planes, stride=1): 13 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 14 | 15 | 16 | class BasicBlock(nn.Module): 17 | expansion = 1 18 | 19 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.): 20 | super(BasicBlock, self).__init__() 21 | self.conv1 = conv3x3(inplanes, planes, stride) 22 | self.bn1 = nn.BatchNorm2d(planes) 23 | self.relu = nn.ReLU(inplace=True) 24 | self.conv2 = conv3x3(planes, planes) 25 | self.bn2 = nn.BatchNorm2d(planes) 26 | self.downsample = downsample 27 | self.stride = stride 28 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 29 | 30 | def forward(self, x): 31 | identity = x 32 | 33 | out = self.conv1(x) 34 | out = self.bn1(out) 35 | out = self.relu(out) 36 | 37 | out = self.conv2(out) 38 | out = self.bn2(out) 39 | 40 | if self.downsample is not None: 41 | identity = self.downsample(x) 42 | 43 | out = identity + self.drop_path(out) 44 | out = self.relu(out) 45 | return out 46 | 47 | 48 | class Bottleneck(nn.Module): 49 | expansion = 4 50 | 51 | def __init__(self, inplanes, planes, stride=1, downsample=None, drop_path=0.): 52 | super(Bottleneck, self).__init__() 53 | self.conv1 = conv1x1(inplanes, planes) 54 | self.bn1 = nn.BatchNorm2d(planes) 55 | self.conv2 = conv3x3(planes, planes, stride) 56 | self.bn2 = nn.BatchNorm2d(planes) 57 | self.conv3 = conv1x1(planes, planes * self.expansion) 58 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 59 | self.relu = nn.ReLU(inplace=True) 60 | self.downsample = downsample 61 | self.stride = stride 62 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 63 | 64 | def forward(self, x): 65 | identity = x 66 | 67 | out = self.conv1(x) 68 | out = self.bn1(out) 69 | out = self.relu(out) 70 | 71 | out = self.conv2(out) 72 | out = self.bn2(out) 73 | out = self.relu(out) 74 | 75 | out = self.conv3(out) 76 | out = self.bn3(out) 77 | 78 | if self.downsample is not None: 79 | identity = self.downsample(x) 80 | 81 | out = identity + self.drop_path(out) 82 | out = self.relu(out) 83 | return out 84 | 85 | 86 | class ResNet(nn.Module): 87 | def __init__(self, block, layers, num_classes=1000, drop_path_rate=0., **kwargs): 88 | super(ResNet, self).__init__() 89 | self.inplanes = 64 90 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 91 | bias=False) 92 | self.bn1 = nn.BatchNorm2d(self.inplanes) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0], drop_path=drop_path_rate) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, drop_path=drop_path_rate) 97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, drop_path=drop_path_rate) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, drop_path=drop_path_rate) 99 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 100 | self.fc = nn.Linear(512 * block.expansion, num_classes) 101 | 102 | for m in self.modules(): 103 | if isinstance(m, nn.Conv2d): 104 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 105 | if m.bias is not None: 106 | nn.init.zeros_(m.bias) 107 | elif isinstance(m, nn.BatchNorm2d): 108 | nn.init.constant_(m.weight, 1) 109 | nn.init.constant_(m.bias, 0) 110 | elif isinstance(m, nn.Linear): 111 | nn.init.normal_(m.weight, 0, 0.01) 112 | nn.init.zeros_(m.bias) 113 | 114 | def _make_layer(self, block, planes, blocks, stride=1, drop_path=None): 115 | downsample = None 116 | if stride != 1 or self.inplanes != planes * block.expansion: 117 | downsample = nn.Sequential( 118 | conv1x1(self.inplanes, planes * block.expansion, stride), 119 | nn.BatchNorm2d(planes * block.expansion), 120 | ) 121 | 122 | layers = [] 123 | layers.append(block(self.inplanes, planes, stride, downsample, drop_path=drop_path)) 124 | self.inplanes = planes * block.expansion 125 | for _ in range(1, blocks): 126 | layers.append(block(self.inplanes, planes, drop_path=drop_path)) 127 | 128 | return nn.Sequential(*layers) 129 | 130 | def _forward_impl(self, x): 131 | x = self.conv1(x) 132 | x = self.bn1(x) 133 | x = 
self.relu(x) 134 | x = self.maxpool(x) 135 | 136 | x = self.layer1(x) 137 | x = self.layer2(x) 138 | x = self.layer3(x) 139 | x = self.layer4(x) 140 | 141 | x = self.avgpool(x) 142 | x = torch.flatten(x, 1) 143 | x = self.fc(x) 144 | return x 145 | 146 | def forward(self, x): 147 | return self._forward_impl(x) 148 | 149 | 150 | @register_model 151 | def resnet18(**kwargs): 152 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 153 | return model 154 | 155 | @register_model 156 | def resnet50(**kwargs): 157 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 158 | return model 159 | -------------------------------------------------------------------------------- /optim_factory.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import optim as optim 3 | 4 | from timm.optim.adafactor import Adafactor 5 | from timm.optim.adahessian import Adahessian 6 | from timm.optim.adamp import AdamP 7 | from timm.optim.lookahead import Lookahead 8 | from timm.optim.nadam import Nadam 9 | from timm.optim.radam import RAdam 10 | from timm.optim.rmsprop_tf import RMSpropTF 11 | from timm.optim.sgdp import SGDP 12 | 13 | import json 14 | 15 | try: 16 | from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD 17 | has_apex = True 18 | except ImportError: 19 | has_apex = False 20 | 21 | 22 | def get_num_layer_for_convnext(var_name): 23 | """ 24 | Divide [3, 3, 27, 3] layers into 12 groups; each group is three 25 | consecutive blocks, including possible neighboring downsample layers; 26 | """ 27 | num_max_layer = 12 28 | if var_name.startswith("downsample_layers"): 29 | stage_id = int(var_name.split('.')[1]) 30 | if stage_id == 0: 31 | layer_id = 0 32 | elif stage_id == 1 or stage_id == 2: 33 | layer_id = stage_id + 1 34 | elif stage_id == 3: 35 | layer_id = 12 36 | return layer_id 37 | 38 | elif var_name.startswith("stages"): 39 | stage_id = int(var_name.split('.')[1]) 40 | block_id = int(var_name.split('.')[2]) 41 | if stage_id == 0 or stage_id == 1: 42 | layer_id = stage_id + 1 43 | elif stage_id == 2: 44 | layer_id = 3 + block_id // 3 45 | elif stage_id == 3: 46 | layer_id = 12 47 | return layer_id 48 | else: 49 | return num_max_layer + 1 50 | 51 | class LayerDecayValueAssigner(object): 52 | def __init__(self, values): 53 | self.values = values 54 | 55 | def get_scale(self, layer_id): 56 | return self.values[layer_id] 57 | 58 | def get_layer_id(self, var_name): 59 | return get_num_layer_for_convnext(var_name) 60 | 61 | 62 | def get_parameter_groups(model, weight_decay=1e-5, skip_list=(), get_num_layer=None, get_layer_scale=None): 63 | parameter_group_names = {} 64 | parameter_group_vars = {} 65 | 66 | for name, param in model.named_parameters(): 67 | if not param.requires_grad: 68 | continue # frozen weights 69 | if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list: 70 | group_name = "no_decay" 71 | this_weight_decay = 0. 72 | else: 73 | group_name = "decay" 74 | this_weight_decay = weight_decay 75 | if get_num_layer is not None: 76 | layer_id = get_num_layer(name) 77 | group_name = "layer_%d_%s" % (layer_id, group_name) 78 | else: 79 | layer_id = None 80 | 81 | if group_name not in parameter_group_names: 82 | if get_layer_scale is not None: 83 | scale = get_layer_scale(layer_id) 84 | else: 85 | scale = 1. 
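# Each distinct group name (e.g. "layer_3_decay" or "no_decay") becomes one
# optimizer parameter group below; "lr_scale" is a per-group multiplier that
# the training loop is presumably expected to apply on top of the base lr.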
86 | 87 | parameter_group_names[group_name] = { 88 | "weight_decay": this_weight_decay, 89 | "params": [], 90 | "lr_scale": scale 91 | } 92 | parameter_group_vars[group_name] = { 93 | "weight_decay": this_weight_decay, 94 | "params": [], 95 | "lr_scale": scale 96 | } 97 | 98 | parameter_group_vars[group_name]["params"].append(param) 99 | parameter_group_names[group_name]["params"].append(name) 100 | print("Param groups = %s" % json.dumps(parameter_group_names, indent=2)) 101 | return list(parameter_group_vars.values()) 102 | 103 | 104 | def create_optimizer(args, model, get_num_layer=None, get_layer_scale=None, filter_bias_and_bn=True, skip_list=None): 105 | opt_lower = args.opt.lower() 106 | weight_decay = args.weight_decay 107 | # if weight_decay and filter_bias_and_bn: 108 | if filter_bias_and_bn: 109 | skip = {} 110 | if skip_list is not None: 111 | skip = skip_list 112 | elif hasattr(model, 'no_weight_decay'): 113 | skip = model.no_weight_decay() 114 | parameters = get_parameter_groups(model, weight_decay, skip, get_num_layer, get_layer_scale) 115 | weight_decay = 0. 116 | else: 117 | parameters = model.parameters() 118 | 119 | if 'fused' in opt_lower: 120 | assert has_apex and torch.cuda.is_available(), 'APEX and CUDA required for fused optimizers' 121 | 122 | opt_args = dict(lr=args.lr, weight_decay=weight_decay) 123 | if hasattr(args, 'opt_eps') and args.opt_eps is not None: 124 | opt_args['eps'] = args.opt_eps 125 | if hasattr(args, 'opt_betas') and args.opt_betas is not None: 126 | opt_args['betas'] = args.opt_betas 127 | 128 | opt_split = opt_lower.split('_') 129 | opt_lower = opt_split[-1] 130 | if opt_lower == 'sgd' or opt_lower == 'nesterov': 131 | opt_args.pop('eps', None) 132 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) 133 | elif opt_lower == 'momentum': 134 | opt_args.pop('eps', None) 135 | optimizer = optim.SGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) 136 | elif opt_lower == 'adam': 137 | optimizer = optim.Adam(parameters, **opt_args) 138 | elif opt_lower == 'adamw': 139 | optimizer = optim.AdamW(parameters, **opt_args) 140 | elif opt_lower == 'nadam': 141 | optimizer = Nadam(parameters, **opt_args) 142 | elif opt_lower == 'radam': 143 | optimizer = RAdam(parameters, **opt_args) 144 | elif opt_lower == 'adamp': 145 | optimizer = AdamP(parameters, wd_ratio=0.01, nesterov=True, **opt_args) 146 | elif opt_lower == 'sgdp': 147 | optimizer = SGDP(parameters, momentum=args.momentum, nesterov=True, **opt_args) 148 | elif opt_lower == 'adadelta': 149 | optimizer = optim.Adadelta(parameters, **opt_args) 150 | elif opt_lower == 'adafactor': 151 | if not args.lr: 152 | opt_args['lr'] = None 153 | optimizer = Adafactor(parameters, **opt_args) 154 | elif opt_lower == 'adahessian': 155 | optimizer = Adahessian(parameters, **opt_args) 156 | elif opt_lower == 'rmsprop': 157 | optimizer = optim.RMSprop(parameters, alpha=0.9, momentum=args.momentum, **opt_args) 158 | elif opt_lower == 'rmsproptf': 159 | optimizer = RMSpropTF(parameters, alpha=0.9, momentum=args.momentum, **opt_args) 160 | elif opt_lower == 'fusedsgd': 161 | opt_args.pop('eps', None) 162 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=True, **opt_args) 163 | elif opt_lower == 'fusedmomentum': 164 | opt_args.pop('eps', None) 165 | optimizer = FusedSGD(parameters, momentum=args.momentum, nesterov=False, **opt_args) 166 | elif opt_lower == 'fusedadam': 167 | optimizer = FusedAdam(parameters, adam_w_mode=False, **opt_args) 168 | elif opt_lower 
== 'fusedadamw': 169 | optimizer = FusedAdam(parameters, adam_w_mode=True, **opt_args) 170 | elif opt_lower == 'fusedlamb': 171 | optimizer = FusedLAMB(parameters, **opt_args) 172 | elif opt_lower == 'fusednovograd': 173 | opt_args.setdefault('betas', (0.95, 0.98)) 174 | optimizer = FusedNovoGrad(parameters, **opt_args) 175 | else: 176 | assert False, "Invalid optimizer" 177 | 178 | if len(opt_split) > 1: 179 | if opt_split[0] == 'lookahead': 180 | optimizer = Lookahead(optimizer) 181 | 182 | return optimizer 183 | --------------------------------------------------------------------------------
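A minimal usage sketch of how create_optimizer and LayerDecayValueAssigner fit together (this snippet is not part of the repository; the args fields are assumptions based on what create_optimizer reads, and the decay schedule mirrors the usual ConvNeXt recipe):

    from types import SimpleNamespace
    from optim_factory import create_optimizer, LayerDecayValueAssigner
    from models.resnet import resnet18  # registered in models/resnet.py above

    # Hypothetical hyper-parameters; create_optimizer reads exactly these fields.
    args = SimpleNamespace(opt='adamw', lr=4e-3, weight_decay=0.05,
                           opt_eps=1e-8, opt_betas=(0.9, 0.999), momentum=0.9)

    model = resnet18(num_classes=1000)

    # Twelve layer groups plus stem/head; values[i] scales the lr of group i.
    # Note that get_num_layer_for_convnext assumes ConvNeXt-style parameter
    # names, so for other backbones every parameter lands in the last group
    # (scale 1.0) and the call degenerates to a plain AdamW setup.
    num_layers = 12
    values = [0.9 ** (num_layers + 1 - i) for i in range(num_layers + 2)]
    assigner = LayerDecayValueAssigner(values)

    optimizer = create_optimizer(args, model,
                                 get_num_layer=assigner.get_layer_id,
                                 get_layer_scale=assigner.get_scale)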