├── .gitignore
├── LICENSE
├── README.md
├── cfg
│   ├── 0514
│   │   ├── prune_0.7_keep_0.05_8x_yolov5l_v4.cfg
│   │   ├── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg
│   │   ├── prune_0.8_keep_0.05_8x_yolov5l_v4.cfg
│   │   ├── yolov5s_v4.cfg
│   │   └── yolov5s_v4_hand.cfg
│   ├── last_prune
│   │   ├── prune_0.3_keep_0.05_8x_yolov5l_v4.cfg
│   │   └── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg
│   ├── prune_0.7_keep_0.1_8x_yolov5l_v4.cfg
│   ├── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg
│   └── yolov5l_v4.cfg
├── data
│   ├── coco.data
│   ├── coco.names
│   ├── coco_128img.data
│   ├── coco_128img.txt
│   ├── get_coco_dataset.sh
│   └── get_coco_dataset_gdrive.sh
├── models
│   ├── __init__.py
│   ├── common.py
│   ├── experimental.py
│   ├── export.py
│   ├── hub
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov5-fpn.yaml
│   │   └── yolov5-panet.yaml
│   ├── yolo.py
│   └── yolov5s_v4.yaml
├── modelsori.py
├── prune_yolov5s.py
├── prune_yolov5s.sh
├── shortcut_prune_yolov5s.py
├── slim_prune_yolov5l_8x.py
├── slim_prune_yolov5s.py
├── slim_prune_yolov5s_8x.py
├── slim_prune_yolov5s_8x.sh
├── test.py
├── test_yolov5s.py
├── tk1_time.xls
└── utils
    ├── __init__.py
    ├── adabound.py
    ├── datasets.py
    ├── gcp.sh
    ├── general.py
    ├── google_utils.py
    ├── parse_config.py
    ├── prune_utils.py
    ├── torch_utils.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
2 | *.jpg
3 | *.jpeg
4 | *.png
5 | *.bmp
6 | *.tif
7 | *.tiff
8 | *.heic
9 | *.JPG
10 | *.JPEG
11 | *.PNG
12 | *.BMP
13 | *.TIF
14 | *.TIFF
15 | *.HEIC
16 | *.mp4
17 | *.mov
18 | *.MOV
19 | *.avi
20 | *.data
21 | *.json
22 |
23 | #data
24 | data/coco128
25 | data/hand_dataset
26 |
27 |
28 | # *.cfg
29 | !cfg/yolov3*.cfg
30 |
31 | storage.googleapis.com
32 | runs/*
33 | data/*
34 | !data/samples/zidane.jpg
35 | !data/samples/bus.jpg
36 | !data/coco.names
37 | !data/coco_paper.names
38 | !data/coco.data
39 | !data/coco_*.data
40 | !data/coco_*.txt
41 | !data/trainvalno5k.shapes
42 | !data/*.sh
43 |
44 | pycocotools/*
45 | results*.txt
46 | gcp_test*.sh
47 |
48 | # MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
49 | *.m~
50 | *.mat
51 | !targets*.mat
52 |
53 | # Neural Network weights -----------------------------------------------------------------------------------------------
54 | *.weights
55 | *.pt
56 | *.onnx
57 | *.mlmodel
58 | *.torchscript
59 | darknet53.conv.74
60 | yolov3-tiny.conv.15
61 |
62 | # GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
63 | # Byte-compiled / optimized / DLL files
64 | __pycache__/
65 | *.py[cod]
66 | *$py.class
67 |
68 | # C extensions
69 | *.so
70 |
71 | # Distribution / packaging
72 | .Python
73 | env/
74 | build/
75 | develop-eggs/
76 | dist/
77 | downloads/
78 | eggs/
79 | .eggs/
80 | lib/
81 | lib64/
82 | parts/
83 | sdist/
84 | var/
85 | wheels/
86 | *.egg-info/
87 | .installed.cfg
88 | *.egg
89 |
90 | # PyInstaller
91 | # Usually these files are written by a python script from a template
92 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
93 | *.manifest
94 | *.spec
95 |
96 | # Installer logs
97 | pip-log.txt
98 | pip-delete-this-directory.txt
99 |
100 | # Unit test / coverage reports
101 | htmlcov/
102 | .tox/
103 | .coverage
104 | .coverage.*
105 | .cache
106 | nosetests.xml
107 | coverage.xml
108 | *.cover
109 | .hypothesis/
110 |
111 | # Translations
112 | *.mo
113 | *.pot
114 |
115 | # Django stuff:
116 | *.log
117 | local_settings.py
118 |
119 | # Flask stuff:
120 | instance/
121 | .webassets-cache
122 |
123 | # Scrapy stuff:
124 | .scrapy
125 |
126 | # Sphinx documentation
127 | docs/_build/
128 |
129 | # PyBuilder
130 | target/
131 |
132 | # Jupyter Notebook
133 | .ipynb_checkpoints
134 |
135 | # pyenv
136 | .python-version
137 |
138 | # celery beat schedule file
139 | celerybeat-schedule
140 |
141 | # SageMath parsed files
142 | *.sage.py
143 |
144 | # dotenv
145 | .env
146 |
147 | # virtualenv
148 | .venv
149 | venv/
150 | ENV/
151 |
152 | # Spyder project settings
153 | .spyderproject
154 | .spyproject
155 |
156 | # Rope project settings
157 | .ropeproject
158 |
159 | # mkdocs documentation
160 | /site
161 |
162 | # mypy
163 | .mypy_cache/
164 |
165 |
166 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
167 |
168 | # General
169 | .DS_Store
170 | .AppleDouble
171 | .LSOverride
172 |
173 | # Icon must end with two \r
174 | Icon
175 | Icon?
176 |
177 | # Thumbnails
178 | ._*
179 |
180 | # Files that might appear in the root of a volume
181 | .DocumentRevisions-V100
182 | .fseventsd
183 | .Spotlight-V100
184 | .TemporaryItems
185 | .Trashes
186 | .VolumeIcon.icns
187 | .com.apple.timemachine.donotpresent
188 |
189 | # Directories potentially created on remote AFP share
190 | .AppleDB
191 | .AppleDesktop
192 | Network Trash Folder
193 | Temporary Items
194 | .apdisk
195 |
196 |
197 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
198 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
199 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
200 |
201 | # User-specific stuff:
202 | .idea/*
203 | .idea/**/workspace.xml
204 | .idea/**/tasks.xml
205 | .idea/dictionaries
206 | .html # Bokeh Plots
207 | .pg # TensorFlow Frozen Graphs
208 | .avi # videos
209 |
210 | # Sensitive or high-churn files:
211 | .idea/**/dataSources/
212 | .idea/**/dataSources.ids
213 | .idea/**/dataSources.local.xml
214 | .idea/**/sqlDataSources.xml
215 | .idea/**/dynamic.xml
216 | .idea/**/uiDesigner.xml
217 |
218 | # Gradle:
219 | .idea/**/gradle.xml
220 | .idea/**/libraries
221 |
222 | # CMake
223 | cmake-build-debug/
224 | cmake-build-release/
225 |
226 | # Mongo Explorer plugin:
227 | .idea/**/mongoSettings.xml
228 |
229 | ## File-based project format:
230 | *.iws
231 |
232 | ## Plugin-specific files:
233 |
234 | # IntelliJ
235 | out/
236 |
237 | # mpeltonen/sbt-idea plugin
238 | .idea_modules/
239 |
240 | # JIRA plugin
241 | atlassian-ide-plugin.xml
242 |
243 | # Cursive Clojure plugin
244 | .idea/replstate.xml
245 |
246 | # Crashlytics plugin (for Android Studio and IntelliJ)
247 | com_crashlytics_export_strings.xml
248 | crashlytics.properties
249 | crashlytics-build.properties
250 | fabric.properties
251 |
252 | tensorboard/
253 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yolov5_prune
2 | This project supplements [ZJU-lishuang](https://github.com/ZJU-lishuang)/**[yolov5_prune](https://github.com/ZJU-lishuang/yolov5_prune)** by adding pruning for the l model. For all other steps, refer to lishuang's repo.
3 |
4 | This project is built on [tanluren/yolov3-channel-and-layer-pruning](https://github.com/tanluren/yolov3-channel-and-layer-pruning) and extends that work to yolov5.
5 |
6 | The basic workflow: train on your own dataset with [ultralytics/yolov5](https://github.com/ultralytics/yolov5); when the model meets the accuracy target but not the speed target, prune it. The first step is sparsity training, which is critical: if the model is not sparse enough, a large pruning ratio will drive the pruned model's mAP to nearly 0. After pruning, fine-tune the model to recover accuracy.
7 |
8 | This project uses yolov5 v4.
9 | For yolov5 v3, see [yolov5-v3-prune](https://github.com/ZJU-lishuang/yolov5_prune/tree/v3)
10 | For yolov5 v2, see [yolov5-v2-prune](https://github.com/ZJU-lishuang/yolov5_prune/tree/v2)
11 |
12 | TODO: add pruning for the m, l, and x models, time permitting. >-<
13 |
14 | PS: models have been pruned successfully on both open-source datasets and datasets that cannot be released.
15 |
16 | ## Example workflow
17 | Download the dataset: [dataset](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz)
18 | ### STEP1: base training
19 | Attachment: [training logs](https://drive.google.com/drive/folders/1v0HZYBhU6d4M2hvEfjia76wYbQlaFz_f?usp=sharing)
20 | ### STEP2: sparsity training
21 | Attachment: [sparsity training logs](https://drive.google.com/drive/folders/1tJaeSOzQlyrx1l22hhop8G3ZuKshm8rp?usp=sharing)
22 | ### STEP3: 8x channel pruning
23 | Attachment: [pruned model](https://drive.google.com/drive/folders/1V5nA6oGXX43bagpO3cJIFpI0zjAOzt0p?usp=sharing)
24 | ### STEP4: fine-tune
25 | Attachment: [fine-tuning logs](https://drive.google.com/drive/folders/1vT_pN_XlMBniF9YXaPj2KeCNZitxYFLA?usp=sharing)
26 | ### STEP4: fine-tune with knowledge distillation, which outperforms plain fine-tuning
27 | Attachment: [fine-tuning with distillation logs](https://drive.google.com/drive/folders/1T3SGh0FjyjxDckFcKVxpxQHF2XzZ-gfN?usp=sharing)
28 |
29 | ## Pruning steps
30 | #### STEP1: base training
31 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
32 | Example command
33 | ```
34 | python train.py --img 640 --batch 8 --epochs 50 --weights weights/yolov5s_v4.pt --data data/coco_hand.yaml --cfg models/yolov5s.yaml --name s_hand
35 | ```
36 |
37 | #### STEP2: sparsity training
38 | --prune 0 applies to channel pruning strategy 1; --prune 1 applies to the other pruning strategies. A sketch of the sparsity penalty is shown after the command below.
39 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
40 | Example command
41 | ```
42 | python train_sparsity.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg models/yolov5s.yaml --weights runs/train/s_hand/weights/last.pt --name s_hand_sparsity -sr --s 0.001 --prune 1
43 | ```
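
A minimal sketch of the sparsity penalty this step relies on, assuming the network-slimming approach inherited from the yolov3-channel-and-layer-pruning base project: an L1 penalty of strength `--s` on every BatchNorm scale factor (gamma), applied as an extra sub-gradient after `loss.backward()`. The helper name and loop placement are illustrative, not the exact code in `train_sparsity.py`.

```python
import torch
import torch.nn as nn

def add_bn_l1_grad(model: nn.Module, s: float = 0.001) -> None:
    """Add the sub-gradient of s * |gamma| to every BatchNorm2d scale (network slimming)."""
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d) and m.weight.grad is not None:
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))

# Illustrative placement inside the training loop:
#   loss.backward()
#   add_bn_l1_grad(model, s=0.001)   # push unimportant channels' gamma toward zero
#   optimizer.step()
```

The smaller a channel's gamma after this training, the safer it is to prune; this is also why insufficient sparsity combined with a high pruning ratio collapses mAP.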
44 |
45 | #### STEP3: channel pruning strategy 1
46 | Layers directly connected by shortcuts are not pruned, which avoids extra dimension handling.
47 | ```
48 | python prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune0.pt --percent 0.8
49 | ```
50 |
51 | #### STEP3: channel pruning strategy 2
52 | Shortcut layers are pruned as well; each shortcut group is pruned with the mask of its first convolutional layer.
53 | ```
54 | python shortcut_prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune1.pt --percent 0.3
55 | ```
56 |
57 | #### STEP3: channel pruning strategy 3
58 | First, a global threshold determines each convolutional layer's pruning mask; then, for each shortcut group, the masks of the connected convolutional layers are merged by taking their union, and the merged mask is used for pruning. A sketch of the mask logic follows the command below.
59 | ```
60 | python slim_prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune1.pt --global_percent 0.8 --layer_keep 0.01
61 | ```
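
A minimal sketch of the mask logic described above, assuming channel importance is measured by the absolute BatchNorm gamma values: a global threshold is taken at the `--global_percent` quantile, each layer keeps at least a `--layer_keep` fraction of its channels, and strategies 2 and 3 differ only in how a shortcut group's masks are combined. Function names are illustrative, not the actual helpers in the pruning scripts.

```python
import torch

def global_threshold(bn_gammas, global_percent):
    """Threshold at the global_percent quantile of all |gamma| values across prunable layers."""
    all_g, _ = torch.sort(torch.cat([g.abs().flatten() for g in bn_gammas]))
    return all_g[int(len(all_g) * global_percent)]

def layer_mask(gamma, thresh, layer_keep):
    """Per-layer channel mask: keep channels above the threshold, never fewer than layer_keep."""
    mask = (gamma.abs() > thresh).float()
    min_keep = max(int(len(gamma) * layer_keep), 1)
    if int(mask.sum()) < min_keep:                      # enforce the per-layer floor
        mask = torch.zeros_like(mask)
        mask[gamma.abs().topk(min_keep).indices] = 1.0
    return mask

def merge_shortcut_masks(group_masks):
    """Strategy 3: union of the masks of all conv layers tied together by a shortcut group.
    (Strategy 2 would instead reuse group_masks[0] for the whole group.)"""
    merged = group_masks[0].clone()
    for m in group_masks[1:]:
        merged = torch.max(merged, m)
    return merged
```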
62 |
63 | #### STEP3: 8x channel pruning
64 | In hardware deployment we found that, at the same pruning ratio, the model runs fastest when channel counts are multiples of 8. (Pitfall: the hardware must be set to its maximum performance mode.) A sketch of the channel rounding follows the command below.
65 | Example command
66 | ```
67 | python slim_prune_yolov5s_8x.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --global_percent 0.5 --layer_keep 0.01 --img_size 640
68 | ```
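
A minimal sketch of the channel rounding that motivates the 8x scripts, assuming the kept-channel count of each pruned layer is adjusted to a multiple of 8 by re-selecting the largest-gamma channels. The rounding rule and function name are illustrative; the actual scripts may round differently.

```python
import torch

def round_mask_to_multiple_of_8(gamma: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    """Re-select channels so the kept count is a multiple of 8 (clamped to the layer width)."""
    kept = int(mask.sum())
    target = int(round(kept / 8)) * 8
    target = max(8, min(target, gamma.numel()))     # at least 8, at most all channels
    new_mask = torch.zeros_like(mask)
    new_mask[gamma.abs().topk(target).indices] = 1.0
    return new_mask
```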
69 |
70 | #### STEP4: fine-tune
71 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
72 | Example command
73 | ```
74 | python prune_finetune.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune
75 | ```
76 |
77 | #### STEP4: fine-tune with knowledge distillation (a sketch of the distillation term follows the command below)
78 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
79 | Example command
80 | ```
81 | python prune_finetune.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune_distill --distill
82 | ```
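
A minimal sketch of what the `--distill` flag implies conceptually: the unpruned model acts as a teacher, and an extra soft-target loss pulls the pruned student toward the teacher's predictions. The temperature-scaled KL term below is a generic classification-style distillation loss, shown only to illustrate the idea; the detection-specific loss actually used by `prune_finetune.py` may differ.

```python
import torch
import torch.nn.functional as F

def soft_target_loss(student_logits: torch.Tensor,
                     teacher_logits: torch.Tensor,
                     T: float = 3.0) -> torch.Tensor:
    """Temperature-scaled KL divergence between teacher and student soft predictions."""
    log_p_s = F.log_softmax(student_logits / T, dim=-1)
    p_t = F.softmax(teacher_logits / T, dim=-1)
    return F.kl_div(log_p_s, p_t, reduction="batchmean") * (T * T)

# Illustrative combination with the ordinary detection loss:
#   total_loss = det_loss + alpha * soft_target_loss(student_out, teacher_out.detach())
```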
83 |
84 | #### STEP5: inference with the pruned model
85 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
86 | Example command
87 | ```shell
88 | python prune_detect.py --weights weights/last_s_hand_finetune.pt --img 640 --conf 0.7 --save-txt --source inference/images
89 | ```
90 |
91 |
92 |
--------------------------------------------------------------------------------
/cfg/0514/prune_0.7_keep_0.05_8x_yolov5l_v4.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.949
8 | decay=0.0005
9 | angle=0
10 | saturation=1.5
11 | exposure=1.5
12 | hue=.1
13 | learning_rate=0.00261
14 | burn_in=1000
15 | max_batches=500500
16 | policy=steps
17 | steps=400000,450000
18 | scales=.1,.1
19 | mosaic=1
20 |
21 | [focus]
22 | filters=12
23 |
24 | [convolutional]
25 | batch_normalize=1
26 | filters=56
27 | size=3
28 | stride=1
29 | pad=1
30 | activation=leaky
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=128
35 | size=3
36 | stride=2
37 | pad=1
38 | activation=leaky
39 |
40 | [convolutional]
41 | batch_normalize=1
42 | filters=64
43 | size=1
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [route]
49 | layers=-2
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=1
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [convolutional]
60 | batch_normalize=1
61 | filters=64
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | batch_normalize=1
69 | filters=64
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [shortcut]
76 | from=-3
77 | activation=linear
78 |
79 | [convolutional]
80 | batch_normalize=1
81 | filters=64
82 | size=1
83 | stride=1
84 | pad=1
85 | activation=leaky
86 |
87 | [convolutional]
88 | batch_normalize=1
89 | filters=64
90 | size=3
91 | stride=1
92 | pad=1
93 | activation=leaky
94 |
95 | [shortcut]
96 | from=-3
97 | activation=linear
98 |
99 | [convolutional]
100 | batch_normalize=1
101 | filters=64
102 | size=1
103 | stride=1
104 | pad=1
105 | activation=leaky
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=64
110 | size=3
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [shortcut]
116 | from=-3
117 | activation=linear
118 |
119 | [route]
120 | layers=-1,-12
121 |
122 | [convolutional]
123 | batch_normalize=1
124 | filters=128
125 | size=1
126 | stride=1
127 | pad=1
128 | activation=leaky
129 |
130 | [convolutional]
131 | batch_normalize=1
132 | filters=256
133 | size=3
134 | stride=2
135 | pad=1
136 | activation=leaky
137 |
138 | [convolutional]
139 | batch_normalize=1
140 | filters=128
141 | size=1
142 | stride=1
143 | pad=1
144 | activation=leaky
145 |
146 | [route]
147 | layers=-2
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=128
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=72
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=128
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [shortcut]
174 | from=-3
175 | activation=linear
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=64
180 | size=1
181 | stride=1
182 | pad=1
183 | activation=leaky
184 |
185 | [convolutional]
186 | batch_normalize=1
187 | filters=128
188 | size=3
189 | stride=1
190 | pad=1
191 | activation=leaky
192 |
193 | [shortcut]
194 | from=-3
195 | activation=linear
196 |
197 | [convolutional]
198 | batch_normalize=1
199 | filters=48
200 | size=1
201 | stride=1
202 | pad=1
203 | activation=leaky
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=3
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [shortcut]
214 | from=-3
215 | activation=linear
216 |
217 | [convolutional]
218 | batch_normalize=1
219 | filters=40
220 | size=1
221 | stride=1
222 | pad=1
223 | activation=leaky
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=3
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [shortcut]
234 | from=-3
235 | activation=linear
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=64
240 | size=1
241 | stride=1
242 | pad=1
243 | activation=leaky
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=3
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [shortcut]
254 | from=-3
255 | activation=linear
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=48
260 | size=1
261 | stride=1
262 | pad=1
263 | activation=leaky
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=3
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [shortcut]
274 | from=-3
275 | activation=linear
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=80
280 | size=1
281 | stride=1
282 | pad=1
283 | activation=leaky
284 |
285 | [convolutional]
286 | batch_normalize=1
287 | filters=128
288 | size=3
289 | stride=1
290 | pad=1
291 | activation=leaky
292 |
293 | [shortcut]
294 | from=-3
295 | activation=linear
296 |
297 | [convolutional]
298 | batch_normalize=1
299 | filters=88
300 | size=1
301 | stride=1
302 | pad=1
303 | activation=leaky
304 |
305 | [convolutional]
306 | batch_normalize=1
307 | filters=128
308 | size=3
309 | stride=1
310 | pad=1
311 | activation=leaky
312 |
313 | [shortcut]
314 | from=-3
315 | activation=linear
316 |
317 | [convolutional]
318 | batch_normalize=1
319 | filters=72
320 | size=1
321 | stride=1
322 | pad=1
323 | activation=leaky
324 |
325 | [convolutional]
326 | batch_normalize=1
327 | filters=128
328 | size=3
329 | stride=1
330 | pad=1
331 | activation=leaky
332 |
333 | [shortcut]
334 | from=-3
335 | activation=linear
336 |
337 | [route]
338 | layers=-1,-30
339 |
340 | [convolutional]
341 | batch_normalize=1
342 | filters=256
343 | size=1
344 | stride=1
345 | pad=1
346 | activation=leaky
347 |
348 | [convolutional]
349 | batch_normalize=1
350 | filters=328
351 | size=3
352 | stride=2
353 | pad=1
354 | activation=leaky
355 |
356 | [convolutional]
357 | batch_normalize=1
358 | filters=232
359 | size=1
360 | stride=1
361 | pad=1
362 | activation=leaky
363 |
364 | [route]
365 | layers=-2
366 |
367 | [convolutional]
368 | batch_normalize=1
369 | filters=254
370 | size=1
371 | stride=1
372 | pad=1
373 | activation=leaky
374 |
375 | [convolutional]
376 | batch_normalize=1
377 | filters=80
378 | size=1
379 | stride=1
380 | pad=1
381 | activation=leaky
382 |
383 | [convolutional]
384 | batch_normalize=1
385 | filters=254
386 | size=3
387 | stride=1
388 | pad=1
389 | activation=leaky
390 |
391 | [shortcut]
392 | from=-3
393 | activation=linear
394 |
395 | [convolutional]
396 | batch_normalize=1
397 | filters=16
398 | size=1
399 | stride=1
400 | pad=1
401 | activation=leaky
402 |
403 | [convolutional]
404 | batch_normalize=1
405 | filters=254
406 | size=3
407 | stride=1
408 | pad=1
409 | activation=leaky
410 |
411 | [shortcut]
412 | from=-3
413 | activation=linear
414 |
415 | [convolutional]
416 | batch_normalize=1
417 | filters=24
418 | size=1
419 | stride=1
420 | pad=1
421 | activation=leaky
422 |
423 | [convolutional]
424 | batch_normalize=1
425 | filters=254
426 | size=3
427 | stride=1
428 | pad=1
429 | activation=leaky
430 |
431 | [shortcut]
432 | from=-3
433 | activation=linear
434 |
435 | [convolutional]
436 | batch_normalize=1
437 | filters=32
438 | size=1
439 | stride=1
440 | pad=1
441 | activation=leaky
442 |
443 | [convolutional]
444 | batch_normalize=1
445 | filters=254
446 | size=3
447 | stride=1
448 | pad=1
449 | activation=leaky
450 |
451 | [shortcut]
452 | from=-3
453 | activation=linear
454 |
455 | [convolutional]
456 | batch_normalize=1
457 | filters=32
458 | size=1
459 | stride=1
460 | pad=1
461 | activation=leaky
462 |
463 | [convolutional]
464 | batch_normalize=1
465 | filters=254
466 | size=3
467 | stride=1
468 | pad=1
469 | activation=leaky
470 |
471 | [shortcut]
472 | from=-3
473 | activation=linear
474 |
475 | [convolutional]
476 | batch_normalize=1
477 | filters=40
478 | size=1
479 | stride=1
480 | pad=1
481 | activation=leaky
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=254
486 | size=3
487 | stride=1
488 | pad=1
489 | activation=leaky
490 |
491 | [shortcut]
492 | from=-3
493 | activation=linear
494 |
495 | [convolutional]
496 | batch_normalize=1
497 | filters=24
498 | size=1
499 | stride=1
500 | pad=1
501 | activation=leaky
502 |
503 | [convolutional]
504 | batch_normalize=1
505 | filters=254
506 | size=3
507 | stride=1
508 | pad=1
509 | activation=leaky
510 |
511 | [shortcut]
512 | from=-3
513 | activation=linear
514 |
515 | [convolutional]
516 | batch_normalize=1
517 | filters=32
518 | size=1
519 | stride=1
520 | pad=1
521 | activation=leaky
522 |
523 | [convolutional]
524 | batch_normalize=1
525 | filters=254
526 | size=3
527 | stride=1
528 | pad=1
529 | activation=leaky
530 |
531 | [shortcut]
532 | from=-3
533 | activation=linear
534 |
535 | [convolutional]
536 | batch_normalize=1
537 | filters=32
538 | size=1
539 | stride=1
540 | pad=1
541 | activation=leaky
542 |
543 | [convolutional]
544 | batch_normalize=1
545 | filters=254
546 | size=3
547 | stride=1
548 | pad=1
549 | activation=leaky
550 |
551 | [shortcut]
552 | from=-3
553 | activation=linear
554 |
555 | [route]
556 | layers=-1,-30
557 |
558 | [convolutional]
559 | batch_normalize=1
560 | filters=224
561 | size=1
562 | stride=1
563 | pad=1
564 | activation=leaky
565 |
566 | [convolutional]
567 | batch_normalize=1
568 | filters=128
569 | size=3
570 | stride=2
571 | pad=1
572 | activation=leaky
573 |
574 | [convolutional]
575 | batch_normalize=1
576 | filters=512
577 | size=1
578 | stride=1
579 | pad=1
580 | activation=leaky
581 |
582 | [maxpool]
583 | stride=1
584 | size=5
585 |
586 | [route]
587 | layers=-2
588 |
589 | [maxpool]
590 | stride=1
591 | size=9
592 |
593 | [route]
594 | layers=-4
595 |
596 | [maxpool]
597 | stride=1
598 | size=13
599 |
600 | [route]
601 | layers=-6,-5,-3,-1
602 |
603 | [convolutional]
604 | batch_normalize=1
605 | filters=112
606 | size=1
607 | stride=1
608 | pad=1
609 | activation=leaky
610 |
611 | [convolutional]
612 | batch_normalize=1
613 | filters=56
614 | size=1
615 | stride=1
616 | pad=1
617 | activation=leaky
618 |
619 | [route]
620 | layers=-2
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=40
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=40
633 | size=1
634 | stride=1
635 | pad=1
636 | activation=leaky
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=40
641 | size=3
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | filters=32
649 | size=1
650 | stride=1
651 | pad=1
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=48
657 | size=3
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | filters=32
665 | size=1
666 | stride=1
667 | pad=1
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=40
673 | size=3
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [route]
679 | layers=-1,-9
680 |
681 | [convolutional]
682 | batch_normalize=1
683 | filters=64
684 | size=1
685 | stride=1
686 | pad=1
687 | activation=leaky
688 |
689 | [convolutional]
690 | batch_normalize=1
691 | filters=512
692 | size=1
693 | stride=1
694 | pad=1
695 | activation=leaky
696 |
697 | [upsample]
698 | stride=2
699 |
700 | [route]
701 | layers=-1,-23
702 |
703 | [convolutional]
704 | batch_normalize=1
705 | filters=64
706 | size=1
707 | stride=1
708 | pad=1
709 | activation=leaky
710 |
711 | [route]
712 | layers=-2
713 |
714 | [convolutional]
715 | batch_normalize=1
716 | filters=96
717 | size=1
718 | stride=1
719 | pad=1
720 | activation=leaky
721 |
722 | [convolutional]
723 | batch_normalize=1
724 | filters=120
725 | size=1
726 | stride=1
727 | pad=1
728 | activation=leaky
729 |
730 | [convolutional]
731 | batch_normalize=1
732 | filters=152
733 | size=3
734 | stride=1
735 | pad=1
736 | activation=leaky
737 |
738 | [convolutional]
739 | batch_normalize=1
740 | filters=144
741 | size=1
742 | stride=1
743 | pad=1
744 | activation=leaky
745 |
746 | [convolutional]
747 | batch_normalize=1
748 | filters=144
749 | size=3
750 | stride=1
751 | pad=1
752 | activation=leaky
753 |
754 | [convolutional]
755 | batch_normalize=1
756 | filters=112
757 | size=1
758 | stride=1
759 | pad=1
760 | activation=leaky
761 |
762 | [convolutional]
763 | batch_normalize=1
764 | filters=120
765 | size=3
766 | stride=1
767 | pad=1
768 | activation=leaky
769 |
770 | [route]
771 | layers=-1,-9
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=120
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | filters=256
784 | size=1
785 | stride=1
786 | pad=1
787 | activation=leaky
788 |
789 | [upsample]
790 | stride=2
791 |
792 | [route]
793 | layers=-1,-70
794 |
795 | [convolutional]
796 | batch_normalize=1
797 | filters=72
798 | size=1
799 | stride=1
800 | pad=1
801 | activation=leaky
802 |
803 | [route]
804 | layers=-2
805 |
806 | [convolutional]
807 | batch_normalize=1
808 | filters=80
809 | size=1
810 | stride=1
811 | pad=1
812 | activation=leaky
813 |
814 | [convolutional]
815 | batch_normalize=1
816 | filters=88
817 | size=1
818 | stride=1
819 | pad=1
820 | activation=leaky
821 |
822 | [convolutional]
823 | batch_normalize=1
824 | filters=104
825 | size=3
826 | stride=1
827 | pad=1
828 | activation=leaky
829 |
830 | [convolutional]
831 | batch_normalize=1
832 | filters=96
833 | size=1
834 | stride=1
835 | pad=1
836 | activation=leaky
837 |
838 | [convolutional]
839 | batch_normalize=1
840 | filters=96
841 | size=3
842 | stride=1
843 | pad=1
844 | activation=leaky
845 |
846 | [convolutional]
847 | batch_normalize=1
848 | filters=80
849 | size=1
850 | stride=1
851 | pad=1
852 | activation=leaky
853 |
854 | [convolutional]
855 | batch_normalize=1
856 | filters=112
857 | size=3
858 | stride=1
859 | pad=1
860 | activation=leaky
861 |
862 | [route]
863 | layers=-1,-9
864 |
865 | [convolutional]
866 | batch_normalize=1
867 | filters=208
868 | size=1
869 | stride=1
870 | pad=1
871 | activation=leaky
872 |
873 | [convolutional]
874 | size=1
875 | stride=1
876 | pad=1
877 | filters=24
878 | activation=linear
879 |
880 | [yolo]
881 | mask=0,1,2
882 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
883 | classes=3
884 | num=9
885 | jitter=.3
886 | ignore_thresh=.7
887 | truth_thresh=1
888 | scale_x_y=1.2
889 | iou_thresh=0.213
890 | cls_normalizer=1.0
891 | iou_normalizer=0.07
892 | iou_loss=ciou
893 | nms_kind=greedynms
894 | beta_nms=0.6
895 |
896 | [route]
897 | layers=-3
898 |
899 | [convolutional]
900 | batch_normalize=1
901 | filters=120
902 | size=3
903 | stride=2
904 | pad=1
905 | activation=leaky
906 |
907 | [route]
908 | layers=-1,-18
909 |
910 | [convolutional]
911 | batch_normalize=1
912 | filters=128
913 | size=1
914 | stride=1
915 | pad=1
916 | activation=leaky
917 |
918 | [route]
919 | layers=-2
920 |
921 | [convolutional]
922 | batch_normalize=1
923 | filters=112
924 | size=1
925 | stride=1
926 | pad=1
927 | activation=leaky
928 |
929 | [convolutional]
930 | batch_normalize=1
931 | filters=104
932 | size=1
933 | stride=1
934 | pad=1
935 | activation=leaky
936 |
937 | [convolutional]
938 | batch_normalize=1
939 | filters=96
940 | size=3
941 | stride=1
942 | pad=1
943 | activation=leaky
944 |
945 | [convolutional]
946 | batch_normalize=1
947 | filters=80
948 | size=1
949 | stride=1
950 | pad=1
951 | activation=leaky
952 |
953 | [convolutional]
954 | batch_normalize=1
955 | filters=96
956 | size=3
957 | stride=1
958 | pad=1
959 | activation=leaky
960 |
961 | [convolutional]
962 | batch_normalize=1
963 | filters=88
964 | size=1
965 | stride=1
966 | pad=1
967 | activation=leaky
968 |
969 | [convolutional]
970 | batch_normalize=1
971 | filters=112
972 | size=3
973 | stride=1
974 | pad=1
975 | activation=leaky
976 |
977 | [route]
978 | layers=-1,-9
979 |
980 | [convolutional]
981 | batch_normalize=1
982 | filters=240
983 | size=1
984 | stride=1
985 | pad=1
986 | activation=leaky
987 |
988 | [convolutional]
989 | size=1
990 | stride=1
991 | pad=1
992 | filters=24
993 | activation=linear
994 |
995 | [yolo]
996 | mask=3,4,5
997 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
998 | classes=3
999 | num=9
1000 | jitter=.3
1001 | ignore_thresh=.7
1002 | truth_thresh=1
1003 | scale_x_y=1.2
1004 | iou_thresh=0.213
1005 | cls_normalizer=1.0
1006 | iou_normalizer=0.07
1007 | iou_loss=ciou
1008 | nms_kind=greedynms
1009 | beta_nms=0.6
1010 |
1011 | [route]
1012 | layers=-3
1013 |
1014 | [convolutional]
1015 | batch_normalize=1
1016 | filters=224
1017 | size=3
1018 | stride=2
1019 | pad=1
1020 | activation=leaky
1021 |
1022 | [route]
1023 | layers=-1,-48
1024 |
1025 | [convolutional]
1026 | batch_normalize=1
1027 | filters=104
1028 | size=1
1029 | stride=1
1030 | pad=1
1031 | activation=leaky
1032 |
1033 | [route]
1034 | layers=-2
1035 |
1036 | [convolutional]
1037 | batch_normalize=1
1038 | filters=40
1039 | size=1
1040 | stride=1
1041 | pad=1
1042 | activation=leaky
1043 |
1044 | [convolutional]
1045 | batch_normalize=1
1046 | filters=40
1047 | size=1
1048 | stride=1
1049 | pad=1
1050 | activation=leaky
1051 |
1052 | [convolutional]
1053 | batch_normalize=1
1054 | filters=40
1055 | size=3
1056 | stride=1
1057 | pad=1
1058 | activation=leaky
1059 |
1060 | [convolutional]
1061 | batch_normalize=1
1062 | filters=32
1063 | size=1
1064 | stride=1
1065 | pad=1
1066 | activation=leaky
1067 |
1068 | [convolutional]
1069 | batch_normalize=1
1070 | filters=48
1071 | size=3
1072 | stride=1
1073 | pad=1
1074 | activation=leaky
1075 |
1076 | [convolutional]
1077 | batch_normalize=1
1078 | filters=48
1079 | size=1
1080 | stride=1
1081 | pad=1
1082 | activation=leaky
1083 |
1084 | [convolutional]
1085 | batch_normalize=1
1086 | filters=40
1087 | size=3
1088 | stride=1
1089 | pad=1
1090 | activation=leaky
1091 |
1092 | [route]
1093 | layers=-1,-9
1094 |
1095 | [convolutional]
1096 | batch_normalize=1
1097 | filters=232
1098 | size=1
1099 | stride=1
1100 | pad=1
1101 | activation=leaky
1102 |
1103 | [convolutional]
1104 | size=1
1105 | stride=1
1106 | pad=1
1107 | filters=24
1108 | activation=linear
1109 |
1110 | [yolo]
1111 | mask=6,7,8
1112 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
1113 | classes=3
1114 | num=9
1115 | jitter=.3
1116 | ignore_thresh=.7
1117 | truth_thresh=1
1118 | scale_x_y=1.2
1119 | iou_thresh=0.213
1120 | cls_normalizer=1.0
1121 | iou_normalizer=0.07
1122 | iou_loss=ciou
1123 | nms_kind=greedynms
1124 | beta_nms=0.6
1125 |
1126 |
--------------------------------------------------------------------------------
/cfg/0514/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.949
8 | decay=0.0005
9 | angle=0
10 | saturation=1.5
11 | exposure=1.5
12 | hue=.1
13 | learning_rate=0.00261
14 | burn_in=1000
15 | max_batches=500500
16 | policy=steps
17 | steps=400000,450000
18 | scales=.1,.1
19 | mosaic=1
20 |
21 | [focus]
22 | filters=12
23 |
24 | [convolutional]
25 | batch_normalize=1
26 | filters=40
27 | size=3
28 | stride=1
29 | pad=1
30 | activation=leaky
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=112
35 | size=3
36 | stride=2
37 | pad=1
38 | activation=leaky
39 |
40 | [convolutional]
41 | batch_normalize=1
42 | filters=64
43 | size=1
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [route]
49 | layers=-2
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=1
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [convolutional]
60 | batch_normalize=1
61 | filters=64
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | batch_normalize=1
69 | filters=64
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [shortcut]
76 | from=-3
77 | activation=linear
78 |
79 | [convolutional]
80 | batch_normalize=1
81 | filters=64
82 | size=1
83 | stride=1
84 | pad=1
85 | activation=leaky
86 |
87 | [convolutional]
88 | batch_normalize=1
89 | filters=64
90 | size=3
91 | stride=1
92 | pad=1
93 | activation=leaky
94 |
95 | [shortcut]
96 | from=-3
97 | activation=linear
98 |
99 | [convolutional]
100 | batch_normalize=1
101 | filters=64
102 | size=1
103 | stride=1
104 | pad=1
105 | activation=leaky
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=64
110 | size=3
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [shortcut]
116 | from=-3
117 | activation=linear
118 |
119 | [route]
120 | layers=-1,-12
121 |
122 | [convolutional]
123 | batch_normalize=1
124 | filters=128
125 | size=1
126 | stride=1
127 | pad=1
128 | activation=leaky
129 |
130 | [convolutional]
131 | batch_normalize=1
132 | filters=248
133 | size=3
134 | stride=2
135 | pad=1
136 | activation=leaky
137 |
138 | [convolutional]
139 | batch_normalize=1
140 | filters=128
141 | size=1
142 | stride=1
143 | pad=1
144 | activation=leaky
145 |
146 | [route]
147 | layers=-2
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=128
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=56
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=128
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [shortcut]
174 | from=-3
175 | activation=linear
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=56
180 | size=1
181 | stride=1
182 | pad=1
183 | activation=leaky
184 |
185 | [convolutional]
186 | batch_normalize=1
187 | filters=128
188 | size=3
189 | stride=1
190 | pad=1
191 | activation=leaky
192 |
193 | [shortcut]
194 | from=-3
195 | activation=linear
196 |
197 | [convolutional]
198 | batch_normalize=1
199 | filters=24
200 | size=1
201 | stride=1
202 | pad=1
203 | activation=leaky
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=3
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [shortcut]
214 | from=-3
215 | activation=linear
216 |
217 | [convolutional]
218 | batch_normalize=1
219 | filters=24
220 | size=1
221 | stride=1
222 | pad=1
223 | activation=leaky
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=3
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [shortcut]
234 | from=-3
235 | activation=linear
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=40
240 | size=1
241 | stride=1
242 | pad=1
243 | activation=leaky
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=3
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [shortcut]
254 | from=-3
255 | activation=linear
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=32
260 | size=1
261 | stride=1
262 | pad=1
263 | activation=leaky
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=3
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [shortcut]
274 | from=-3
275 | activation=linear
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=64
280 | size=1
281 | stride=1
282 | pad=1
283 | activation=leaky
284 |
285 | [convolutional]
286 | batch_normalize=1
287 | filters=128
288 | size=3
289 | stride=1
290 | pad=1
291 | activation=leaky
292 |
293 | [shortcut]
294 | from=-3
295 | activation=linear
296 |
297 | [convolutional]
298 | batch_normalize=1
299 | filters=64
300 | size=1
301 | stride=1
302 | pad=1
303 | activation=leaky
304 |
305 | [convolutional]
306 | batch_normalize=1
307 | filters=128
308 | size=3
309 | stride=1
310 | pad=1
311 | activation=leaky
312 |
313 | [shortcut]
314 | from=-3
315 | activation=linear
316 |
317 | [convolutional]
318 | batch_normalize=1
319 | filters=56
320 | size=1
321 | stride=1
322 | pad=1
323 | activation=leaky
324 |
325 | [convolutional]
326 | batch_normalize=1
327 | filters=128
328 | size=3
329 | stride=1
330 | pad=1
331 | activation=leaky
332 |
333 | [shortcut]
334 | from=-3
335 | activation=linear
336 |
337 | [route]
338 | layers=-1,-30
339 |
340 | [convolutional]
341 | batch_normalize=1
342 | filters=240
343 | size=1
344 | stride=1
345 | pad=1
346 | activation=leaky
347 |
348 | [convolutional]
349 | batch_normalize=1
350 | filters=240
351 | size=3
352 | stride=2
353 | pad=1
354 | activation=leaky
355 |
356 | [convolutional]
357 | batch_normalize=1
358 | filters=96
359 | size=1
360 | stride=1
361 | pad=1
362 | activation=leaky
363 |
364 | [route]
365 | layers=-2
366 |
367 | [convolutional]
368 | batch_normalize=1
369 | filters=245
370 | size=1
371 | stride=1
372 | pad=1
373 | activation=leaky
374 |
375 | [convolutional]
376 | batch_normalize=1
377 | filters=56
378 | size=1
379 | stride=1
380 | pad=1
381 | activation=leaky
382 |
383 | [convolutional]
384 | batch_normalize=1
385 | filters=245
386 | size=3
387 | stride=1
388 | pad=1
389 | activation=leaky
390 |
391 | [shortcut]
392 | from=-3
393 | activation=linear
394 |
395 | [convolutional]
396 | batch_normalize=1
397 | filters=8
398 | size=1
399 | stride=1
400 | pad=1
401 | activation=leaky
402 |
403 | [convolutional]
404 | batch_normalize=1
405 | filters=245
406 | size=3
407 | stride=1
408 | pad=1
409 | activation=leaky
410 |
411 | [shortcut]
412 | from=-3
413 | activation=linear
414 |
415 | [convolutional]
416 | batch_normalize=1
417 | filters=16
418 | size=1
419 | stride=1
420 | pad=1
421 | activation=leaky
422 |
423 | [convolutional]
424 | batch_normalize=1
425 | filters=245
426 | size=3
427 | stride=1
428 | pad=1
429 | activation=leaky
430 |
431 | [shortcut]
432 | from=-3
433 | activation=linear
434 |
435 | [convolutional]
436 | batch_normalize=1
437 | filters=24
438 | size=1
439 | stride=1
440 | pad=1
441 | activation=leaky
442 |
443 | [convolutional]
444 | batch_normalize=1
445 | filters=245
446 | size=3
447 | stride=1
448 | pad=1
449 | activation=leaky
450 |
451 | [shortcut]
452 | from=-3
453 | activation=linear
454 |
455 | [convolutional]
456 | batch_normalize=1
457 | filters=16
458 | size=1
459 | stride=1
460 | pad=1
461 | activation=leaky
462 |
463 | [convolutional]
464 | batch_normalize=1
465 | filters=245
466 | size=3
467 | stride=1
468 | pad=1
469 | activation=leaky
470 |
471 | [shortcut]
472 | from=-3
473 | activation=linear
474 |
475 | [convolutional]
476 | batch_normalize=1
477 | filters=24
478 | size=1
479 | stride=1
480 | pad=1
481 | activation=leaky
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=245
486 | size=3
487 | stride=1
488 | pad=1
489 | activation=leaky
490 |
491 | [shortcut]
492 | from=-3
493 | activation=linear
494 |
495 | [convolutional]
496 | batch_normalize=1
497 | filters=16
498 | size=1
499 | stride=1
500 | pad=1
501 | activation=leaky
502 |
503 | [convolutional]
504 | batch_normalize=1
505 | filters=245
506 | size=3
507 | stride=1
508 | pad=1
509 | activation=leaky
510 |
511 | [shortcut]
512 | from=-3
513 | activation=linear
514 |
515 | [convolutional]
516 | batch_normalize=1
517 | filters=32
518 | size=1
519 | stride=1
520 | pad=1
521 | activation=leaky
522 |
523 | [convolutional]
524 | batch_normalize=1
525 | filters=245
526 | size=3
527 | stride=1
528 | pad=1
529 | activation=leaky
530 |
531 | [shortcut]
532 | from=-3
533 | activation=linear
534 |
535 | [convolutional]
536 | batch_normalize=1
537 | filters=24
538 | size=1
539 | stride=1
540 | pad=1
541 | activation=leaky
542 |
543 | [convolutional]
544 | batch_normalize=1
545 | filters=245
546 | size=3
547 | stride=1
548 | pad=1
549 | activation=leaky
550 |
551 | [shortcut]
552 | from=-3
553 | activation=linear
554 |
555 | [route]
556 | layers=-1,-30
557 |
558 | [convolutional]
559 | batch_normalize=1
560 | filters=144
561 | size=1
562 | stride=1
563 | pad=1
564 | activation=leaky
565 |
566 | [convolutional]
567 | batch_normalize=1
568 | filters=104
569 | size=3
570 | stride=2
571 | pad=1
572 | activation=leaky
573 |
574 | [convolutional]
575 | batch_normalize=1
576 | filters=512
577 | size=1
578 | stride=1
579 | pad=1
580 | activation=leaky
581 |
582 | [maxpool]
583 | stride=1
584 | size=5
585 |
586 | [route]
587 | layers=-2
588 |
589 | [maxpool]
590 | stride=1
591 | size=9
592 |
593 | [route]
594 | layers=-4
595 |
596 | [maxpool]
597 | stride=1
598 | size=13
599 |
600 | [route]
601 | layers=-6,-5,-3,-1
602 |
603 | [convolutional]
604 | batch_normalize=1
605 | filters=56
606 | size=1
607 | stride=1
608 | pad=1
609 | activation=leaky
610 |
611 | [convolutional]
612 | batch_normalize=1
613 | filters=40
614 | size=1
615 | stride=1
616 | pad=1
617 | activation=leaky
618 |
619 | [route]
620 | layers=-2
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=8
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=8
633 | size=1
634 | stride=1
635 | pad=1
636 | activation=leaky
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=8
641 | size=3
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | filters=8
649 | size=1
650 | stride=1
651 | pad=1
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=16
657 | size=3
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | filters=8
665 | size=1
666 | stride=1
667 | pad=1
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=8
673 | size=3
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [route]
679 | layers=-1,-9
680 |
681 | [convolutional]
682 | batch_normalize=1
683 | filters=40
684 | size=1
685 | stride=1
686 | pad=1
687 | activation=leaky
688 |
689 | [convolutional]
690 | batch_normalize=1
691 | filters=512
692 | size=1
693 | stride=1
694 | pad=1
695 | activation=leaky
696 |
697 | [upsample]
698 | stride=2
699 |
700 | [route]
701 | layers=-1,-23
702 |
703 | [convolutional]
704 | batch_normalize=1
705 | filters=48
706 | size=1
707 | stride=1
708 | pad=1
709 | activation=leaky
710 |
711 | [route]
712 | layers=-2
713 |
714 | [convolutional]
715 | batch_normalize=1
716 | filters=80
717 | size=1
718 | stride=1
719 | pad=1
720 | activation=leaky
721 |
722 | [convolutional]
723 | batch_normalize=1
724 | filters=104
725 | size=1
726 | stride=1
727 | pad=1
728 | activation=leaky
729 |
730 | [convolutional]
731 | batch_normalize=1
732 | filters=128
733 | size=3
734 | stride=1
735 | pad=1
736 | activation=leaky
737 |
738 | [convolutional]
739 | batch_normalize=1
740 | filters=112
741 | size=1
742 | stride=1
743 | pad=1
744 | activation=leaky
745 |
746 | [convolutional]
747 | batch_normalize=1
748 | filters=112
749 | size=3
750 | stride=1
751 | pad=1
752 | activation=leaky
753 |
754 | [convolutional]
755 | batch_normalize=1
756 | filters=96
757 | size=1
758 | stride=1
759 | pad=1
760 | activation=leaky
761 |
762 | [convolutional]
763 | batch_normalize=1
764 | filters=104
765 | size=3
766 | stride=1
767 | pad=1
768 | activation=leaky
769 |
770 | [route]
771 | layers=-1,-9
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=88
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | filters=256
784 | size=1
785 | stride=1
786 | pad=1
787 | activation=leaky
788 |
789 | [upsample]
790 | stride=2
791 |
792 | [route]
793 | layers=-1,-70
794 |
795 | [convolutional]
796 | batch_normalize=1
797 | filters=32
798 | size=1
799 | stride=1
800 | pad=1
801 | activation=leaky
802 |
803 | [route]
804 | layers=-2
805 |
806 | [convolutional]
807 | batch_normalize=1
808 | filters=72
809 | size=1
810 | stride=1
811 | pad=1
812 | activation=leaky
813 |
814 | [convolutional]
815 | batch_normalize=1
816 | filters=72
817 | size=1
818 | stride=1
819 | pad=1
820 | activation=leaky
821 |
822 | [convolutional]
823 | batch_normalize=1
824 | filters=96
825 | size=3
826 | stride=1
827 | pad=1
828 | activation=leaky
829 |
830 | [convolutional]
831 | batch_normalize=1
832 | filters=88
833 | size=1
834 | stride=1
835 | pad=1
836 | activation=leaky
837 |
838 | [convolutional]
839 | batch_normalize=1
840 | filters=88
841 | size=3
842 | stride=1
843 | pad=1
844 | activation=leaky
845 |
846 | [convolutional]
847 | batch_normalize=1
848 | filters=80
849 | size=1
850 | stride=1
851 | pad=1
852 | activation=leaky
853 |
854 | [convolutional]
855 | batch_normalize=1
856 | filters=96
857 | size=3
858 | stride=1
859 | pad=1
860 | activation=leaky
861 |
862 | [route]
863 | layers=-1,-9
864 |
865 | [convolutional]
866 | batch_normalize=1
867 | filters=96
868 | size=1
869 | stride=1
870 | pad=1
871 | activation=leaky
872 |
873 | [convolutional]
874 | size=1
875 | stride=1
876 | pad=1
877 | filters=24
878 | activation=linear
879 |
880 | [yolo]
881 | mask=0,1,2
882 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
883 | classes=3
884 | num=9
885 | jitter=.3
886 | ignore_thresh=.7
887 | truth_thresh=1
888 | scale_x_y=1.2
889 | iou_thresh=0.213
890 | cls_normalizer=1.0
891 | iou_normalizer=0.07
892 | iou_loss=ciou
893 | nms_kind=greedynms
894 | beta_nms=0.6
895 |
896 | [route]
897 | layers=-3
898 |
899 | [convolutional]
900 | batch_normalize=1
901 | filters=72
902 | size=3
903 | stride=2
904 | pad=1
905 | activation=leaky
906 |
907 | [route]
908 | layers=-1,-18
909 |
910 | [convolutional]
911 | batch_normalize=1
912 | filters=64
913 | size=1
914 | stride=1
915 | pad=1
916 | activation=leaky
917 |
918 | [route]
919 | layers=-2
920 |
921 | [convolutional]
922 | batch_normalize=1
923 | filters=56
924 | size=1
925 | stride=1
926 | pad=1
927 | activation=leaky
928 |
929 | [convolutional]
930 | batch_normalize=1
931 | filters=48
932 | size=1
933 | stride=1
934 | pad=1
935 | activation=leaky
936 |
937 | [convolutional]
938 | batch_normalize=1
939 | filters=64
940 | size=3
941 | stride=1
942 | pad=1
943 | activation=leaky
944 |
945 | [convolutional]
946 | batch_normalize=1
947 | filters=48
948 | size=1
949 | stride=1
950 | pad=1
951 | activation=leaky
952 |
953 | [convolutional]
954 | batch_normalize=1
955 | filters=64
956 | size=3
957 | stride=1
958 | pad=1
959 | activation=leaky
960 |
961 | [convolutional]
962 | batch_normalize=1
963 | filters=64
964 | size=1
965 | stride=1
966 | pad=1
967 | activation=leaky
968 |
969 | [convolutional]
970 | batch_normalize=1
971 | filters=80
972 | size=3
973 | stride=1
974 | pad=1
975 | activation=leaky
976 |
977 | [route]
978 | layers=-1,-9
979 |
980 | [convolutional]
981 | batch_normalize=1
982 | filters=104
983 | size=1
984 | stride=1
985 | pad=1
986 | activation=leaky
987 |
988 | [convolutional]
989 | size=1
990 | stride=1
991 | pad=1
992 | filters=24
993 | activation=linear
994 |
995 | [yolo]
996 | mask=3,4,5
997 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
998 | classes=3
999 | num=9
1000 | jitter=.3
1001 | ignore_thresh=.7
1002 | truth_thresh=1
1003 | scale_x_y=1.2
1004 | iou_thresh=0.213
1005 | cls_normalizer=1.0
1006 | iou_normalizer=0.07
1007 | iou_loss=ciou
1008 | nms_kind=greedynms
1009 | beta_nms=0.6
1010 |
1011 | [route]
1012 | layers=-3
1013 |
1014 | [convolutional]
1015 | batch_normalize=1
1016 | filters=136
1017 | size=3
1018 | stride=2
1019 | pad=1
1020 | activation=leaky
1021 |
1022 | [route]
1023 | layers=-1,-48
1024 |
1025 | [convolutional]
1026 | batch_normalize=1
1027 | filters=64
1028 | size=1
1029 | stride=1
1030 | pad=1
1031 | activation=leaky
1032 |
1033 | [route]
1034 | layers=-2
1035 |
1036 | [convolutional]
1037 | batch_normalize=1
1038 | filters=8
1039 | size=1
1040 | stride=1
1041 | pad=1
1042 | activation=leaky
1043 |
1044 | [convolutional]
1045 | batch_normalize=1
1046 | filters=8
1047 | size=1
1048 | stride=1
1049 | pad=1
1050 | activation=leaky
1051 |
1052 | [convolutional]
1053 | batch_normalize=1
1054 | filters=8
1055 | size=3
1056 | stride=1
1057 | pad=1
1058 | activation=leaky
1059 |
1060 | [convolutional]
1061 | batch_normalize=1
1062 | filters=8
1063 | size=1
1064 | stride=1
1065 | pad=1
1066 | activation=leaky
1067 |
1068 | [convolutional]
1069 | batch_normalize=1
1070 | filters=8
1071 | size=3
1072 | stride=1
1073 | pad=1
1074 | activation=leaky
1075 |
1076 | [convolutional]
1077 | batch_normalize=1
1078 | filters=8
1079 | size=1
1080 | stride=1
1081 | pad=1
1082 | activation=leaky
1083 |
1084 | [convolutional]
1085 | batch_normalize=1
1086 | filters=8
1087 | size=3
1088 | stride=1
1089 | pad=1
1090 | activation=leaky
1091 |
1092 | [route]
1093 | layers=-1,-9
1094 |
1095 | [convolutional]
1096 | batch_normalize=1
1097 | filters=72
1098 | size=1
1099 | stride=1
1100 | pad=1
1101 | activation=leaky
1102 |
1103 | [convolutional]
1104 | size=1
1105 | stride=1
1106 | pad=1
1107 | filters=24
1108 | activation=linear
1109 |
1110 | [yolo]
1111 | mask=6,7,8
1112 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
1113 | classes=3
1114 | num=9
1115 | jitter=.3
1116 | ignore_thresh=.7
1117 | truth_thresh=1
1118 | scale_x_y=1.2
1119 | iou_thresh=0.213
1120 | cls_normalizer=1.0
1121 | iou_normalizer=0.07
1122 | iou_loss=ciou
1123 | nms_kind=greedynms
1124 | beta_nms=0.6
1125 |
1126 |
--------------------------------------------------------------------------------
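A quick consistency check that applies to every cfg in this listing: the 1x1 [convolutional] block with activation=linear that sits directly above each [yolo] head must have filters = (classes + 5) * len(mask) — 24 for the 3-class pruned cfgs, 255 for the 80-class yolov5s_v4.cfg below, and 18 for the single-class hand variant. The sketch below is an illustrative, repo-independent way to verify this; `parse_cfg` and `check_heads` are hypothetical helpers written for this note, not functions from this repository.

```python
# Minimal sanity check (illustrative sketch, not the repo's parser): for every
# [yolo] head in a darknet-style cfg, the preceding linear 1x1 convolution must
# have filters == (classes + 5) * len(mask), e.g. (3+5)*3 = 24 above.
def parse_cfg(path):
    """Parse a darknet-style cfg into a list of {'type': ..., key: value} dicts."""
    sections = []
    for raw in open(path, encoding="utf-8"):
        line = raw.split("#")[0].strip()  # drop comments and blank lines
        if not line:
            continue
        if line.startswith("["):
            sections.append({"type": line.strip("[]")})
        else:
            key, _, value = line.partition("=")
            sections[-1][key.strip()] = value.strip()
    return sections

def check_heads(path):
    sections = parse_cfg(path)
    for i, sec in enumerate(sections):
        if sec["type"] != "yolo":
            continue
        classes = int(sec["classes"])
        n_anchors = len(sec["mask"].split(","))
        expected = (classes + 5) * n_anchors
        actual = int(sections[i - 1]["filters"])  # the linear 1x1 conv just above
        status = "ok" if actual == expected else "MISMATCH"
        print(f"[yolo] head at section {i}: filters={actual}, expected={expected} -> {status}")

# check_heads("cfg/0514/yolov5s_v4.cfg")  # expects 255 for 80 classes
```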
/cfg/0514/yolov5s_v4.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=8
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.949
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.00261
19 | burn_in=1000
20 | max_batches = 500500
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | #cutmix=1
26 | mosaic=1
27 |
28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416
29 | [focus]
30 | filters=12
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=SiLU
39 |
40 | # Downsample
41 | [convolutional]
42 | batch_normalize=1
43 | filters=64
44 | size=3
45 | stride=2
46 | pad=1
47 | activation=SiLU
48 |
49 | #C3
50 | [convolutional]
51 | batch_normalize=1
52 | filters=32
53 | size=1
54 | stride=1
55 | pad=1
56 | activation=SiLU
57 |
58 | [route]
59 | layers = -2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=32
64 | size=1
65 | stride=1
66 | pad=1
67 | activation=SiLU
68 |
69 | [convolutional]
70 | batch_normalize=1
71 | filters=32
72 | size=1
73 | stride=1
74 | pad=1
75 | activation=SiLU
76 |
77 | [convolutional]
78 | batch_normalize=1
79 | filters=32
80 | size=3
81 | stride=1
82 | pad=1
83 | activation=SiLU
84 |
85 | [shortcut]
86 | from=-3
87 | activation=linear
88 |
89 | [route]
90 | layers = -1,-6
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=64
95 | size=1
96 | stride=1
97 | pad=1
98 | activation=SiLU
99 |
100 | # Downsample
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=2
106 | pad=1
107 | activation=SiLU
108 |
109 | #C3
110 | [convolutional]
111 | batch_normalize=1
112 | filters=64
113 | size=1
114 | stride=1
115 | pad=1
116 | activation=SiLU
117 |
118 | [route]
119 | layers = -2
120 |
121 | [convolutional]
122 | batch_normalize=1
123 | filters=64
124 | size=1
125 | stride=1
126 | pad=1
127 | activation=SiLU
128 |
129 | [convolutional]
130 | batch_normalize=1
131 | filters=64
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=SiLU
136 |
137 | [convolutional]
138 | batch_normalize=1
139 | filters=64
140 | size=3
141 | stride=1
142 | pad=1
143 | activation=SiLU
144 |
145 | [shortcut]
146 | from=-3
147 | activation=linear
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=64
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=SiLU
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=64
160 | size=3
161 | stride=1
162 | pad=1
163 | activation=SiLU
164 |
165 | [shortcut]
166 | from=-3
167 | activation=linear
168 |
169 | [convolutional]
170 | batch_normalize=1
171 | filters=64
172 | size=1
173 | stride=1
174 | pad=1
175 | activation=SiLU
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=64
180 | size=3
181 | stride=1
182 | pad=1
183 | activation=SiLU
184 |
185 | [shortcut]
186 | from=-3
187 | activation=linear
188 |
189 | [route]
190 | layers = -1,-12
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | filters=128
195 | size=1
196 | stride=1
197 | pad=1
198 | activation=SiLU
199 |
200 | # Downsample
201 | [convolutional]
202 | batch_normalize=1
203 | filters=256
204 | size=3
205 | stride=2
206 | pad=1
207 | activation=SiLU
208 |
209 | #C3
210 | [convolutional]
211 | batch_normalize=1
212 | filters=128
213 | size=1
214 | stride=1
215 | pad=1
216 | activation=SiLU
217 |
218 | [route]
219 | layers = -2
220 |
221 | [convolutional]
222 | batch_normalize=1
223 | filters=128
224 | size=1
225 | stride=1
226 | pad=1
227 | activation=SiLU
228 |
229 | [convolutional]
230 | batch_normalize=1
231 | filters=128
232 | size=1
233 | stride=1
234 | pad=1
235 | activation=SiLU
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=128
240 | size=3
241 | stride=1
242 | pad=1
243 | activation=SiLU
244 |
245 | [shortcut]
246 | from=-3
247 | activation=linear
248 |
249 | [convolutional]
250 | batch_normalize=1
251 | filters=128
252 | size=1
253 | stride=1
254 | pad=1
255 | activation=SiLU
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=128
260 | size=3
261 | stride=1
262 | pad=1
263 | activation=SiLU
264 |
265 | [shortcut]
266 | from=-3
267 | activation=linear
268 |
269 | [convolutional]
270 | batch_normalize=1
271 | filters=128
272 | size=1
273 | stride=1
274 | pad=1
275 | activation=SiLU
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=128
280 | size=3
281 | stride=1
282 | pad=1
283 | activation=SiLU
284 |
285 | [shortcut]
286 | from=-3
287 | activation=linear
288 |
289 | [route]
290 | layers = -1,-12
291 |
292 | [convolutional]
293 | batch_normalize=1
294 | filters=256
295 | size=1
296 | stride=1
297 | pad=1
298 | activation=SiLU
299 |
300 | # Downsample
301 | [convolutional]
302 | batch_normalize=1
303 | filters=512
304 | size=3
305 | stride=2
306 | pad=1
307 | activation=SiLU
308 |
309 | [convolutional]
310 | batch_normalize=1
311 | filters=256
312 | size=1
313 | stride=1
314 | pad=1
315 | activation=SiLU
316 |
317 | ### SPP ###
318 | [maxpool]
319 | stride=1
320 | size=5
321 |
322 | [route]
323 | layers=-2
324 |
325 | [maxpool]
326 | stride=1
327 | size=9
328 |
329 | [route]
330 | layers=-4
331 |
332 | [maxpool]
333 | stride=1
334 | size=13
335 |
336 | [route]
337 | ###layers=-1,-3,-5,-6
338 | layers=-6,-5,-3,-1
339 | ### End SPP ###
340 |
341 | [convolutional]
342 | batch_normalize=1
343 | filters=512
344 | size=1
345 | stride=1
346 | pad=1
347 | activation=SiLU
348 |
349 | #C3
350 | [convolutional]
351 | batch_normalize=1
352 | filters=256
353 | size=1
354 | stride=1
355 | pad=1
356 | activation=SiLU
357 |
358 | [route]
359 | layers = -2
360 |
361 | [convolutional]
362 | batch_normalize=1
363 | filters=256
364 | size=1
365 | stride=1
366 | pad=1
367 | activation=SiLU
368 |
369 | [convolutional]
370 | batch_normalize=1
371 | filters=256
372 | size=1
373 | stride=1
374 | pad=1
375 | activation=SiLU
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=3
381 | stride=1
382 | pad=1
383 | activation=SiLU
384 |
385 | [route]
386 | layers = -1,-5
387 |
388 | [convolutional]
389 | batch_normalize=1
390 | filters=512
391 | size=1
392 | stride=1
393 | pad=1
394 | activation=SiLU
395 |
396 | [convolutional]
397 | batch_normalize=1
398 | filters=256
399 | size=1
400 | stride=1
401 | pad=1
402 | activation=SiLU
403 |
404 | [upsample]
405 | stride=2
406 |
407 | [route]
408 | layers = -1,-19
409 |
410 | #C3
411 | [convolutional]
412 | batch_normalize=1
413 | filters=128
414 | size=1
415 | stride=1
416 | pad=1
417 | activation=SiLU
418 |
419 | [route]
420 | layers = -2
421 |
422 | [convolutional]
423 | batch_normalize=1
424 | filters=128
425 | size=1
426 | stride=1
427 | pad=1
428 | activation=SiLU
429 |
430 | [convolutional]
431 | batch_normalize=1
432 | filters=128
433 | size=1
434 | stride=1
435 | pad=1
436 | activation=SiLU
437 |
438 | [convolutional]
439 | batch_normalize=1
440 | filters=128
441 | size=3
442 | stride=1
443 | pad=1
444 | activation=SiLU
445 |
446 | [route]
447 | layers = -1,-5
448 |
449 | [convolutional]
450 | batch_normalize=1
451 | filters=256
452 | size=1
453 | stride=1
454 | pad=1
455 | activation=SiLU
456 |
457 | [convolutional]
458 | batch_normalize=1
459 | filters=128
460 | size=1
461 | stride=1
462 | pad=1
463 | activation=SiLU
464 |
465 | [upsample]
466 | stride=2
467 |
468 | [route]
469 | layers = -1,-44
470 |
471 | #C3
472 | [convolutional]
473 | batch_normalize=1
474 | filters=64
475 | size=1
476 | stride=1
477 | pad=1
478 | activation=SiLU
479 |
480 | [route]
481 | layers = -2
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=64
486 | size=1
487 | stride=1
488 | pad=1
489 | activation=SiLU
490 |
491 | [convolutional]
492 | batch_normalize=1
493 | filters=64
494 | size=1
495 | stride=1
496 | pad=1
497 | activation=SiLU
498 |
499 | [convolutional]
500 | batch_normalize=1
501 | filters=64
502 | size=3
503 | stride=1
504 | pad=1
505 | activation=SiLU
506 |
507 | [route]
508 | layers = -1,-5
509 |
510 | [convolutional]
511 | batch_normalize=1
512 | filters=128
513 | size=1
514 | stride=1
515 | pad=1
516 | activation=SiLU
517 |
518 | ######################
519 | [convolutional]
520 | size=1
521 | stride=1
522 | pad=1
523 | filters=255
524 | activation=linear
525 |
526 | [yolo]
527 | mask = 0,1,2
528 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
529 | classes=80
530 | num=9
531 | jitter=.3
532 | ignore_thresh = .7
533 | truth_thresh = 1
534 | scale_x_y = 1.2
535 | iou_thresh=0.213
536 | cls_normalizer=1.0
537 | iou_normalizer=0.07
538 | iou_loss=ciou
539 | nms_kind=greedynms
540 | beta_nms=0.6
541 |
542 | [route]
543 | layers = -3
544 |
545 | [convolutional]
546 | batch_normalize=1
547 | filters=128
548 | size=3
549 | stride=2
550 | pad=1
551 | activation=SiLU
552 |
553 | [route]
554 | layers = -1,-14
555 |
556 | #C3
557 | [convolutional]
558 | batch_normalize=1
559 | filters=128
560 | size=1
561 | stride=1
562 | pad=1
563 | activation=SiLU
564 |
565 | [route]
566 | layers = -2
567 |
568 | [convolutional]
569 | batch_normalize=1
570 | filters=128
571 | size=1
572 | stride=1
573 | pad=1
574 | activation=SiLU
575 |
576 | [convolutional]
577 | batch_normalize=1
578 | filters=128
579 | size=1
580 | stride=1
581 | pad=1
582 | activation=SiLU
583 |
584 | [convolutional]
585 | batch_normalize=1
586 | filters=128
587 | size=3
588 | stride=1
589 | pad=1
590 | activation=SiLU
591 |
592 | [route]
593 | layers = -1,-5
594 |
595 | [convolutional]
596 | batch_normalize=1
597 | filters=256
598 | size=1
599 | stride=1
600 | pad=1
601 | activation=SiLU
602 |
603 | ######################
604 | [convolutional]
605 | size=1
606 | stride=1
607 | pad=1
608 | filters=255
609 | activation=linear
610 |
611 | [yolo]
612 | mask = 3,4,5
613 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
614 | classes=80
615 | num=9
616 | jitter=.3
617 | ignore_thresh = .7
618 | truth_thresh = 1
619 | scale_x_y = 1.2
620 | iou_thresh=0.213
621 | cls_normalizer=1.0
622 | iou_normalizer=0.07
623 | iou_loss=ciou
624 | nms_kind=greedynms
625 | beta_nms=0.6
626 |
627 | [route]
628 | layers = -3
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=256
633 | size=3
634 | stride=2
635 | pad=1
636 | activation=SiLU
637 |
638 | [route]
639 | layers = -1,-36
640 |
641 | #C3
642 | [convolutional]
643 | batch_normalize=1
644 | filters=256
645 | size=1
646 | stride=1
647 | pad=1
648 | activation=SiLU
649 |
650 | [route]
651 | layers = -2
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=SiLU
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | filters=256
664 | size=1
665 | stride=1
666 | pad=1
667 | activation=SiLU
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=3
673 | stride=1
674 | pad=1
675 | activation=SiLU
676 |
677 | [route]
678 | layers = -1,-5
679 |
680 | [convolutional]
681 | batch_normalize=1
682 | filters=512
683 | size=1
684 | stride=1
685 | pad=1
686 | activation=SiLU
687 |
688 | ######################
689 | [convolutional]
690 | size=1
691 | stride=1
692 | pad=1
693 | filters=255
694 | activation=linear
695 |
696 | [yolo]
697 | mask = 6,7,8
698 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
699 | classes=80
700 | num=9
701 | jitter=.3
702 | ignore_thresh = .7
703 | truth_thresh = 1
704 | scale_x_y = 1.2
705 | iou_thresh=0.213
706 | cls_normalizer=1.0
707 | iou_normalizer=0.07
708 | iou_loss=ciou
709 | nms_kind=greedynms
710 | beta_nms=0.6
711 |
712 |
713 |
--------------------------------------------------------------------------------
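The next file, yolov5s_v4_hand.cfg, keeps exactly the same layer topology and only retargets the detection heads to a single class (classes=1, head filters=18). Producing such a variant for another dataset is easy to script; the helper below is a hypothetical sketch written for this note, not a tool shipped with the repo, and it assumes three anchors per head as in these cfgs.

```python
# Illustrative sketch: retarget a darknet-style cfg to a new class count by
# rewriting `classes=` in every [yolo] block and `filters=` in the linear
# 1x1 convolution directly above it.
def retarget_classes(src, dst, num_classes, anchors_per_head=3):
    lines = open(src, encoding="utf-8").read().splitlines()
    yolo_starts = [i for i, l in enumerate(lines) if l.strip() == "[yolo]"]
    new_filters = (num_classes + 5) * anchors_per_head
    for y in yolo_starts:
        # Patch classes= inside this [yolo] block (stop at the next section header).
        j = y + 1
        while j < len(lines) and not lines[j].strip().startswith("["):
            if lines[j].strip().startswith("classes"):
                lines[j] = f"classes={num_classes}"
            j += 1
        # Walk backwards to the nearest filters= line, i.e. the head convolution.
        k = y - 1
        while k >= 0 and not lines[k].strip().startswith("filters"):
            k -= 1
        lines[k] = f"filters={new_filters}"
    open(dst, "w", encoding="utf-8").write("\n".join(lines) + "\n")

# retarget_classes("cfg/0514/yolov5s_v4.cfg", "yolov5s_v4_hand.cfg", num_classes=1)
```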
/cfg/0514/yolov5s_v4_hand.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=8
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.949
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.00261
19 | burn_in=1000
20 | max_batches = 500500
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | #cutmix=1
26 | mosaic=1
27 |
28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416
29 | [focus]
30 | filters=12
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=32
35 | size=3
36 | stride=1
37 | pad=1
38 | activation=SiLU
39 |
40 | # Downsample
41 | [convolutional]
42 | batch_normalize=1
43 | filters=64
44 | size=3
45 | stride=2
46 | pad=1
47 | activation=SiLU
48 |
49 | #C3
50 | [convolutional]
51 | batch_normalize=1
52 | filters=32
53 | size=1
54 | stride=1
55 | pad=1
56 | activation=SiLU
57 |
58 | [route]
59 | layers = -2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=32
64 | size=1
65 | stride=1
66 | pad=1
67 | activation=SiLU
68 |
69 | [convolutional]
70 | batch_normalize=1
71 | filters=32
72 | size=1
73 | stride=1
74 | pad=1
75 | activation=SiLU
76 |
77 | [convolutional]
78 | batch_normalize=1
79 | filters=32
80 | size=3
81 | stride=1
82 | pad=1
83 | activation=SiLU
84 |
85 | [shortcut]
86 | from=-3
87 | activation=linear
88 |
89 | [route]
90 | layers = -1,-6
91 |
92 | [convolutional]
93 | batch_normalize=1
94 | filters=64
95 | size=1
96 | stride=1
97 | pad=1
98 | activation=SiLU
99 |
100 | # Downsample
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=2
106 | pad=1
107 | activation=SiLU
108 |
109 | #C3
110 | [convolutional]
111 | batch_normalize=1
112 | filters=64
113 | size=1
114 | stride=1
115 | pad=1
116 | activation=SiLU
117 |
118 | [route]
119 | layers = -2
120 |
121 | [convolutional]
122 | batch_normalize=1
123 | filters=64
124 | size=1
125 | stride=1
126 | pad=1
127 | activation=SiLU
128 |
129 | [convolutional]
130 | batch_normalize=1
131 | filters=64
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=SiLU
136 |
137 | [convolutional]
138 | batch_normalize=1
139 | filters=64
140 | size=3
141 | stride=1
142 | pad=1
143 | activation=SiLU
144 |
145 | [shortcut]
146 | from=-3
147 | activation=linear
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=64
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=SiLU
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=64
160 | size=3
161 | stride=1
162 | pad=1
163 | activation=SiLU
164 |
165 | [shortcut]
166 | from=-3
167 | activation=linear
168 |
169 | [convolutional]
170 | batch_normalize=1
171 | filters=64
172 | size=1
173 | stride=1
174 | pad=1
175 | activation=SiLU
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=64
180 | size=3
181 | stride=1
182 | pad=1
183 | activation=SiLU
184 |
185 | [shortcut]
186 | from=-3
187 | activation=linear
188 |
189 | [route]
190 | layers = -1,-12
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | filters=128
195 | size=1
196 | stride=1
197 | pad=1
198 | activation=SiLU
199 |
200 | # Downsample
201 | [convolutional]
202 | batch_normalize=1
203 | filters=256
204 | size=3
205 | stride=2
206 | pad=1
207 | activation=SiLU
208 |
209 | #C3
210 | [convolutional]
211 | batch_normalize=1
212 | filters=128
213 | size=1
214 | stride=1
215 | pad=1
216 | activation=SiLU
217 |
218 | [route]
219 | layers = -2
220 |
221 | [convolutional]
222 | batch_normalize=1
223 | filters=128
224 | size=1
225 | stride=1
226 | pad=1
227 | activation=SiLU
228 |
229 | [convolutional]
230 | batch_normalize=1
231 | filters=128
232 | size=1
233 | stride=1
234 | pad=1
235 | activation=SiLU
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=128
240 | size=3
241 | stride=1
242 | pad=1
243 | activation=SiLU
244 |
245 | [shortcut]
246 | from=-3
247 | activation=linear
248 |
249 | [convolutional]
250 | batch_normalize=1
251 | filters=128
252 | size=1
253 | stride=1
254 | pad=1
255 | activation=SiLU
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=128
260 | size=3
261 | stride=1
262 | pad=1
263 | activation=SiLU
264 |
265 | [shortcut]
266 | from=-3
267 | activation=linear
268 |
269 | [convolutional]
270 | batch_normalize=1
271 | filters=128
272 | size=1
273 | stride=1
274 | pad=1
275 | activation=SiLU
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=128
280 | size=3
281 | stride=1
282 | pad=1
283 | activation=SiLU
284 |
285 | [shortcut]
286 | from=-3
287 | activation=linear
288 |
289 | [route]
290 | layers = -1,-12
291 |
292 | [convolutional]
293 | batch_normalize=1
294 | filters=256
295 | size=1
296 | stride=1
297 | pad=1
298 | activation=SiLU
299 |
300 | # Downsample
301 | [convolutional]
302 | batch_normalize=1
303 | filters=512
304 | size=3
305 | stride=2
306 | pad=1
307 | activation=SiLU
308 |
309 | [convolutional]
310 | batch_normalize=1
311 | filters=256
312 | size=1
313 | stride=1
314 | pad=1
315 | activation=SiLU
316 |
317 | ### SPP ###
318 | [maxpool]
319 | stride=1
320 | size=5
321 |
322 | [route]
323 | layers=-2
324 |
325 | [maxpool]
326 | stride=1
327 | size=9
328 |
329 | [route]
330 | layers=-4
331 |
332 | [maxpool]
333 | stride=1
334 | size=13
335 |
336 | [route]
337 | ###layers=-1,-3,-5,-6
338 | layers=-6,-5,-3,-1
339 | ### End SPP ###
340 |
341 | [convolutional]
342 | batch_normalize=1
343 | filters=512
344 | size=1
345 | stride=1
346 | pad=1
347 | activation=SiLU
348 |
349 | #C3
350 | [convolutional]
351 | batch_normalize=1
352 | filters=256
353 | size=1
354 | stride=1
355 | pad=1
356 | activation=SiLU
357 |
358 | [route]
359 | layers = -2
360 |
361 | [convolutional]
362 | batch_normalize=1
363 | filters=256
364 | size=1
365 | stride=1
366 | pad=1
367 | activation=SiLU
368 |
369 | [convolutional]
370 | batch_normalize=1
371 | filters=256
372 | size=1
373 | stride=1
374 | pad=1
375 | activation=SiLU
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=3
381 | stride=1
382 | pad=1
383 | activation=SiLU
384 |
385 | [route]
386 | layers = -1,-5
387 |
388 | [convolutional]
389 | batch_normalize=1
390 | filters=512
391 | size=1
392 | stride=1
393 | pad=1
394 | activation=SiLU
395 |
396 | [convolutional]
397 | batch_normalize=1
398 | filters=256
399 | size=1
400 | stride=1
401 | pad=1
402 | activation=SiLU
403 |
404 | [upsample]
405 | stride=2
406 |
407 | [route]
408 | layers = -1,-19
409 |
410 | #C3
411 | [convolutional]
412 | batch_normalize=1
413 | filters=128
414 | size=1
415 | stride=1
416 | pad=1
417 | activation=SiLU
418 |
419 | [route]
420 | layers = -2
421 |
422 | [convolutional]
423 | batch_normalize=1
424 | filters=128
425 | size=1
426 | stride=1
427 | pad=1
428 | activation=SiLU
429 |
430 | [convolutional]
431 | batch_normalize=1
432 | filters=128
433 | size=1
434 | stride=1
435 | pad=1
436 | activation=SiLU
437 |
438 | [convolutional]
439 | batch_normalize=1
440 | filters=128
441 | size=3
442 | stride=1
443 | pad=1
444 | activation=SiLU
445 |
446 | [route]
447 | layers = -1,-5
448 |
449 | [convolutional]
450 | batch_normalize=1
451 | filters=256
452 | size=1
453 | stride=1
454 | pad=1
455 | activation=SiLU
456 |
457 | [convolutional]
458 | batch_normalize=1
459 | filters=128
460 | size=1
461 | stride=1
462 | pad=1
463 | activation=SiLU
464 |
465 | [upsample]
466 | stride=2
467 |
468 | [route]
469 | layers = -1,-44
470 |
471 | #C3
472 | [convolutional]
473 | batch_normalize=1
474 | filters=64
475 | size=1
476 | stride=1
477 | pad=1
478 | activation=SiLU
479 |
480 | [route]
481 | layers = -2
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=64
486 | size=1
487 | stride=1
488 | pad=1
489 | activation=SiLU
490 |
491 | [convolutional]
492 | batch_normalize=1
493 | filters=64
494 | size=1
495 | stride=1
496 | pad=1
497 | activation=SiLU
498 |
499 | [convolutional]
500 | batch_normalize=1
501 | filters=64
502 | size=3
503 | stride=1
504 | pad=1
505 | activation=SiLU
506 |
507 | [route]
508 | layers = -1,-5
509 |
510 | [convolutional]
511 | batch_normalize=1
512 | filters=128
513 | size=1
514 | stride=1
515 | pad=1
516 | activation=SiLU
517 |
518 | ######################
519 | [convolutional]
520 | size=1
521 | stride=1
522 | pad=1
523 | filters=18
524 | activation=linear
525 |
526 | [yolo]
527 | mask = 0,1,2
528 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
529 | classes=1
530 | num=9
531 | jitter=.3
532 | ignore_thresh = .7
533 | truth_thresh = 1
534 | scale_x_y = 1.2
535 | iou_thresh=0.213
536 | cls_normalizer=1.0
537 | iou_normalizer=0.07
538 | iou_loss=ciou
539 | nms_kind=greedynms
540 | beta_nms=0.6
541 |
542 | [route]
543 | layers = -3
544 |
545 | [convolutional]
546 | batch_normalize=1
547 | filters=128
548 | size=3
549 | stride=2
550 | pad=1
551 | activation=SiLU
552 |
553 | [route]
554 | layers = -1,-14
555 |
556 | #C3
557 | [convolutional]
558 | batch_normalize=1
559 | filters=128
560 | size=1
561 | stride=1
562 | pad=1
563 | activation=SiLU
564 |
565 | [route]
566 | layers = -2
567 |
568 | [convolutional]
569 | batch_normalize=1
570 | filters=128
571 | size=1
572 | stride=1
573 | pad=1
574 | activation=SiLU
575 |
576 | [convolutional]
577 | batch_normalize=1
578 | filters=128
579 | size=1
580 | stride=1
581 | pad=1
582 | activation=SiLU
583 |
584 | [convolutional]
585 | batch_normalize=1
586 | filters=128
587 | size=3
588 | stride=1
589 | pad=1
590 | activation=SiLU
591 |
592 | [route]
593 | layers = -1,-5
594 |
595 | [convolutional]
596 | batch_normalize=1
597 | filters=256
598 | size=1
599 | stride=1
600 | pad=1
601 | activation=SiLU
602 |
603 | ######################
604 | [convolutional]
605 | size=1
606 | stride=1
607 | pad=1
608 | filters=18
609 | activation=linear
610 |
611 | [yolo]
612 | mask = 3,4,5
613 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
614 | classes=1
615 | num=9
616 | jitter=.3
617 | ignore_thresh = .7
618 | truth_thresh = 1
619 | scale_x_y = 1.2
620 | iou_thresh=0.213
621 | cls_normalizer=1.0
622 | iou_normalizer=0.07
623 | iou_loss=ciou
624 | nms_kind=greedynms
625 | beta_nms=0.6
626 |
627 | [route]
628 | layers = -3
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=256
633 | size=3
634 | stride=2
635 | pad=1
636 | activation=SiLU
637 |
638 | [route]
639 | layers = -1,-36
640 |
641 | #C3
642 | [convolutional]
643 | batch_normalize=1
644 | filters=256
645 | size=1
646 | stride=1
647 | pad=1
648 | activation=SiLU
649 |
650 | [route]
651 | layers = -2
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=SiLU
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | filters=256
664 | size=1
665 | stride=1
666 | pad=1
667 | activation=SiLU
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=3
673 | stride=1
674 | pad=1
675 | activation=SiLU
676 |
677 | [route]
678 | layers = -1,-5
679 |
680 | [convolutional]
681 | batch_normalize=1
682 | filters=512
683 | size=1
684 | stride=1
685 | pad=1
686 | activation=SiLU
687 |
688 | ######################
689 | [convolutional]
690 | size=1
691 | stride=1
692 | pad=1
693 | filters=18
694 | activation=linear
695 |
696 | [yolo]
697 | mask = 6,7,8
698 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
699 | classes=1
700 | num=9
701 | jitter=.3
702 | ignore_thresh = .7
703 | truth_thresh = 1
704 | scale_x_y = 1.2
705 | iou_thresh=0.213
706 | cls_normalizer=1.0
707 | iou_normalizer=0.07
708 | iou_loss=ciou
709 | nms_kind=greedynms
710 | beta_nms=0.6
711 |
712 |
713 |
--------------------------------------------------------------------------------
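The remaining cfgs in this listing are channel-pruned yolov5l variants. Judging by the file names (prune ratio 0.8, keep ratio 0.01 or 0.05), they appear to follow the usual network-slimming recipe: channels are ranked by the absolute BatchNorm scale factor, a global threshold removes the requested fraction, and a per-layer keep floor prevents any block from being pruned away entirely, which is why the surviving filter counts (8, 24, 109, 231, 245, ...) look irregular. The snippet below is a generic sketch of that recipe under those assumptions, not the repository's slim_prune_yolov5l_8x.py.

```python
# Generic network-slimming channel selection (assumption: the repo follows this
# standard recipe; this is not its code). Channels whose |BN gamma| falls below
# a global threshold are dropped, but each layer keeps at least `keep_ratio`
# of its channels.
import torch

def slim_prune_masks(bn_layers, prune_ratio=0.8, keep_ratio=0.01):
    gammas = torch.cat([bn.weight.detach().abs().flatten() for bn in bn_layers])
    threshold = torch.sort(gammas).values[int(len(gammas) * prune_ratio)]
    masks = []
    for bn in bn_layers:
        g = bn.weight.detach().abs()
        mask = (g > threshold).float()
        min_keep = max(1, int(g.numel() * keep_ratio))
        if int(mask.sum()) < min_keep:
            # Enforce the per-layer floor by keeping the largest-gamma channels.
            keep_idx = torch.topk(g, min_keep).indices
            mask = torch.zeros_like(g)
            mask[keep_idx] = 1.0
        masks.append(mask)
    return masks
```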
/cfg/last_prune/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.949
8 | decay=0.0005
9 | angle=0
10 | saturation=1.5
11 | exposure=1.5
12 | hue=.1
13 | learning_rate=0.00261
14 | burn_in=1000
15 | max_batches=500500
16 | policy=steps
17 | steps=400000,450000
18 | scales=.1,.1
19 | mosaic=1
20 |
21 | [focus]
22 | filters=12
23 |
24 | [convolutional]
25 | batch_normalize=1
26 | filters=40
27 | size=3
28 | stride=1
29 | pad=1
30 | activation=leaky
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=112
35 | size=3
36 | stride=2
37 | pad=1
38 | activation=leaky
39 |
40 | [convolutional]
41 | batch_normalize=1
42 | filters=64
43 | size=1
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [route]
49 | layers=-2
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=1
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [convolutional]
60 | batch_normalize=1
61 | filters=64
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | batch_normalize=1
69 | filters=64
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [shortcut]
76 | from=-3
77 | activation=linear
78 |
79 | [convolutional]
80 | batch_normalize=1
81 | filters=64
82 | size=1
83 | stride=1
84 | pad=1
85 | activation=leaky
86 |
87 | [convolutional]
88 | batch_normalize=1
89 | filters=64
90 | size=3
91 | stride=1
92 | pad=1
93 | activation=leaky
94 |
95 | [shortcut]
96 | from=-3
97 | activation=linear
98 |
99 | [convolutional]
100 | batch_normalize=1
101 | filters=64
102 | size=1
103 | stride=1
104 | pad=1
105 | activation=leaky
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=64
110 | size=3
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [shortcut]
116 | from=-3
117 | activation=linear
118 |
119 | [route]
120 | layers=-1,-12
121 |
122 | [convolutional]
123 | batch_normalize=1
124 | filters=128
125 | size=1
126 | stride=1
127 | pad=1
128 | activation=leaky
129 |
130 | [convolutional]
131 | batch_normalize=1
132 | filters=248
133 | size=3
134 | stride=2
135 | pad=1
136 | activation=leaky
137 |
138 | [convolutional]
139 | batch_normalize=1
140 | filters=128
141 | size=1
142 | stride=1
143 | pad=1
144 | activation=leaky
145 |
146 | [route]
147 | layers=-2
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=128
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=56
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=128
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [shortcut]
174 | from=-3
175 | activation=linear
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=56
180 | size=1
181 | stride=1
182 | pad=1
183 | activation=leaky
184 |
185 | [convolutional]
186 | batch_normalize=1
187 | filters=128
188 | size=3
189 | stride=1
190 | pad=1
191 | activation=leaky
192 |
193 | [shortcut]
194 | from=-3
195 | activation=linear
196 |
197 | [convolutional]
198 | batch_normalize=1
199 | filters=24
200 | size=1
201 | stride=1
202 | pad=1
203 | activation=leaky
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=3
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [shortcut]
214 | from=-3
215 | activation=linear
216 |
217 | [convolutional]
218 | batch_normalize=1
219 | filters=24
220 | size=1
221 | stride=1
222 | pad=1
223 | activation=leaky
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=3
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [shortcut]
234 | from=-3
235 | activation=linear
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=40
240 | size=1
241 | stride=1
242 | pad=1
243 | activation=leaky
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=3
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [shortcut]
254 | from=-3
255 | activation=linear
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=32
260 | size=1
261 | stride=1
262 | pad=1
263 | activation=leaky
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=3
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [shortcut]
274 | from=-3
275 | activation=linear
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=64
280 | size=1
281 | stride=1
282 | pad=1
283 | activation=leaky
284 |
285 | [convolutional]
286 | batch_normalize=1
287 | filters=128
288 | size=3
289 | stride=1
290 | pad=1
291 | activation=leaky
292 |
293 | [shortcut]
294 | from=-3
295 | activation=linear
296 |
297 | [convolutional]
298 | batch_normalize=1
299 | filters=64
300 | size=1
301 | stride=1
302 | pad=1
303 | activation=leaky
304 |
305 | [convolutional]
306 | batch_normalize=1
307 | filters=128
308 | size=3
309 | stride=1
310 | pad=1
311 | activation=leaky
312 |
313 | [shortcut]
314 | from=-3
315 | activation=linear
316 |
317 | [convolutional]
318 | batch_normalize=1
319 | filters=56
320 | size=1
321 | stride=1
322 | pad=1
323 | activation=leaky
324 |
325 | [convolutional]
326 | batch_normalize=1
327 | filters=128
328 | size=3
329 | stride=1
330 | pad=1
331 | activation=leaky
332 |
333 | [shortcut]
334 | from=-3
335 | activation=linear
336 |
337 | [route]
338 | layers=-1,-30
339 |
340 | [convolutional]
341 | batch_normalize=1
342 | filters=240
343 | size=1
344 | stride=1
345 | pad=1
346 | activation=leaky
347 |
348 | [convolutional]
349 | batch_normalize=1
350 | filters=240
351 | size=3
352 | stride=2
353 | pad=1
354 | activation=leaky
355 |
356 | [convolutional]
357 | batch_normalize=1
358 | filters=96
359 | size=1
360 | stride=1
361 | pad=1
362 | activation=leaky
363 |
364 | [route]
365 | layers=-2
366 |
367 | [convolutional]
368 | batch_normalize=1
369 | filters=245
370 | size=1
371 | stride=1
372 | pad=1
373 | activation=leaky
374 |
375 | [convolutional]
376 | batch_normalize=1
377 | filters=56
378 | size=1
379 | stride=1
380 | pad=1
381 | activation=leaky
382 |
383 | [convolutional]
384 | batch_normalize=1
385 | filters=245
386 | size=3
387 | stride=1
388 | pad=1
389 | activation=leaky
390 |
391 | [shortcut]
392 | from=-3
393 | activation=linear
394 |
395 | [convolutional]
396 | batch_normalize=1
397 | filters=8
398 | size=1
399 | stride=1
400 | pad=1
401 | activation=leaky
402 |
403 | [convolutional]
404 | batch_normalize=1
405 | filters=245
406 | size=3
407 | stride=1
408 | pad=1
409 | activation=leaky
410 |
411 | [shortcut]
412 | from=-3
413 | activation=linear
414 |
415 | [convolutional]
416 | batch_normalize=1
417 | filters=16
418 | size=1
419 | stride=1
420 | pad=1
421 | activation=leaky
422 |
423 | [convolutional]
424 | batch_normalize=1
425 | filters=245
426 | size=3
427 | stride=1
428 | pad=1
429 | activation=leaky
430 |
431 | [shortcut]
432 | from=-3
433 | activation=linear
434 |
435 | [convolutional]
436 | batch_normalize=1
437 | filters=24
438 | size=1
439 | stride=1
440 | pad=1
441 | activation=leaky
442 |
443 | [convolutional]
444 | batch_normalize=1
445 | filters=245
446 | size=3
447 | stride=1
448 | pad=1
449 | activation=leaky
450 |
451 | [shortcut]
452 | from=-3
453 | activation=linear
454 |
455 | [convolutional]
456 | batch_normalize=1
457 | filters=16
458 | size=1
459 | stride=1
460 | pad=1
461 | activation=leaky
462 |
463 | [convolutional]
464 | batch_normalize=1
465 | filters=245
466 | size=3
467 | stride=1
468 | pad=1
469 | activation=leaky
470 |
471 | [shortcut]
472 | from=-3
473 | activation=linear
474 |
475 | [convolutional]
476 | batch_normalize=1
477 | filters=24
478 | size=1
479 | stride=1
480 | pad=1
481 | activation=leaky
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=245
486 | size=3
487 | stride=1
488 | pad=1
489 | activation=leaky
490 |
491 | [shortcut]
492 | from=-3
493 | activation=linear
494 |
495 | [convolutional]
496 | batch_normalize=1
497 | filters=16
498 | size=1
499 | stride=1
500 | pad=1
501 | activation=leaky
502 |
503 | [convolutional]
504 | batch_normalize=1
505 | filters=245
506 | size=3
507 | stride=1
508 | pad=1
509 | activation=leaky
510 |
511 | [shortcut]
512 | from=-3
513 | activation=linear
514 |
515 | [convolutional]
516 | batch_normalize=1
517 | filters=32
518 | size=1
519 | stride=1
520 | pad=1
521 | activation=leaky
522 |
523 | [convolutional]
524 | batch_normalize=1
525 | filters=245
526 | size=3
527 | stride=1
528 | pad=1
529 | activation=leaky
530 |
531 | [shortcut]
532 | from=-3
533 | activation=linear
534 |
535 | [convolutional]
536 | batch_normalize=1
537 | filters=24
538 | size=1
539 | stride=1
540 | pad=1
541 | activation=leaky
542 |
543 | [convolutional]
544 | batch_normalize=1
545 | filters=245
546 | size=3
547 | stride=1
548 | pad=1
549 | activation=leaky
550 |
551 | [shortcut]
552 | from=-3
553 | activation=linear
554 |
555 | [route]
556 | layers=-1,-30
557 |
558 | [convolutional]
559 | batch_normalize=1
560 | filters=144
561 | size=1
562 | stride=1
563 | pad=1
564 | activation=leaky
565 |
566 | [convolutional]
567 | batch_normalize=1
568 | filters=104
569 | size=3
570 | stride=2
571 | pad=1
572 | activation=leaky
573 |
574 | [convolutional]
575 | batch_normalize=1
576 | filters=512
577 | size=1
578 | stride=1
579 | pad=1
580 | activation=leaky
581 |
582 | [maxpool]
583 | stride=1
584 | size=5
585 |
586 | [route]
587 | layers=-2
588 |
589 | [maxpool]
590 | stride=1
591 | size=9
592 |
593 | [route]
594 | layers=-4
595 |
596 | [maxpool]
597 | stride=1
598 | size=13
599 |
600 | [route]
601 | layers=-6,-5,-3,-1
602 |
603 | [convolutional]
604 | batch_normalize=1
605 | filters=56
606 | size=1
607 | stride=1
608 | pad=1
609 | activation=leaky
610 |
611 | [convolutional]
612 | batch_normalize=1
613 | filters=40
614 | size=1
615 | stride=1
616 | pad=1
617 | activation=leaky
618 |
619 | [route]
620 | layers=-2
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=8
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=8
633 | size=1
634 | stride=1
635 | pad=1
636 | activation=leaky
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=8
641 | size=3
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | filters=8
649 | size=1
650 | stride=1
651 | pad=1
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=16
657 | size=3
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | filters=8
665 | size=1
666 | stride=1
667 | pad=1
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=8
673 | size=3
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [route]
679 | layers=-1,-9
680 |
681 | [convolutional]
682 | batch_normalize=1
683 | filters=40
684 | size=1
685 | stride=1
686 | pad=1
687 | activation=leaky
688 |
689 | [convolutional]
690 | batch_normalize=1
691 | filters=512
692 | size=1
693 | stride=1
694 | pad=1
695 | activation=leaky
696 |
697 | [upsample]
698 | stride=2
699 |
700 | [route]
701 | layers=-1,-23
702 |
703 | [convolutional]
704 | batch_normalize=1
705 | filters=48
706 | size=1
707 | stride=1
708 | pad=1
709 | activation=leaky
710 |
711 | [route]
712 | layers=-2
713 |
714 | [convolutional]
715 | batch_normalize=1
716 | filters=80
717 | size=1
718 | stride=1
719 | pad=1
720 | activation=leaky
721 |
722 | [convolutional]
723 | batch_normalize=1
724 | filters=104
725 | size=1
726 | stride=1
727 | pad=1
728 | activation=leaky
729 |
730 | [convolutional]
731 | batch_normalize=1
732 | filters=128
733 | size=3
734 | stride=1
735 | pad=1
736 | activation=leaky
737 |
738 | [convolutional]
739 | batch_normalize=1
740 | filters=112
741 | size=1
742 | stride=1
743 | pad=1
744 | activation=leaky
745 |
746 | [convolutional]
747 | batch_normalize=1
748 | filters=112
749 | size=3
750 | stride=1
751 | pad=1
752 | activation=leaky
753 |
754 | [convolutional]
755 | batch_normalize=1
756 | filters=96
757 | size=1
758 | stride=1
759 | pad=1
760 | activation=leaky
761 |
762 | [convolutional]
763 | batch_normalize=1
764 | filters=104
765 | size=3
766 | stride=1
767 | pad=1
768 | activation=leaky
769 |
770 | [route]
771 | layers=-1,-9
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=88
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | filters=256
784 | size=1
785 | stride=1
786 | pad=1
787 | activation=leaky
788 |
789 | [upsample]
790 | stride=2
791 |
792 | [route]
793 | layers=-1,-70
794 |
795 | [convolutional]
796 | batch_normalize=1
797 | filters=32
798 | size=1
799 | stride=1
800 | pad=1
801 | activation=leaky
802 |
803 | [route]
804 | layers=-2
805 |
806 | [convolutional]
807 | batch_normalize=1
808 | filters=72
809 | size=1
810 | stride=1
811 | pad=1
812 | activation=leaky
813 |
814 | [convolutional]
815 | batch_normalize=1
816 | filters=72
817 | size=1
818 | stride=1
819 | pad=1
820 | activation=leaky
821 |
822 | [convolutional]
823 | batch_normalize=1
824 | filters=96
825 | size=3
826 | stride=1
827 | pad=1
828 | activation=leaky
829 |
830 | [convolutional]
831 | batch_normalize=1
832 | filters=88
833 | size=1
834 | stride=1
835 | pad=1
836 | activation=leaky
837 |
838 | [convolutional]
839 | batch_normalize=1
840 | filters=88
841 | size=3
842 | stride=1
843 | pad=1
844 | activation=leaky
845 |
846 | [convolutional]
847 | batch_normalize=1
848 | filters=80
849 | size=1
850 | stride=1
851 | pad=1
852 | activation=leaky
853 |
854 | [convolutional]
855 | batch_normalize=1
856 | filters=96
857 | size=3
858 | stride=1
859 | pad=1
860 | activation=leaky
861 |
862 | [route]
863 | layers=-1,-9
864 |
865 | [convolutional]
866 | batch_normalize=1
867 | filters=96
868 | size=1
869 | stride=1
870 | pad=1
871 | activation=leaky
872 |
873 | [convolutional]
874 | size=1
875 | stride=1
876 | pad=1
877 | filters=24
878 | activation=linear
879 |
880 | [yolo]
881 | mask=0,1,2
882 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
883 | classes=3
884 | num=9
885 | jitter=.3
886 | ignore_thresh=.7
887 | truth_thresh=1
888 | scale_x_y=1.2
889 | iou_thresh=0.213
890 | cls_normalizer=1.0
891 | iou_normalizer=0.07
892 | iou_loss=ciou
893 | nms_kind=greedynms
894 | beta_nms=0.6
895 |
896 | [route]
897 | layers=-3
898 |
899 | [convolutional]
900 | batch_normalize=1
901 | filters=72
902 | size=3
903 | stride=2
904 | pad=1
905 | activation=leaky
906 |
907 | [route]
908 | layers=-1,-18
909 |
910 | [convolutional]
911 | batch_normalize=1
912 | filters=64
913 | size=1
914 | stride=1
915 | pad=1
916 | activation=leaky
917 |
918 | [route]
919 | layers=-2
920 |
921 | [convolutional]
922 | batch_normalize=1
923 | filters=56
924 | size=1
925 | stride=1
926 | pad=1
927 | activation=leaky
928 |
929 | [convolutional]
930 | batch_normalize=1
931 | filters=48
932 | size=1
933 | stride=1
934 | pad=1
935 | activation=leaky
936 |
937 | [convolutional]
938 | batch_normalize=1
939 | filters=64
940 | size=3
941 | stride=1
942 | pad=1
943 | activation=leaky
944 |
945 | [convolutional]
946 | batch_normalize=1
947 | filters=48
948 | size=1
949 | stride=1
950 | pad=1
951 | activation=leaky
952 |
953 | [convolutional]
954 | batch_normalize=1
955 | filters=64
956 | size=3
957 | stride=1
958 | pad=1
959 | activation=leaky
960 |
961 | [convolutional]
962 | batch_normalize=1
963 | filters=64
964 | size=1
965 | stride=1
966 | pad=1
967 | activation=leaky
968 |
969 | [convolutional]
970 | batch_normalize=1
971 | filters=80
972 | size=3
973 | stride=1
974 | pad=1
975 | activation=leaky
976 |
977 | [route]
978 | layers=-1,-9
979 |
980 | [convolutional]
981 | batch_normalize=1
982 | filters=104
983 | size=1
984 | stride=1
985 | pad=1
986 | activation=leaky
987 |
988 | [convolutional]
989 | size=1
990 | stride=1
991 | pad=1
992 | filters=24
993 | activation=linear
994 |
995 | [yolo]
996 | mask=3,4,5
997 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
998 | classes=3
999 | num=9
1000 | jitter=.3
1001 | ignore_thresh=.7
1002 | truth_thresh=1
1003 | scale_x_y=1.2
1004 | iou_thresh=0.213
1005 | cls_normalizer=1.0
1006 | iou_normalizer=0.07
1007 | iou_loss=ciou
1008 | nms_kind=greedynms
1009 | beta_nms=0.6
1010 |
1011 | [route]
1012 | layers=-3
1013 |
1014 | [convolutional]
1015 | batch_normalize=1
1016 | filters=136
1017 | size=3
1018 | stride=2
1019 | pad=1
1020 | activation=leaky
1021 |
1022 | [route]
1023 | layers=-1,-48
1024 |
1025 | [convolutional]
1026 | batch_normalize=1
1027 | filters=64
1028 | size=1
1029 | stride=1
1030 | pad=1
1031 | activation=leaky
1032 |
1033 | [route]
1034 | layers=-2
1035 |
1036 | [convolutional]
1037 | batch_normalize=1
1038 | filters=8
1039 | size=1
1040 | stride=1
1041 | pad=1
1042 | activation=leaky
1043 |
1044 | [convolutional]
1045 | batch_normalize=1
1046 | filters=8
1047 | size=1
1048 | stride=1
1049 | pad=1
1050 | activation=leaky
1051 |
1052 | [convolutional]
1053 | batch_normalize=1
1054 | filters=8
1055 | size=3
1056 | stride=1
1057 | pad=1
1058 | activation=leaky
1059 |
1060 | [convolutional]
1061 | batch_normalize=1
1062 | filters=8
1063 | size=1
1064 | stride=1
1065 | pad=1
1066 | activation=leaky
1067 |
1068 | [convolutional]
1069 | batch_normalize=1
1070 | filters=8
1071 | size=3
1072 | stride=1
1073 | pad=1
1074 | activation=leaky
1075 |
1076 | [convolutional]
1077 | batch_normalize=1
1078 | filters=8
1079 | size=1
1080 | stride=1
1081 | pad=1
1082 | activation=leaky
1083 |
1084 | [convolutional]
1085 | batch_normalize=1
1086 | filters=8
1087 | size=3
1088 | stride=1
1089 | pad=1
1090 | activation=leaky
1091 |
1092 | [route]
1093 | layers=-1,-9
1094 |
1095 | [convolutional]
1096 | batch_normalize=1
1097 | filters=72
1098 | size=1
1099 | stride=1
1100 | pad=1
1101 | activation=leaky
1102 |
1103 | [convolutional]
1104 | size=1
1105 | stride=1
1106 | pad=1
1107 | filters=24
1108 | activation=linear
1109 |
1110 | [yolo]
1111 | mask=6,7,8
1112 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115
1113 | classes=3
1114 | num=9
1115 | jitter=.3
1116 | ignore_thresh=.7
1117 | truth_thresh=1
1118 | scale_x_y=1.2
1119 | iou_thresh=0.213
1120 | cls_normalizer=1.0
1121 | iou_normalizer=0.07
1122 | iou_loss=ciou
1123 | nms_kind=greedynms
1124 | beta_nms=0.6
1125 |
1126 |
--------------------------------------------------------------------------------
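These pruned cfgs lean heavily on relative layer references: [route] blocks such as layers=-1,-30, the SPP concatenation layers=-6,-5,-3,-1, and [shortcut] blocks with from=-3. Hand-editing a cfg easily breaks one of these offsets, so a small validation pass is useful. The sketch below assumes the section-dict format produced by the `parse_cfg` sketch earlier in this listing and is, again, a hypothetical helper rather than repo code.

```python
# Illustrative sketch: resolve the negative layer offsets used by
# [route]/[shortcut] blocks (e.g. layers=-1,-30 or from=-3) into absolute
# indices and flag any reference that does not point at an earlier layer.
def resolve_routes(sections):
    """`sections` is a parsed cfg (index 0 is [net], which is not a layer)."""
    layers = sections[1:]
    for idx, sec in enumerate(layers):
        if sec["type"] == "route":
            refs = [int(v) for v in sec["layers"].split(",")]
        elif sec["type"] == "shortcut":
            refs = [int(sec["from"])]
        else:
            continue
        for r in refs:
            target = idx + r if r < 0 else r  # negative offsets are relative
            if not 0 <= target < idx:
                print(f"layer {idx} ({sec['type']}): bad reference {r} -> {target}")
```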
/cfg/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | batch=64
3 | subdivisions=8
4 | width=416
5 | height=416
6 | channels=3
7 | momentum=0.949
8 | decay=0.0005
9 | angle=0
10 | saturation=1.5
11 | exposure=1.5
12 | hue=.1
13 | learning_rate=0.00261
14 | burn_in=1000
15 | max_batches=500500
16 | policy=steps
17 | steps=400000,450000
18 | scales=.1,.1
19 | mosaic=1
20 |
21 | [focus]
22 | filters=12
23 |
24 | [convolutional]
25 | batch_normalize=1
26 | filters=56
27 | size=3
28 | stride=1
29 | pad=1
30 | activation=leaky
31 |
32 | [convolutional]
33 | batch_normalize=1
34 | filters=128
35 | size=3
36 | stride=2
37 | pad=1
38 | activation=leaky
39 |
40 | [convolutional]
41 | batch_normalize=1
42 | filters=56
43 | size=1
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | [route]
49 | layers=-2
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=1
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [convolutional]
60 | batch_normalize=1
61 | filters=56
62 | size=1
63 | stride=1
64 | pad=1
65 | activation=leaky
66 |
67 | [convolutional]
68 | batch_normalize=1
69 | filters=64
70 | size=3
71 | stride=1
72 | pad=1
73 | activation=leaky
74 |
75 | [shortcut]
76 | from=-3
77 | activation=linear
78 |
79 | [convolutional]
80 | batch_normalize=1
81 | filters=48
82 | size=1
83 | stride=1
84 | pad=1
85 | activation=leaky
86 |
87 | [convolutional]
88 | batch_normalize=1
89 | filters=64
90 | size=3
91 | stride=1
92 | pad=1
93 | activation=leaky
94 |
95 | [shortcut]
96 | from=-3
97 | activation=linear
98 |
99 | [convolutional]
100 | batch_normalize=1
101 | filters=16
102 | size=1
103 | stride=1
104 | pad=1
105 | activation=leaky
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=64
110 | size=3
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [shortcut]
116 | from=-3
117 | activation=linear
118 |
119 | [route]
120 | layers=-1,-12
121 |
122 | [convolutional]
123 | batch_normalize=1
124 | filters=64
125 | size=1
126 | stride=1
127 | pad=1
128 | activation=leaky
129 |
130 | [convolutional]
131 | batch_normalize=1
132 | filters=16
133 | size=3
134 | stride=2
135 | pad=1
136 | activation=leaky
137 |
138 | [convolutional]
139 | batch_normalize=1
140 | filters=128
141 | size=1
142 | stride=1
143 | pad=1
144 | activation=leaky
145 |
146 | [route]
147 | layers=-2
148 |
149 | [convolutional]
150 | batch_normalize=1
151 | filters=109
152 | size=1
153 | stride=1
154 | pad=1
155 | activation=leaky
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=32
160 | size=1
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=109
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [shortcut]
174 | from=-3
175 | activation=linear
176 |
177 | [convolutional]
178 | batch_normalize=1
179 | filters=16
180 | size=1
181 | stride=1
182 | pad=1
183 | activation=leaky
184 |
185 | [convolutional]
186 | batch_normalize=1
187 | filters=109
188 | size=3
189 | stride=1
190 | pad=1
191 | activation=leaky
192 |
193 | [shortcut]
194 | from=-3
195 | activation=linear
196 |
197 | [convolutional]
198 | batch_normalize=1
199 | filters=8
200 | size=1
201 | stride=1
202 | pad=1
203 | activation=leaky
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=109
208 | size=3
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [shortcut]
214 | from=-3
215 | activation=linear
216 |
217 | [convolutional]
218 | batch_normalize=1
219 | filters=8
220 | size=1
221 | stride=1
222 | pad=1
223 | activation=leaky
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=109
228 | size=3
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [shortcut]
234 | from=-3
235 | activation=linear
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=8
240 | size=1
241 | stride=1
242 | pad=1
243 | activation=leaky
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=109
248 | size=3
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [shortcut]
254 | from=-3
255 | activation=linear
256 |
257 | [convolutional]
258 | batch_normalize=1
259 | filters=8
260 | size=1
261 | stride=1
262 | pad=1
263 | activation=leaky
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=109
268 | size=3
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [shortcut]
274 | from=-3
275 | activation=linear
276 |
277 | [convolutional]
278 | batch_normalize=1
279 | filters=8
280 | size=1
281 | stride=1
282 | pad=1
283 | activation=leaky
284 |
285 | [convolutional]
286 | batch_normalize=1
287 | filters=109
288 | size=3
289 | stride=1
290 | pad=1
291 | activation=leaky
292 |
293 | [shortcut]
294 | from=-3
295 | activation=linear
296 |
297 | [convolutional]
298 | batch_normalize=1
299 | filters=8
300 | size=1
301 | stride=1
302 | pad=1
303 | activation=leaky
304 |
305 | [convolutional]
306 | batch_normalize=1
307 | filters=109
308 | size=3
309 | stride=1
310 | pad=1
311 | activation=leaky
312 |
313 | [shortcut]
314 | from=-3
315 | activation=linear
316 |
317 | [convolutional]
318 | batch_normalize=1
319 | filters=8
320 | size=1
321 | stride=1
322 | pad=1
323 | activation=leaky
324 |
325 | [convolutional]
326 | batch_normalize=1
327 | filters=109
328 | size=3
329 | stride=1
330 | pad=1
331 | activation=leaky
332 |
333 | [shortcut]
334 | from=-3
335 | activation=linear
336 |
337 | [route]
338 | layers=-1,-30
339 |
340 | [convolutional]
341 | batch_normalize=1
342 | filters=32
343 | size=1
344 | stride=1
345 | pad=1
346 | activation=leaky
347 |
348 | [convolutional]
349 | batch_normalize=1
350 | filters=32
351 | size=3
352 | stride=2
353 | pad=1
354 | activation=leaky
355 |
356 | [convolutional]
357 | batch_normalize=1
358 | filters=256
359 | size=1
360 | stride=1
361 | pad=1
362 | activation=leaky
363 |
364 | [route]
365 | layers=-2
366 |
367 | [convolutional]
368 | batch_normalize=1
369 | filters=231
370 | size=1
371 | stride=1
372 | pad=1
373 | activation=leaky
374 |
375 | [convolutional]
376 | batch_normalize=1
377 | filters=32
378 | size=1
379 | stride=1
380 | pad=1
381 | activation=leaky
382 |
383 | [convolutional]
384 | batch_normalize=1
385 | filters=231
386 | size=3
387 | stride=1
388 | pad=1
389 | activation=leaky
390 |
391 | [shortcut]
392 | from=-3
393 | activation=linear
394 |
395 | [convolutional]
396 | batch_normalize=1
397 | filters=32
398 | size=1
399 | stride=1
400 | pad=1
401 | activation=leaky
402 |
403 | [convolutional]
404 | batch_normalize=1
405 | filters=231
406 | size=3
407 | stride=1
408 | pad=1
409 | activation=leaky
410 |
411 | [shortcut]
412 | from=-3
413 | activation=linear
414 |
415 | [convolutional]
416 | batch_normalize=1
417 | filters=32
418 | size=1
419 | stride=1
420 | pad=1
421 | activation=leaky
422 |
423 | [convolutional]
424 | batch_normalize=1
425 | filters=231
426 | size=3
427 | stride=1
428 | pad=1
429 | activation=leaky
430 |
431 | [shortcut]
432 | from=-3
433 | activation=linear
434 |
435 | [convolutional]
436 | batch_normalize=1
437 | filters=16
438 | size=1
439 | stride=1
440 | pad=1
441 | activation=leaky
442 |
443 | [convolutional]
444 | batch_normalize=1
445 | filters=231
446 | size=3
447 | stride=1
448 | pad=1
449 | activation=leaky
450 |
451 | [shortcut]
452 | from=-3
453 | activation=linear
454 |
455 | [convolutional]
456 | batch_normalize=1
457 | filters=24
458 | size=1
459 | stride=1
460 | pad=1
461 | activation=leaky
462 |
463 | [convolutional]
464 | batch_normalize=1
465 | filters=231
466 | size=3
467 | stride=1
468 | pad=1
469 | activation=leaky
470 |
471 | [shortcut]
472 | from=-3
473 | activation=linear
474 |
475 | [convolutional]
476 | batch_normalize=1
477 | filters=16
478 | size=1
479 | stride=1
480 | pad=1
481 | activation=leaky
482 |
483 | [convolutional]
484 | batch_normalize=1
485 | filters=231
486 | size=3
487 | stride=1
488 | pad=1
489 | activation=leaky
490 |
491 | [shortcut]
492 | from=-3
493 | activation=linear
494 |
495 | [convolutional]
496 | batch_normalize=1
497 | filters=8
498 | size=1
499 | stride=1
500 | pad=1
501 | activation=leaky
502 |
503 | [convolutional]
504 | batch_normalize=1
505 | filters=231
506 | size=3
507 | stride=1
508 | pad=1
509 | activation=leaky
510 |
511 | [shortcut]
512 | from=-3
513 | activation=linear
514 |
515 | [convolutional]
516 | batch_normalize=1
517 | filters=8
518 | size=1
519 | stride=1
520 | pad=1
521 | activation=leaky
522 |
523 | [convolutional]
524 | batch_normalize=1
525 | filters=231
526 | size=3
527 | stride=1
528 | pad=1
529 | activation=leaky
530 |
531 | [shortcut]
532 | from=-3
533 | activation=linear
534 |
535 | [convolutional]
536 | batch_normalize=1
537 | filters=8
538 | size=1
539 | stride=1
540 | pad=1
541 | activation=leaky
542 |
543 | [convolutional]
544 | batch_normalize=1
545 | filters=231
546 | size=3
547 | stride=1
548 | pad=1
549 | activation=leaky
550 |
551 | [shortcut]
552 | from=-3
553 | activation=linear
554 |
555 | [route]
556 | layers=-1,-30
557 |
558 | [convolutional]
559 | batch_normalize=1
560 | filters=96
561 | size=1
562 | stride=1
563 | pad=1
564 | activation=leaky
565 |
566 | [convolutional]
567 | batch_normalize=1
568 | filters=408
569 | size=3
570 | stride=2
571 | pad=1
572 | activation=leaky
573 |
574 | [convolutional]
575 | batch_normalize=1
576 | filters=512
577 | size=1
578 | stride=1
579 | pad=1
580 | activation=leaky
581 |
582 | [maxpool]
583 | stride=1
584 | size=5
585 |
586 | [route]
587 | layers=-2
588 |
589 | [maxpool]
590 | stride=1
591 | size=9
592 |
593 | [route]
594 | layers=-4
595 |
596 | [maxpool]
597 | stride=1
598 | size=13
599 |
600 | [route]
601 | layers=-6,-5,-3,-1
602 |
603 | [convolutional]
604 | batch_normalize=1
605 | filters=320
606 | size=1
607 | stride=1
608 | pad=1
609 | activation=leaky
610 |
611 | [convolutional]
612 | batch_normalize=1
613 | filters=432
614 | size=1
615 | stride=1
616 | pad=1
617 | activation=leaky
618 |
619 | [route]
620 | layers=-2
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=152
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [convolutional]
631 | batch_normalize=1
632 | filters=56
633 | size=1
634 | stride=1
635 | pad=1
636 | activation=leaky
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=48
641 | size=3
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | filters=32
649 | size=1
650 | stride=1
651 | pad=1
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=56
657 | size=3
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | filters=48
665 | size=1
666 | stride=1
667 | pad=1
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=16
673 | size=3
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [route]
679 | layers=-1,-9
680 |
681 | [convolutional]
682 | batch_normalize=1
683 | filters=464
684 | size=1
685 | stride=1
686 | pad=1
687 | activation=leaky
688 |
689 | [convolutional]
690 | batch_normalize=1
691 | filters=512
692 | size=1
693 | stride=1
694 | pad=1
695 | activation=leaky
696 |
697 | [upsample]
698 | stride=2
699 |
700 | [route]
701 | layers=-1,-23
702 |
703 | [convolutional]
704 | batch_normalize=1
705 | filters=96
706 | size=1
707 | stride=1
708 | pad=1
709 | activation=leaky
710 |
711 | [route]
712 | layers=-2
713 |
714 | [convolutional]
715 | batch_normalize=1
716 | filters=56
717 | size=1
718 | stride=1
719 | pad=1
720 | activation=leaky
721 |
722 | [convolutional]
723 | batch_normalize=1
724 | filters=80
725 | size=1
726 | stride=1
727 | pad=1
728 | activation=leaky
729 |
730 | [convolutional]
731 | batch_normalize=1
732 | filters=24
733 | size=3
734 | stride=1
735 | pad=1
736 | activation=leaky
737 |
738 | [convolutional]
739 | batch_normalize=1
740 | filters=64
741 | size=1
742 | stride=1
743 | pad=1
744 | activation=leaky
745 |
746 | [convolutional]
747 | batch_normalize=1
748 | filters=40
749 | size=3
750 | stride=1
751 | pad=1
752 | activation=leaky
753 |
754 | [convolutional]
755 | batch_normalize=1
756 | filters=48
757 | size=1
758 | stride=1
759 | pad=1
760 | activation=leaky
761 |
762 | [convolutional]
763 | batch_normalize=1
764 | filters=32
765 | size=3
766 | stride=1
767 | pad=1
768 | activation=leaky
769 |
770 | [route]
771 | layers=-1,-9
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=168
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | filters=256
784 | size=1
785 | stride=1
786 | pad=1
787 | activation=leaky
788 |
789 | [upsample]
790 | stride=2
791 |
792 | [route]
793 | layers=-1,-70
794 |
795 | [convolutional]
796 | batch_normalize=1
797 | filters=24
798 | size=1
799 | stride=1
800 | pad=1
801 | activation=leaky
802 |
803 | [route]
804 | layers=-2
805 |
806 | [convolutional]
807 | batch_normalize=1
808 | filters=8
809 | size=1
810 | stride=1
811 | pad=1
812 | activation=leaky
813 |
814 | [convolutional]
815 | batch_normalize=1
816 | filters=8
817 | size=1
818 | stride=1
819 | pad=1
820 | activation=leaky
821 |
822 | [convolutional]
823 | batch_normalize=1
824 | filters=8
825 | size=3
826 | stride=1
827 | pad=1
828 | activation=leaky
829 |
830 | [convolutional]
831 | batch_normalize=1
832 | filters=8
833 | size=1
834 | stride=1
835 | pad=1
836 | activation=leaky
837 |
838 | [convolutional]
839 | batch_normalize=1
840 | filters=8
841 | size=3
842 | stride=1
843 | pad=1
844 | activation=leaky
845 |
846 | [convolutional]
847 | batch_normalize=1
848 | filters=8
849 | size=1
850 | stride=1
851 | pad=1
852 | activation=leaky
853 |
854 | [convolutional]
855 | batch_normalize=1
856 | filters=24
857 | size=3
858 | stride=1
859 | pad=1
860 | activation=leaky
861 |
862 | [route]
863 | layers=-1,-9
864 |
865 | [convolutional]
866 | batch_normalize=1
867 | filters=176
868 | size=1
869 | stride=1
870 | pad=1
871 | activation=leaky
872 |
873 | [convolutional]
874 | size=1
875 | stride=1
876 | pad=1
877 | filters=24
878 | activation=linear
879 |
880 | [yolo]
881 | mask=0,1,2
882 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
883 | classes=3
884 | num=9
885 | jitter=.3
886 | ignore_thresh=.7
887 | truth_thresh=1
888 | scale_x_y=1.2
889 | iou_thresh=0.213
890 | cls_normalizer=1.0
891 | iou_normalizer=0.07
892 | iou_loss=ciou
893 | nms_kind=greedynms
894 | beta_nms=0.6
895 |
896 | [route]
897 | layers=-3
898 |
899 | [convolutional]
900 | batch_normalize=1
901 | filters=32
902 | size=3
903 | stride=2
904 | pad=1
905 | activation=leaky
906 |
907 | [route]
908 | layers=-1,-18
909 |
910 | [convolutional]
911 | batch_normalize=1
912 | filters=56
913 | size=1
914 | stride=1
915 | pad=1
916 | activation=leaky
917 |
918 | [route]
919 | layers=-2
920 |
921 | [convolutional]
922 | batch_normalize=1
923 | filters=48
924 | size=1
925 | stride=1
926 | pad=1
927 | activation=leaky
928 |
929 | [convolutional]
930 | batch_normalize=1
931 | filters=32
932 | size=1
933 | stride=1
934 | pad=1
935 | activation=leaky
936 |
937 | [convolutional]
938 | batch_normalize=1
939 | filters=32
940 | size=3
941 | stride=1
942 | pad=1
943 | activation=leaky
944 |
945 | [convolutional]
946 | batch_normalize=1
947 | filters=24
948 | size=1
949 | stride=1
950 | pad=1
951 | activation=leaky
952 |
953 | [convolutional]
954 | batch_normalize=1
955 | filters=24
956 | size=3
957 | stride=1
958 | pad=1
959 | activation=leaky
960 |
961 | [convolutional]
962 | batch_normalize=1
963 | filters=40
964 | size=1
965 | stride=1
966 | pad=1
967 | activation=leaky
968 |
969 | [convolutional]
970 | batch_normalize=1
971 | filters=64
972 | size=3
973 | stride=1
974 | pad=1
975 | activation=leaky
976 |
977 | [route]
978 | layers=-1,-9
979 |
980 | [convolutional]
981 | batch_normalize=1
982 | filters=240
983 | size=1
984 | stride=1
985 | pad=1
986 | activation=leaky
987 |
988 | [convolutional]
989 | size=1
990 | stride=1
991 | pad=1
992 | filters=24
993 | activation=linear
994 |
995 | [yolo]
996 | mask=3,4,5
997 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
998 | classes=3
999 | num=9
1000 | jitter=.3
1001 | ignore_thresh=.7
1002 | truth_thresh=1
1003 | scale_x_y=1.2
1004 | iou_thresh=0.213
1005 | cls_normalizer=1.0
1006 | iou_normalizer=0.07
1007 | iou_loss=ciou
1008 | nms_kind=greedynms
1009 | beta_nms=0.6
1010 |
1011 | [route]
1012 | layers=-3
1013 |
1014 | [convolutional]
1015 | batch_normalize=1
1016 | filters=176
1017 | size=3
1018 | stride=2
1019 | pad=1
1020 | activation=leaky
1021 |
1022 | [route]
1023 | layers=-1,-48
1024 |
1025 | [convolutional]
1026 | batch_normalize=1
1027 | filters=144
1028 | size=1
1029 | stride=1
1030 | pad=1
1031 | activation=leaky
1032 |
1033 | [route]
1034 | layers=-2
1035 |
1036 | [convolutional]
1037 | batch_normalize=1
1038 | filters=32
1039 | size=1
1040 | stride=1
1041 | pad=1
1042 | activation=leaky
1043 |
1044 | [convolutional]
1045 | batch_normalize=1
1046 | filters=16
1047 | size=1
1048 | stride=1
1049 | pad=1
1050 | activation=leaky
1051 |
1052 | [convolutional]
1053 | batch_normalize=1
1054 | filters=32
1055 | size=3
1056 | stride=1
1057 | pad=1
1058 | activation=leaky
1059 |
1060 | [convolutional]
1061 | batch_normalize=1
1062 | filters=16
1063 | size=1
1064 | stride=1
1065 | pad=1
1066 | activation=leaky
1067 |
1068 | [convolutional]
1069 | batch_normalize=1
1070 | filters=64
1071 | size=3
1072 | stride=1
1073 | pad=1
1074 | activation=leaky
1075 |
1076 | [convolutional]
1077 | batch_normalize=1
1078 | filters=88
1079 | size=1
1080 | stride=1
1081 | pad=1
1082 | activation=leaky
1083 |
1084 | [convolutional]
1085 | batch_normalize=1
1086 | filters=96
1087 | size=3
1088 | stride=1
1089 | pad=1
1090 | activation=leaky
1091 |
1092 | [route]
1093 | layers=-1,-9
1094 |
1095 | [convolutional]
1096 | batch_normalize=1
1097 | filters=272
1098 | size=1
1099 | stride=1
1100 | pad=1
1101 | activation=leaky
1102 |
1103 | [convolutional]
1104 | size=1
1105 | stride=1
1106 | pad=1
1107 | filters=24
1108 | activation=linear
1109 |
1110 | [yolo]
1111 | mask=6,7,8
1112 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199
1113 | classes=3
1114 | num=9
1115 | jitter=.3
1116 | ignore_thresh=.7
1117 | truth_thresh=1
1118 | scale_x_y=1.2
1119 | iou_thresh=0.213
1120 | cls_normalizer=1.0
1121 | iou_normalizer=0.07
1122 | iou_loss=ciou
1123 | nms_kind=greedynms
1124 | beta_nms=0.6
1125 |
1126 |
--------------------------------------------------------------------------------
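A quick consistency check on the detection heads in the pruned cfg above: each [yolo] block is configured with classes=3 and its mask selects three of the nine listed anchors, so, following the usual darknet head convention, the convolution feeding it must output filters = (classes + 5) × anchors-per-mask = (3 + 5) × 3 = 24, which matches the filters=24, activation=linear layers placed immediately before each [yolo] block.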
/data/coco.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=../coco/trainvalno5k.txt
3 | valid=../coco/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
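The coco.data file above is a plain key=value list consumed by the training and test scripts. A rough sketch of how such a darknet-style .data file can be read into a dict is given below; the repo's own utils/parse_config.py may differ in details, and parse_data_cfg is only an illustrative name:

def parse_data_cfg(path):
    # Illustrative darknet-style .data reader; the repo's utils/parse_config.py
    # may handle extra keys or defaults differently.
    options = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options

# e.g. parse_data_cfg('data/coco.data')['classes'] -> '80' (a string; cast to int as needed)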
/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/data/coco_128img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_128img.txt
3 | valid=./data/coco_128img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_128img.txt:
--------------------------------------------------------------------------------
1 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000009.jpg
2 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000025.jpg
3 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000030.jpg
4 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000034.jpg
5 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000036.jpg
6 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000042.jpg
7 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000049.jpg
8 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000061.jpg
9 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000064.jpg
10 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000071.jpg
11 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000072.jpg
12 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000073.jpg
13 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000074.jpg
14 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000077.jpg
15 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000078.jpg
16 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000081.jpg
17 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000086.jpg
18 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000089.jpg
19 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000092.jpg
20 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000094.jpg
21 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000109.jpg
22 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000110.jpg
23 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000113.jpg
24 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000127.jpg
25 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000133.jpg
26 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000136.jpg
27 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000138.jpg
28 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000142.jpg
29 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000143.jpg
30 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000144.jpg
31 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000149.jpg
32 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000151.jpg
33 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000154.jpg
34 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000164.jpg
35 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000165.jpg
36 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000192.jpg
37 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000194.jpg
38 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000196.jpg
39 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000201.jpg
40 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000208.jpg
41 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000241.jpg
42 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000247.jpg
43 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000250.jpg
44 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000257.jpg
45 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000260.jpg
46 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000263.jpg
47 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000283.jpg
48 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000294.jpg
49 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000307.jpg
50 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000308.jpg
51 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000309.jpg
52 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000312.jpg
53 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000315.jpg
54 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000321.jpg
55 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000322.jpg
56 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000326.jpg
57 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000328.jpg
58 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000332.jpg
59 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000338.jpg
60 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000349.jpg
61 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000357.jpg
62 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000359.jpg
63 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000360.jpg
64 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000368.jpg
65 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000370.jpg
66 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000382.jpg
67 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000384.jpg
68 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000387.jpg
69 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000389.jpg
70 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000394.jpg
71 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000395.jpg
72 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000397.jpg
73 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000400.jpg
74 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000404.jpg
75 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000415.jpg
76 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000419.jpg
77 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000428.jpg
78 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000431.jpg
79 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000436.jpg
80 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000438.jpg
81 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000443.jpg
82 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000446.jpg
83 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000450.jpg
84 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000459.jpg
85 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000471.jpg
86 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000472.jpg
87 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000474.jpg
88 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000486.jpg
89 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000488.jpg
90 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000490.jpg
91 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000491.jpg
92 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000502.jpg
93 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000508.jpg
94 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000510.jpg
95 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000514.jpg
96 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000520.jpg
97 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000529.jpg
98 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000531.jpg
99 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000532.jpg
100 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000536.jpg
101 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000540.jpg
102 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000542.jpg
103 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000544.jpg
104 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000560.jpg
105 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000562.jpg
106 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000564.jpg
107 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000569.jpg
108 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000572.jpg
109 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000575.jpg
110 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000581.jpg
111 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000584.jpg
112 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000589.jpg
113 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000590.jpg
114 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000595.jpg
115 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000597.jpg
116 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000599.jpg
117 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000605.jpg
118 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000612.jpg
119 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000620.jpg
120 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000623.jpg
121 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000625.jpg
122 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000626.jpg
123 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000629.jpg
124 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000634.jpg
125 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000636.jpg
126 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000641.jpg
127 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000643.jpg
128 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000650.jpg
--------------------------------------------------------------------------------
/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh
3 |
4 | # Clone COCO API
5 | git clone https://github.com/pdollar/coco && cd coco
6 |
7 | # Download Images
8 | mkdir images && cd images
9 | wget -c https://pjreddie.com/media/files/train2014.zip
10 | wget -c https://pjreddie.com/media/files/val2014.zip
11 |
12 | # Unzip
13 | unzip -q train2014.zip
14 | unzip -q val2014.zip
15 |
16 | # (optional) Delete zip files
17 | rm -rf *.zip
18 |
19 | cd ..
20 |
21 | # Download COCO Metadata
22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
23 | wget -c https://pjreddie.com/media/files/coco/5k.part
24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz
26 | tar xzf labels.tgz
27 | unzip -q instances_train-val2014.zip
28 |
29 | # Set Up Image Lists
30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
32 |
33 | # get xview training data
34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ'
35 | # tar -xvzf train_images.tgz
36 | # sudo rm -rf train_images/._*
37 | # lastly convert each .tif to a .bmp for faster loading in cv2
38 |
39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image
40 |
--------------------------------------------------------------------------------
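The two paste/awk lines in the script above simply prepend the current working directory to every entry of 5k.part and trainvalno5k.part, producing absolute-path image lists analogous to data/coco_128img.txt. A Python equivalent of that shell idiom, included only for illustration:

import os

def to_absolute(part_file, out_file):
    # Mirrors: paste <(awk "{print \"$PWD\"}" <X.part) X.part | tr -d '\t' > X.txt
    with open(part_file) as src, open(out_file, 'w') as dst:
        for line in src:
            dst.write(os.getcwd() + line.rstrip('\n') + '\n')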
/data/get_coco_dataset_gdrive.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
3 |
4 | # Zip coco folder
5 | # zip -r coco.zip coco
6 | # tar -czvf coco.tar.gz coco
7 |
8 | # Set fileid and filename
9 | filename="coco.zip"
10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip
11 |
12 | # Download from Google Drive, accepting presented query
13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
15 | rm ./cookie
16 |
17 | # Unzip
18 | unzip -q ${filename} # for coco.zip
19 | # tar -xzf ${filename} # for coco.tar.gz
20 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BokyLiu/YoloV5sl_V4_prune/c0ff39c5a5b10cbca95beb597c722cdc02e81885/models/__init__.py
--------------------------------------------------------------------------------
/models/common.py:
--------------------------------------------------------------------------------
1 | # This file contains modules common to various models
2 | import math
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 | def autopad(k, p=None): # kernel, padding
9 | # Pad to 'same'
10 | if p is None:
11 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
12 | return p
13 |
14 |
15 | def DWConv(c1, c2, k=1, s=1, act=True):
16 | # Depthwise convolution
17 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
18 |
19 |
20 | class Conv(nn.Module):
21 | # Standard convolution
22 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
23 | super(Conv, self).__init__()
24 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
25 | self.bn = nn.BatchNorm2d(c2)
26 | #self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity() #yolov5_v2
27 | #self.act = nn.Hardswish() if act else nn.Identity() #yolov5_v3
28 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) #yolov5_v4
29 |
30 | def forward(self, x):
31 | return self.act(self.bn(self.conv(x)))
32 |
33 | def fuseforward(self, x):
34 | return self.act(self.conv(x))
35 |
36 |
37 | class Bottleneck(nn.Module):
38 | # Standard bottleneck
39 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
40 | super(Bottleneck, self).__init__()
41 | c_ = int(c2 * e) # hidden channels
42 | self.cv1 = Conv(c1, c_, 1, 1)
43 | self.cv2 = Conv(c_, c2, 3, 1, g=g)
44 | self.add = shortcut and c1 == c2
45 |
46 | def forward(self, x):
47 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
48 |
49 |
50 | class BottleneckCSP(nn.Module):
51 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
52 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
53 | super(BottleneckCSP, self).__init__()
54 | c_ = int(c2 * e) # hidden channels
55 | self.cv1 = Conv(c1, c_, 1, 1)
56 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
57 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
58 | self.cv4 = Conv(2 * c_, c2, 1, 1)
59 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
60 | self.act = nn.LeakyReLU(0.1, inplace=True)
61 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
62 |
63 | def forward(self, x):
64 | y1 = self.cv3(self.m(self.cv1(x)))
65 | y2 = self.cv2(x)
66 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
67 |
68 | class C3(nn.Module):
69 | # CSP Bottleneck with 3 convolutions
70 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
71 | super(C3, self).__init__()
72 | c_ = int(c2 * e) # hidden channels
73 | self.cv1 = Conv(c1, c_, 1, 1)
74 | self.cv2 = Conv(c1, c_, 1, 1)
75 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
76 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
77 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
78 |
79 | def forward(self, x):
80 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
81 |
82 | class SPP(nn.Module):
83 | # Spatial pyramid pooling layer used in YOLOv3-SPP
84 | def __init__(self, c1, c2, k=(5, 9, 13)):
85 | super(SPP, self).__init__()
86 | c_ = c1 // 2 # hidden channels
87 | self.cv1 = Conv(c1, c_, 1, 1)
88 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
89 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
90 |
91 | def forward(self, x):
92 | x = self.cv1(x)
93 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
94 |
95 |
96 | class Focus(nn.Module):
97 | # Focus wh information into c-space
98 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
99 | super(Focus, self).__init__()
100 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
101 |
102 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
103 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
104 |
105 |
106 | class Concat(nn.Module):
107 | # Concatenate a list of tensors along dimension
108 | def __init__(self, dimension=1):
109 | super(Concat, self).__init__()
110 | self.d = dimension
111 |
112 | def forward(self, x):
113 | return torch.cat(x, self.d)
114 |
115 |
116 | class Flatten(nn.Module):
117 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
118 | @staticmethod
119 | def forward(x):
120 | return x.view(x.size(0), -1)
121 |
122 |
123 | class Classify(nn.Module):
124 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
125 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
126 | super(Classify, self).__init__()
127 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
128 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1)
129 | self.flat = Flatten()
130 |
131 | def forward(self, x):
132 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list
133 | return self.flat(self.conv(z)) # flatten to x(b,c2)
134 |
--------------------------------------------------------------------------------
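The Focus module above trades spatial resolution for channels before its convolution, as the x(b,c,w,h) -> y(b,4c,w/2,h/2) comment indicates. A minimal shape check, assuming the snippet is run from the repo root so models/ is importable:

import torch
from models.common import Focus

m = Focus(3, 32, k=3)            # ch_in=3, ch_out=32, 3x3 conv applied after the slicing
x = torch.zeros(1, 3, 640, 640)  # dummy input image tensor
print(m(x).shape)                # expected: torch.Size([1, 32, 320, 320])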
/models/experimental.py:
--------------------------------------------------------------------------------
1 | # This file contains experimental modules
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 |
7 | from models.common import Conv, DWConv
8 | # from utils.google_utils import attempt_download
9 |
10 |
11 | class CrossConv(nn.Module):
12 | # Cross Convolution Downsample
13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
15 | super(CrossConv, self).__init__()
16 | c_ = int(c2 * e) # hidden channels
17 | self.cv1 = Conv(c1, c_, (1, k), (1, s))
18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
19 | self.add = shortcut and c1 == c2
20 |
21 | def forward(self, x):
22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
23 |
24 | class Sum(nn.Module):
25 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
26 | def __init__(self, n, weight=False): # n: number of inputs
27 | super(Sum, self).__init__()
28 | self.weight = weight # apply weights boolean
29 | self.iter = range(n - 1) # iter object
30 | if weight:
31 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
32 |
33 | def forward(self, x):
34 | y = x[0] # no weight
35 | if self.weight:
36 | w = torch.sigmoid(self.w) * 2
37 | for i in self.iter:
38 | y = y + x[i + 1] * w[i]
39 | else:
40 | for i in self.iter:
41 | y = y + x[i + 1]
42 | return y
43 |
44 |
45 | class GhostConv(nn.Module):
46 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
47 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
48 | super(GhostConv, self).__init__()
49 | c_ = c2 // 2 # hidden channels
50 |         self.cv1 = Conv(c1, c_, k, s, None, g, act)
51 |         self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
52 |
53 | def forward(self, x):
54 | y = self.cv1(x)
55 | return torch.cat([y, self.cv2(y)], 1)
56 |
57 |
58 | class GhostBottleneck(nn.Module):
59 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
60 | def __init__(self, c1, c2, k, s):
61 | super(GhostBottleneck, self).__init__()
62 | c_ = c2 // 2
63 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
64 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
65 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
66 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
67 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
68 |
69 | def forward(self, x):
70 | return self.conv(x) + self.shortcut(x)
71 |
72 |
73 | class MixConv2d(nn.Module):
74 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
75 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
76 | super(MixConv2d, self).__init__()
77 | groups = len(k)
78 | if equal_ch: # equal c_ per group
79 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
80 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
81 | else: # equal weight.numel() per group
82 | b = [c2] + [0] * groups
83 | a = np.eye(groups + 1, groups, k=-1)
84 | a -= np.roll(a, 1, axis=1)
85 | a *= np.array(k) ** 2
86 | a[0] = 1
87 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
88 |
89 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
90 | self.bn = nn.BatchNorm2d(c2)
91 | self.act = nn.LeakyReLU(0.1, inplace=True)
92 |
93 | def forward(self, x):
94 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
95 |
96 |
97 | class Ensemble(nn.ModuleList):
98 | # Ensemble of models
99 | def __init__(self):
100 | super(Ensemble, self).__init__()
101 |
102 | def forward(self, x, augment=False):
103 | y = []
104 | for module in self:
105 | y.append(module(x, augment)[0])
106 | # y = torch.stack(y).max(0)[0] # max ensemble
107 | # y = torch.cat(y, 1) # nms ensemble
108 | y = torch.stack(y).mean(0) # mean ensemble
109 | return y, None # inference, train output
110 |
111 |
112 | # def attempt_load(weights, map_location=None):
113 | # # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
114 | # model = Ensemble()
115 | # for w in weights if isinstance(weights, list) else [weights]:
116 | # attempt_download(w)
117 | # model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model
118 | #
119 | # if len(model) == 1:
120 | # return model[-1] # return model
121 | # else:
122 | # print('Ensemble created with %s\n' % weights)
123 | # for k in ['names', 'stride']:
124 | # setattr(model, k, getattr(model[-1], k))
125 | # return model # return ensemble
126 |
--------------------------------------------------------------------------------
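Most of these experimental modules keep the input resolution and only change the channel bookkeeping. A minimal check of Sum above, again assuming models/ is importable from the repo root: with weight=False it is a plain element-wise sum of its inputs.

import torch
from models.experimental import Sum

m = Sum(3)                  # three inputs, no learnable weights
x = torch.ones(1, 8, 4, 4)
y = m([x, x, x])
print(y.mean().item())      # expected: 3.0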
/models/export.py:
--------------------------------------------------------------------------------
1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
2 |
3 | Usage:
4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
5 | """
6 |
7 | import argparse
8 |
9 | import torch
10 |
11 | from utils.google_utils import attempt_download
12 |
13 | if __name__ == '__main__':
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')
16 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')
17 | parser.add_argument('--batch-size', type=int, default=1, help='batch size')
18 | opt = parser.parse_args()
19 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
20 | print(opt)
21 |
22 | # Input
23 | img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection
24 |
25 | # Load PyTorch model
26 | attempt_download(opt.weights)
27 | model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float()
28 | model.eval()
29 | model.model[-1].export = True # set Detect() layer export=True
30 | y = model(img) # dry run
31 |
32 | # TorchScript export
33 | try:
34 | print('\nStarting TorchScript export with torch %s...' % torch.__version__)
35 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename
36 | ts = torch.jit.trace(model, img)
37 | ts.save(f)
38 | print('TorchScript export success, saved as %s' % f)
39 | except Exception as e:
40 | print('TorchScript export failure: %s' % e)
41 |
42 | # ONNX export
43 | try:
44 | import onnx
45 |
46 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
47 | f = opt.weights.replace('.pt', '.onnx') # filename
48 | model.fuse() # only for ONNX
49 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
50 | output_names=['classes', 'boxes'] if y is None else ['output'])
51 |
52 | # Checks
53 | onnx_model = onnx.load(f) # load onnx model
54 | onnx.checker.check_model(onnx_model) # check onnx model
55 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
56 | print('ONNX export success, saved as %s' % f)
57 | except Exception as e:
58 | print('ONNX export failure: %s' % e)
59 |
60 | # CoreML export
61 | try:
62 | import coremltools as ct
63 |
64 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
65 | # convert model from torchscript and apply pixel scaling as per detect.py
66 | model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
67 | f = opt.weights.replace('.pt', '.mlmodel') # filename
68 | model.save(f)
69 | print('CoreML export success, saved as %s' % f)
70 | except Exception as e:
71 | print('CoreML export failure: %s' % e)
72 |
73 | # Finish
74 | print('\nExport complete. Visualize with https://github.com/lutzroeder/netron.')
75 |
--------------------------------------------------------------------------------
/models/hub/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3-SPP head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, SPP, [512, [5, 9, 13]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/models/hub/yolov5-fpn.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, Bottleneck, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 6, BottleneckCSP, [1024]], # 9
25 | ]
26 |
27 | # YOLOv5 FPN head
28 | head:
29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
30 |
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
35 |
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 1, Conv, [256, 1, 1]],
39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
40 |
41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42 | ]
43 |
--------------------------------------------------------------------------------
/models/hub/yolov5-panet.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [116,90, 156,198, 373,326] # P5/32
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [10,13, 16,30, 33,23] # P3/8
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 PANet head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolo.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import math
3 | from copy import deepcopy
4 | from pathlib import Path
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat,C3
10 | from models.experimental import MixConv2d, CrossConv
11 | from utils.general import check_anchor_order, make_divisible, check_file
12 | from utils.torch_utils import (
13 |     time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device)
14 |
15 |
16 | class Detect(nn.Module):
17 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer
18 | super(Detect, self).__init__()
19 | self.stride = None # strides computed during build
20 | self.nc = nc # number of classes
21 | self.no = nc + 5 # number of outputs per anchor
22 | self.nl = len(anchors) # number of detection layers
23 | self.na = len(anchors[0]) // 2 # number of anchors
24 | self.grid = [torch.zeros(1)] * self.nl # init grid
25 | a = torch.tensor(anchors).float().view(self.nl, -1, 2)
26 | self.register_buffer('anchors', a) # shape(nl,na,2)
27 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
28 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
29 | self.export = False # onnx export
30 |
31 | def forward(self, x):
32 | # x = x.copy() # for profiling
33 | z = [] # inference output
34 | self.training |= self.export
35 | for i in range(self.nl):
36 | x[i] = self.m[i](x[i]) # conv
37 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
38 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
39 |
40 | if not self.training: # inference
41 | if self.grid[i].shape[2:4] != x[i].shape[2:4]:
42 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
43 |
44 | y = x[i].sigmoid()
45 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
46 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
47 | z.append(y.view(bs, -1, self.no))
48 |
49 | return x if self.training else (torch.cat(z, 1), x)
50 |
51 | @staticmethod
52 | def _make_grid(nx=20, ny=20):
53 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
54 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
55 |
56 |
57 | class Model(nn.Module):
58 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes
59 | super(Model, self).__init__()
60 | if isinstance(cfg, dict):
61 | self.yaml = cfg # model dict
62 | else: # is *.yaml
63 | import yaml # for torch hub
64 | self.yaml_file = Path(cfg).name
65 | with open(cfg) as f:
66 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
67 |
68 | # Define model
69 | if nc and nc != self.yaml['nc']:
70 | print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
71 | self.yaml['nc'] = nc # override yaml value
72 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out
73 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
74 |
75 | # Build strides, anchors
76 | m = self.model[-1] # Detect()
77 | if isinstance(m, Detect):
78 | s = 128 # 2x min stride
79 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
80 | m.anchors /= m.stride.view(-1, 1, 1)
81 | check_anchor_order(m)
82 | self.stride = m.stride
83 | self._initialize_biases() # only run once
84 | # print('Strides: %s' % m.stride.tolist())
85 |
86 | # Init weights, biases
87 | initialize_weights(self)
88 | self.info()
89 | print('')
90 |
91 | def forward(self, x, augment=False, profile=False):
92 | if augment:
93 | img_size = x.shape[-2:] # height, width
94 | s = [1, 0.83, 0.67] # scales
95 | f = [None, 3, None] # flips (2-ud, 3-lr)
96 | y = [] # outputs
97 | for si, fi in zip(s, f):
98 | xi = scale_img(x.flip(fi) if fi else x, si)
99 | yi = self.forward_once(xi)[0] # forward
100 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
101 | yi[..., :4] /= si # de-scale
102 | if fi == 2:
103 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
104 | elif fi == 3:
105 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
106 | y.append(yi)
107 | return torch.cat(y, 1), None # augmented inference, train
108 | else:
109 | return self.forward_once(x, profile) # single-scale inference, train
110 |
111 | def forward_once(self, x, profile=False):
112 | y, dt = [], [] # outputs
113 | for m in self.model:
114 | if m.f != -1: # if not from previous layer
115 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
116 |
117 | if profile:
118 | try:
119 | import thop
120 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS
121 | except:
122 | o = 0
123 | t = time_synchronized()
124 | for _ in range(10):
125 | _ = m(x)
126 | dt.append((time_synchronized() - t) * 100)
127 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type))
128 |
129 | x = m(x) # run
130 | y.append(x if m.i in self.save else None) # save output
131 |
132 | if profile:
133 | print('%.1fms total' % sum(dt))
134 | return x
135 |
136 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
137 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
138 | m = self.model[-1] # Detect() module
139 | for mi, s in zip(m.m, m.stride): # from
140 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
141 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
142 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls
143 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
144 |
145 | def _print_biases(self):
146 | m = self.model[-1] # Detect() module
147 | for mi in m.m: # from
148 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85)
149 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean()))
150 |
151 | # def _print_weights(self):
152 | # for m in self.model.modules():
153 | # if type(m) is Bottleneck:
154 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights
155 |
156 | # def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
157 | # print('Fusing layers... ', end='')
158 | # for m in self.model.modules():
159 | # if type(m) is Conv:
160 | # m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatability
161 | # m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
162 | # m.bn = None # remove batchnorm
163 | # m.forward = m.fuseforward # update forward
164 | # self.info()
165 | # return self
166 |
167 | def info(self): # print model information
168 | model_info(self)
169 |
170 |
171 | def parse_model(d, ch): # model_dict, input_channels(3)
172 | print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
173 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
174 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors
175 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5)
176 |
177 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
178 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args
179 | m = eval(m) if isinstance(m, str) else m # eval strings
180 | for j, a in enumerate(args):
181 | try:
182 | args[j] = eval(a) if isinstance(a, str) else a # eval strings
183 | except:
184 | pass
185 |
186 | n = max(round(n * gd), 1) if n > 1 else n # depth gain
187 | if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
188 | c1, c2 = ch[f], args[0]
189 |
190 | # Normal
191 | # if i > 0 and args[0] != no: # channel expansion factor
192 | # ex = 1.75 # exponential (default 2.0)
193 | # e = math.log(c2 / ch[1]) / math.log(2)
194 | # c2 = int(ch[1] * ex ** e)
195 | # if m != Focus:
196 |
197 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
198 |
199 | # Experimental
200 | # if i > 0 and args[0] != no: # channel expansion factor
201 | # ex = 1 + gw # exponential (default 2.0)
202 | # ch1 = 32 # ch[1]
203 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n
204 | # c2 = int(ch1 * ex ** e)
205 | # if m != Focus:
206 | # c2 = make_divisible(c2, 8) if c2 != no else c2
207 |
208 | args = [c1, c2, *args[1:]]
209 | if m in [BottleneckCSP, C3]:
210 | args.insert(2, n)
211 | n = 1
212 | elif m is nn.BatchNorm2d:
213 | args = [ch[f]]
214 | elif m is Concat:
215 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
216 | elif m is Detect:
217 | args.append([ch[x + 1] for x in f])
218 | if isinstance(args[1], int): # number of anchors
219 | args[1] = [list(range(args[1] * 2))] * len(f)
220 | else:
221 | c2 = ch[f]
222 |
223 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
224 | t = str(m)[8:-2].replace('__main__.', '') # module type
225 | np = sum([x.numel() for x in m_.parameters()]) # number params
226 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
227 | print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
228 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
229 | layers.append(m_)
230 | ch.append(c2)
231 | return nn.Sequential(*layers), sorted(save)
232 |
233 |
234 | if __name__ == '__main__':
235 | parser = argparse.ArgumentParser()
236 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
237 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
238 | opt = parser.parse_args()
239 | opt.cfg = check_file(opt.cfg) # check file
240 | device = select_device(opt.device)
241 |
242 | # Create model
243 | model = Model(opt.cfg).to(device)
244 | model.train()
245 |
246 | # Profile
247 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
248 | # y = model(img, profile=True)
249 |
250 | # ONNX export
251 | # model.model[-1].export = True
252 | # torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11)
253 |
254 | # Tensorboard
255 | # from torch.utils.tensorboard import SummaryWriter
256 | # tb_writer = SummaryWriter()
257 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/")
258 | # tb_writer.add_graph(model.model, img) # add model to tensorboard
259 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard
260 |
--------------------------------------------------------------------------------
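For the default nc=80 with three anchors per detection layer, Detect above predicts no = nc + 5 = 85 values per anchor, so each output convolution produces na × no = 3 × 85 = 255 channels; this is exactly the reshape described by the x(bs,255,20,20) to x(bs,3,20,20,85) comment in Detect.forward.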
/models/yolov5s_v4.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, C3, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
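parse_model in models/yolo.py scales this yaml before building the modules: output channels are multiplied by width_multiple and rounded up to a multiple of 8 with make_divisible, and repeat counts are multiplied by depth_multiple. A small worked example for the values above; make_divisible is reproduced here only for illustration and is assumed to behave like the helper in utils/general.py:

import math

def make_divisible(x, divisor):
    # round up to the nearest multiple of divisor
    return math.ceil(x / divisor) * divisor

gw, gd = 0.50, 0.33                  # width_multiple, depth_multiple from yolov5s_v4.yaml
print(make_divisible(1024 * gw, 8))  # SPP / last C3 channels: 1024 -> 512
print(make_divisible(64 * gw, 8))    # Focus channels:           64 -> 32
print(max(round(9 * gd), 1))         # C3 repeat count:           9 -> 3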
/prune_yolov5s.sh:
--------------------------------------------------------------------------------
1 | python prune_yolov5s.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --percent 0.8 --img_size 640
--------------------------------------------------------------------------------
/slim_prune_yolov5s_8x.sh:
--------------------------------------------------------------------------------
1 | python slim_prune_yolov5s_8x.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --global_percent 0.5 --layer_keep 0.01 --img_size 640
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 |
4 | from torch.utils.data import DataLoader
5 |
6 | from modelsori import *
7 | from utils.datasets import *
8 | from utils.utils import *
9 | import torchvision
10 |
11 | def box_iouv5(box1, box2):
12 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
13 | """
14 | Return intersection-over-union (Jaccard index) of boxes.
15 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
16 | Arguments:
17 | box1 (Tensor[N, 4])
18 | box2 (Tensor[M, 4])
19 | Returns:
20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise
21 | IoU values for every element in boxes1 and boxes2
22 | """
23 |
24 | def box_area(box):
25 | # box = 4xn
26 | return (box[2] - box[0]) * (box[3] - box[1])
27 |
28 | area1 = box_area(box1.T)
29 | area2 = box_area(box2.T)
30 |
31 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
32 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
33 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
34 |
35 | def non_max_suppressionv5(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
36 | """Performs Non-Maximum Suppression (NMS) on inference results
37 |
38 | Returns:
39 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
40 | """
41 | if prediction.dtype is torch.float16:
42 | prediction = prediction.float() # to FP32
43 |
44 | nc = prediction[0].shape[1] - 5 # number of classes
45 | xc = prediction[..., 4] > conf_thres # candidates
46 |
47 | # Settings
48 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
49 | max_det = 300 # maximum number of detections per image
50 | time_limit = 10.0 # seconds to quit after
51 | redundant = True # require redundant detections
52 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
53 |
54 | t = time.time()
55 | output = [None] * prediction.shape[0]
56 | for xi, x in enumerate(prediction): # image index, image inference
57 | # Apply constraints
58 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
59 | x = x[xc[xi]] # confidence
60 |
61 | # If none remain process next image
62 | if not x.shape[0]:
63 | continue
64 |
65 | # Compute conf
66 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
67 |
68 | # Box (center x, center y, width, height) to (x1, y1, x2, y2)
69 | box = xywh2xyxy(x[:, :4])
70 |
71 | # Detections matrix nx6 (xyxy, conf, cls)
72 | if multi_label:
73 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
74 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
75 | else: # best class only
76 | conf, j = x[:, 5:].max(1, keepdim=True)
77 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
78 |
79 | # Filter by class
80 | if classes:
81 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
82 |
83 | # Apply finite constraint
84 | # if not torch.isfinite(x).all():
85 | # x = x[torch.isfinite(x).all(1)]
86 |
87 | # If none remain process next image
88 | n = x.shape[0] # number of boxes
89 | if not n:
90 | continue
91 |
92 | # Sort by confidence
93 | # x = x[x[:, 4].argsort(descending=True)]
94 |
95 | # Batched NMS
96 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
97 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
98 | i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
99 | if i.shape[0] > max_det: # limit detections
100 | i = i[:max_det]
101 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
102 | try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
103 | iou = box_iouv5(boxes[i], boxes) > iou_thres # iou matrix
104 | weights = iou * scores[None] # box weights
105 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
106 | if redundant:
107 | i = i[iou.sum(1) > 1] # require redundancy
108 | except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
109 | print(x, i, x.shape, i.shape)
110 | pass
111 |
112 | output[xi] = x[i]
113 | if (time.time() - t) > time_limit:
114 | break # time limit exceeded
115 |
116 | return output
117 |
118 | def test(cfg,
119 | data,
120 | weights=None,
121 | batch_size=16,
122 | img_size=416,
123 | iou_thres=0.5,
124 | conf_thres=0.001,
125 | nms_thres=0.5,
126 | save_json=False,
127 | model=None):
128 |
129 | # Initialize/load model and set device
130 | if model is None:
131 | device = torch_utils.select_device(opt.device)
132 | verbose = True
133 |
134 | # Initialize model
135 | model = Darknet(cfg, img_size).to(device)
136 |
137 | # Load weights
138 | attempt_download(weights)
139 | if weights.endswith('.pt'): # pytorch format
140 | model.load_state_dict(torch.load(weights, map_location=device)['model'])
141 | else: # darknet format
142 | _ = load_darknet_weights(model, weights)
143 |
144 | if torch.cuda.device_count() > 1:
145 | model = nn.DataParallel(model)
146 | else:
147 | device = next(model.parameters()).device # get model device
148 | verbose = False
149 |
150 | # Configure run
151 | data = parse_data_cfg(data)
152 | nc = int(data['classes']) # number of classes
153 | test_path = data['valid'] # path to test images
154 | names = load_classes(data['names']) # class names
155 |
156 | # Dataloader
157 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
158 | dataloader = DataLoader(dataset,
159 | batch_size=batch_size,
160 | num_workers=min([os.cpu_count(), batch_size, 16]),
161 | pin_memory=True,
162 | collate_fn=dataset.collate_fn)
163 |
164 | seen = 0
165 | model.eval()
166 | coco91class = coco80_to_coco91_class()  # 80-index -> 91-id mapping (assumed available via utils.utils, as upstream); only used with --save-json
167 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1')
168 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
169 | loss = torch.zeros(3)
170 | jdict, stats, ap, ap_class = [], [], [], []
171 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
172 | targets = targets.to(device)
173 | imgs = imgs.to(device)
174 | _, _, height, width = imgs.shape # batch size, channels, height, width
175 |
176 | # Plot images with bounding boxes
177 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
178 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg')
179 |
180 | # Run model
181 | inf_out, train_out = model(imgs) # inference and training outputs
182 |
183 | # Compute loss
184 | if hasattr(model, 'hyp'): # if model has loss hyperparameters
185 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls
186 |
187 | # Run NMS
188 | # output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
189 | output = non_max_suppressionv5(inf_out, conf_thres=conf_thres, iou_thres=nms_thres, classes=None, agnostic=False)
190 |
191 | # Statistics per image
192 | for si, pred in enumerate(output):
193 | labels = targets[targets[:, 0] == si, 1:]
194 | nl = len(labels)
195 | tcls = labels[:, 0].tolist() if nl else [] # target class
196 | seen += 1
197 |
198 | if pred is None:
199 | if nl:
200 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
201 | continue
202 |
203 | # Append to text file
204 | # with open('test.txt', 'a') as file:
205 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
206 |
207 | # Append to pycocotools JSON dictionary
208 | if save_json:
209 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
210 | image_id = int(Path(paths[si]).stem.split('_')[-1])
211 | box = pred[:, :4].clone() # xyxy
212 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape
213 | box = xyxy2xywh(box) # xywh
214 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
215 | for di, d in enumerate(pred):
216 | jdict.append({'image_id': image_id,
217 | 'category_id': coco91class[int(d[5])],  # class index is column 5 of the nx6 detections
218 | 'bbox': [floatn(x, 3) for x in box[di]],
219 | 'score': floatn(d[4], 5)})
220 |
221 | # Clip boxes to image bounds
222 | clip_coords(pred, (height, width))
223 |
224 | # Assign all predictions as incorrect
225 | correct = [0] * len(pred)
226 | if nl:
227 | detected = []
228 | tcls_tensor = labels[:, 0]
229 |
230 | # target boxes
231 | tbox = xywh2xyxy(labels[:, 1:5])
232 | tbox[:, [0, 2]] *= width
233 | tbox[:, [1, 3]] *= height
234 |
235 | # Search for correct predictions
236 | for i, (*pbox, pconf, pcls) in enumerate(pred):
237 | # for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
238 |
239 | # Break if all targets already located in image
240 | if len(detected) == nl:
241 | break
242 |
243 | # Continue if predicted class not among image classes
244 | if pcls.item() not in tcls:
245 | continue
246 |
247 | # Best iou, index between pred and targets
248 | m = (pcls == tcls_tensor).nonzero().view(-1)
249 | iou, bi = bbox_iou(pbox, tbox[m]).max(0)
250 |
251 | # If iou > threshold and class is correct mark as correct
252 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]:
253 | correct[i] = 1
254 | detected.append(m[bi])
255 |
256 | # Append statistics (correct, conf, pcls, tcls)
257 | # stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
258 | stats.append((correct, pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
259 |
260 | # Compute statistics
261 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy
262 | if len(stats):
263 | p, r, ap, f1, ap_class = ap_per_class(*stats)
264 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
265 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
266 | else:
267 | nt = torch.zeros(1)
268 |
269 | # Print results
270 | pf = '%20s' + '%10.3g' * 6 # print format
271 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))
272 |
273 | # Print results per class
274 | if verbose and nc > 1 and len(stats):
275 | for i, c in enumerate(ap_class):
276 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
277 |
278 | # Save JSON
279 | if save_json and map and len(jdict):
280 | try:
281 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
282 | with open('results.json', 'w') as file:
283 | json.dump(jdict, file)
284 |
285 | from pycocotools.coco import COCO
286 | from pycocotools.cocoeval import COCOeval
287 |
288 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
289 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api
290 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api
291 |
292 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
293 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images
294 | cocoEval.evaluate()
295 | cocoEval.accumulate()
296 | cocoEval.summarize()
297 | map = cocoEval.stats[1] # update mAP to pycocotools mAP
298 | except:
299 | print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.')
300 |
301 | # Return results
302 | maps = np.zeros(nc) + map
303 | for i, c in enumerate(ap_class):
304 | maps[c] = ap[i]
305 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
306 |
307 |
308 | if __name__ == '__main__':
309 | parser = argparse.ArgumentParser(prog='test.py')
310 | parser.add_argument('--cfg', type=str, default='cfg/yolov5s.cfg', help='cfg file path')
311 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
312 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
313 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
314 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
315 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
316 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
317 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
318 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
319 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')
320 | opt = parser.parse_args()
321 | print(opt)
322 |
323 | with torch.no_grad():
324 | test(opt.cfg,
325 | opt.data,
326 | opt.weights,
327 | opt.batch_size,
328 | opt.img_size,
329 | opt.iou_thres,
330 | opt.conf_thres,
331 | opt.nms_thres,
332 | opt.save_json)
333 |
--------------------------------------------------------------------------------
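A small, self-contained check of the box_iouv5 helper defined in test.py above (it assumes the repo's modules import cleanly, since test.py pulls in modelsori and utils at import time):

    import torch
    from test import box_iouv5                  # pairwise IoU in (x1, y1, x2, y2) format

    a = torch.tensor([[0., 0., 10., 10.]])      # 1 box
    b = torch.tensor([[0., 0., 10., 10.],
                      [5., 5., 15., 15.]])      # 2 boxes
    print(box_iouv5(a, b))                      # tensor([[1.0000, 0.1429]]) -- an N x M IoU matrix
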
/test_yolov5s.py:
--------------------------------------------------------------------------------
1 | from modelsori import *
2 | from utils.utils import *
3 | import numpy as np
4 | from copy import deepcopy
5 | from test import test
6 | from terminaltables import AsciiTable
7 | import time
8 | from utils.prune_utils import *
9 | import argparse
10 |
11 | from models.yolo import Model
12 |
13 | import torchvision
14 |
15 | def letterboxv5(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
16 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
17 | shape = img.shape[:2] # current shape [height, width]
18 | if isinstance(new_shape, int):
19 | new_shape = (new_shape, new_shape)
20 |
21 | # Scale ratio (new / old)
22 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
23 | if not scaleup: # only scale down, do not scale up (for better test mAP)
24 | r = min(r, 1.0)
25 |
26 | # Compute padding
27 | ratio = r, r # width, height ratios
28 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
29 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
30 | if auto: # minimum rectangle
31 | # dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
32 | dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
33 | elif scaleFill: # stretch
34 | dw, dh = 0.0, 0.0
35 | new_unpad = (new_shape[1], new_shape[0])
36 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
37 |
38 | dw /= 2 # divide padding into 2 sides
39 | dh /= 2
40 |
41 | if shape[::-1] != new_unpad: # resize
42 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
43 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
44 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
45 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
46 | return img, ratio, (dw, dh)
47 |
48 | def box_iou(box1, box2):
49 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
50 | """
51 | Return intersection-over-union (Jaccard index) of boxes.
52 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
53 | Arguments:
54 | box1 (Tensor[N, 4])
55 | box2 (Tensor[M, 4])
56 | Returns:
57 | iou (Tensor[N, M]): the NxM matrix containing the pairwise
58 | IoU values for every element in boxes1 and boxes2
59 | """
60 |
61 | def box_area(box):
62 | # box = 4xn
63 | return (box[2] - box[0]) * (box[3] - box[1])
64 |
65 | area1 = box_area(box1.T)
66 | area2 = box_area(box2.T)
67 |
68 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
69 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
70 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
71 |
72 | def non_max_suppressionv5(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False):
73 | """Performs Non-Maximum Suppression (NMS) on inference results
74 |
75 | Returns:
76 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
77 | """
78 | if prediction.dtype is torch.float16:
79 | prediction = prediction.float() # to FP32
80 |
81 | nc = prediction[0].shape[1] - 5 # number of classes
82 | xc = prediction[..., 4] > conf_thres # candidates
83 |
84 | # Settings
85 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
86 | max_det = 300 # maximum number of detections per image
87 | time_limit = 10.0 # seconds to quit after
88 | redundant = True # require redundant detections
89 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
90 |
91 | t = time.time()
92 | output = [None] * prediction.shape[0]
93 | for xi, x in enumerate(prediction): # image index, image inference
94 | # Apply constraints
95 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height
96 | x = x[xc[xi]] # confidence
97 |
98 | # If none remain process next image
99 | if not x.shape[0]:
100 | continue
101 |
102 | # Compute conf
103 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
104 |
105 | # Box (center x, center y, width, height) to (x1, y1, x2, y2)
106 | box = xywh2xyxy(x[:, :4])
107 |
108 | # Detections matrix nx6 (xyxy, conf, cls)
109 | if multi_label:
110 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
111 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
112 | else: # best class only
113 | conf, j = x[:, 5:].max(1, keepdim=True)
114 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
115 |
116 | # Filter by class
117 | if classes:
118 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
119 |
120 | # Apply finite constraint
121 | # if not torch.isfinite(x).all():
122 | # x = x[torch.isfinite(x).all(1)]
123 |
124 | # If none remain process next image
125 | n = x.shape[0] # number of boxes
126 | if not n:
127 | continue
128 |
129 | # Sort by confidence
130 | # x = x[x[:, 4].argsort(descending=True)]
131 |
132 | # Batched NMS
133 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
134 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
135 | i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
136 | if i.shape[0] > max_det: # limit detections
137 | i = i[:max_det]
138 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
139 | try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
140 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
141 | weights = iou * scores[None] # box weights
142 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
143 | if redundant:
144 | i = i[iou.sum(1) > 1] # require redundancy
145 | except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139
146 | print(x, i, x.shape, i.shape)
147 | pass
148 |
149 | output[xi] = x[i]
150 | if (time.time() - t) > time_limit:
151 | break # time limit exceeded
152 |
153 | return output
154 |
155 | def plot_one_box(x, img, color=None, label=None, line_thickness=None):
156 | # Plots one bounding box on image img
157 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
158 | color = color or [random.randint(0, 255) for _ in range(3)]
159 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
160 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
161 | if label:
162 | tf = max(tl - 1, 1) # font thickness
163 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
164 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
165 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled
166 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
167 |
168 | def copy_conv(conv_src,conv_dst):
169 | conv_dst[0] = conv_src.conv
170 | conv_dst[1] = conv_src.bn
171 | conv_dst[2] = conv_src.act
172 |
173 | def copy_weight_v4(modelyolov5,model):
174 | focus = list(modelyolov5.model.children())[0]
175 | copy_conv(focus.conv, model.module_list[1])
176 | conv1 = list(modelyolov5.model.children())[1]
177 | copy_conv(conv1, model.module_list[2])
178 | cspnet1 = list(modelyolov5.model.children())[2]
179 | copy_conv(cspnet1.cv2, model.module_list[3])
180 | copy_conv(cspnet1.cv1, model.module_list[5])
181 | copy_conv(cspnet1.m[0].cv1, model.module_list[6])
182 | copy_conv(cspnet1.m[0].cv2, model.module_list[7])
183 | copy_conv(cspnet1.cv3, model.module_list[10])
184 | conv2 = list(modelyolov5.model.children())[3]
185 | copy_conv(conv2, model.module_list[11])
186 | cspnet2 = list(modelyolov5.model.children())[4]
187 | copy_conv(cspnet2.cv2, model.module_list[12])
188 | copy_conv(cspnet2.cv1, model.module_list[14])
189 | copy_conv(cspnet2.m[0].cv1, model.module_list[15])
190 | copy_conv(cspnet2.m[0].cv2, model.module_list[16])
191 | copy_conv(cspnet2.m[1].cv1, model.module_list[18])
192 | copy_conv(cspnet2.m[1].cv2, model.module_list[19])
193 | copy_conv(cspnet2.m[2].cv1, model.module_list[21])
194 | copy_conv(cspnet2.m[2].cv2, model.module_list[22])
195 | copy_conv(cspnet2.cv3, model.module_list[25])
196 | conv3 = list(modelyolov5.model.children())[5]
197 | copy_conv(conv3, model.module_list[26])
198 | cspnet3 = list(modelyolov5.model.children())[6]
199 | copy_conv(cspnet3.cv2, model.module_list[27])
200 | copy_conv(cspnet3.cv1, model.module_list[29])
201 | copy_conv(cspnet3.m[0].cv1, model.module_list[30])
202 | copy_conv(cspnet3.m[0].cv2, model.module_list[31])
203 | copy_conv(cspnet3.m[1].cv1, model.module_list[33])
204 | copy_conv(cspnet3.m[1].cv2, model.module_list[34])
205 | copy_conv(cspnet3.m[2].cv1, model.module_list[36])
206 | copy_conv(cspnet3.m[2].cv2, model.module_list[37])
207 | copy_conv(cspnet3.cv3, model.module_list[40])
208 | conv4 = list(modelyolov5.model.children())[7]
209 | copy_conv(conv4, model.module_list[41])
210 | spp = list(modelyolov5.model.children())[8]
211 | copy_conv(spp.cv1, model.module_list[42])
212 | model.module_list[43] = spp.m[0]
213 | model.module_list[45] = spp.m[1]
214 | model.module_list[47] = spp.m[2]
215 | copy_conv(spp.cv2, model.module_list[49])
216 | cspnet4 = list(modelyolov5.model.children())[9]
217 | copy_conv(cspnet4.cv2, model.module_list[50])
218 | copy_conv(cspnet4.cv1, model.module_list[52])
219 | copy_conv(cspnet4.m[0].cv1, model.module_list[53])
220 | copy_conv(cspnet4.m[0].cv2, model.module_list[54])
221 | copy_conv(cspnet4.cv3, model.module_list[56])
222 | conv5 = list(modelyolov5.model.children())[10]
223 | copy_conv(conv5, model.module_list[57])
224 | upsample1 = list(modelyolov5.model.children())[11]
225 | model.module_list[58] = upsample1
226 | cspnet5 = list(modelyolov5.model.children())[13]
227 | copy_conv(cspnet5.cv2, model.module_list[60])
228 | copy_conv(cspnet5.cv1, model.module_list[62])
229 | copy_conv(cspnet5.m[0].cv1, model.module_list[63])
230 | copy_conv(cspnet5.m[0].cv2, model.module_list[64])
231 | copy_conv(cspnet5.cv3, model.module_list[66])
232 | conv6 = list(modelyolov5.model.children())[14]
233 | copy_conv(conv6, model.module_list[67])
234 | upsample2 = list(modelyolov5.model.children())[15]
235 | model.module_list[68] = upsample2
236 | cspnet6 = list(modelyolov5.model.children())[17]
237 | copy_conv(cspnet6.cv2, model.module_list[70])
238 | copy_conv(cspnet6.cv1, model.module_list[72])
239 | copy_conv(cspnet6.m[0].cv1, model.module_list[73])
240 | copy_conv(cspnet6.m[0].cv2, model.module_list[74])
241 | copy_conv(cspnet6.cv3, model.module_list[76])
242 | conv7 = list(modelyolov5.model.children())[18]
243 | copy_conv(conv7, model.module_list[80])
244 | cspnet7 = list(modelyolov5.model.children())[20]
245 | copy_conv(cspnet7.cv2, model.module_list[82])
246 | copy_conv(cspnet7.cv1, model.module_list[84])
247 | copy_conv(cspnet7.m[0].cv1, model.module_list[85])
248 | copy_conv(cspnet7.m[0].cv2, model.module_list[86])
249 | copy_conv(cspnet7.cv3, model.module_list[88])
250 | conv8 = list(modelyolov5.model.children())[21]
251 | copy_conv(conv8, model.module_list[92])
252 | cspnet8 = list(modelyolov5.model.children())[23]
253 | copy_conv(cspnet8.cv2, model.module_list[94])
254 | copy_conv(cspnet8.cv1, model.module_list[96])
255 | copy_conv(cspnet8.m[0].cv1, model.module_list[97])
256 | copy_conv(cspnet8.m[0].cv2, model.module_list[98])
257 | copy_conv(cspnet8.cv3, model.module_list[100])
258 | detect = list(modelyolov5.model.children())[24]
259 | model.module_list[77][0] = detect.m[0]
260 | model.module_list[89][0] = detect.m[1]
261 | model.module_list[101][0] = detect.m[2]
262 |
263 | def initialize_weights(model):
264 | for m in model.modules():
265 | t = type(m)
266 | if t is nn.Conv2d:
267 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
268 | elif t is nn.BatchNorm2d:
269 | m.eps = 1e-3
270 | m.momentum = 0.03
271 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
272 | m.inplace = True
273 |
274 | if __name__ == '__main__':
275 | parser = argparse.ArgumentParser()
276 | parser.add_argument('--cfg', type=str, default='cfg/yolov5s_v4.cfg', help='cfg file path')
277 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path')
278 | parser.add_argument('--weights', type=str, default='weights/yolov5s_v4.pt', help='sparse model weights')
279 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
280 | opt = parser.parse_args()
281 | print(opt)
282 |
283 | img_size = opt.img_size
284 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
285 |
286 | # one way of loading yolov5s: rebuild from the yaml and copy a filtered state_dict
287 | # ckpt = torch.load(opt.weights, map_location=device) # load checkpoint
288 | # modelyolov5 = Model('models/yolov5s_v4.yaml', nc=80).to(device)
289 | # exclude = ['anchor'] # exclude keys
290 | # ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
291 | # if k in modelyolov5.state_dict() and not any(x in k for x in exclude)
292 | # and modelyolov5.state_dict()[k].shape == v.shape}
293 | # modelyolov5.load_state_dict(ckpt['model'], strict=False)
294 |
295 | # another way of loading yolov5s: load the pickled model object directly
296 | modelyolov5 = torch.load(opt.weights, map_location=device)['model'].float().eval()
297 | modelyolov5.model[24].export = False # onnx export
298 |
299 | # model=modelyolov5
300 |
301 | # build the equivalent darknet-style model from the cfg and copy the yolov5 weights into it
302 | model = Darknet(opt.cfg, (img_size, img_size)).to(device)
303 | copy_weight_v4(modelyolov5, model)
304 |
305 | path='data/samples/bus.jpg'
306 | img0 = cv2.imread(path) # BGR
307 | # Padded resize
308 | img = letterboxv5(img0, new_shape=416)[0]
309 |
310 | # Convert
311 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
312 | img = np.ascontiguousarray(img)
313 | img = torch.from_numpy(img).to(device)
314 | img = img.float()
315 | img /= 255.0 # 0 - 255 to 0.0 - 1.0
316 | if img.ndimension() == 3:
317 | img = img.unsqueeze(0)
318 |
319 | # modelyolov5.eval()
320 |
321 |
322 | model.eval()
323 | pred = model(img)[0]
324 |
325 | pred = non_max_suppressionv5(pred, 0.4, 0.5, classes=None,
326 | agnostic=False)
327 | # Process detections
328 | for i, det in enumerate(pred): # detections per image
329 | if det is not None and len(det):
330 | # Rescale boxes from img_size to im0 size
331 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
332 |
333 | # Write results
334 | for *xyxy, conf, cls in det:
335 | label = '%s %.2f' % (str(int(cls)), conf)
336 | plot_one_box(xyxy, img0, label=label, color=[random.randint(0, 255) for _ in range(3)], line_thickness=3)
337 | cv2.imwrite("v5_cfg.jpg", img0)
338 |
339 | modelyolov5.eval()
340 | pred = modelyolov5(img)[0]
341 |
342 | pred = non_max_suppressionv5(pred, 0.4, 0.5, classes=None,
343 | agnostic=False)
344 | # Process detections
345 | for i, det in enumerate(pred): # detections per image
346 | if det is not None and len(det):
347 | # Rescale boxes from img_size to im0 size
348 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
349 |
350 | # Write results
351 | for *xyxy, conf, cls in det:
352 | label = '%s %.2f' % (str(int(cls)), conf)
353 | plot_one_box(xyxy, img0, label=label, color=[random.randint(0, 255) for _ in range(3)],
354 | line_thickness=3)
355 | cv2.imwrite("v5.jpg", img0)
356 |
--------------------------------------------------------------------------------
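A quick sketch of the letterboxv5 helper defined in test_yolov5s.py above, using a synthetic frame so no image file is needed (assumes cv2 and the repo's other imports are installed, since they load at import time):

    import numpy as np
    from test_yolov5s import letterboxv5        # resize + pad to a 32-pixel-multiple rectangle

    img0 = np.zeros((480, 640, 3), dtype=np.uint8)            # synthetic 480x640 BGR frame
    img, ratio, (dw, dh) = letterboxv5(img0, new_shape=416)
    print(img.shape, ratio, (dw, dh))                         # (320, 416, 3) (0.65, 0.65) (0.0, 4.0)
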
/tk1_time.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BokyLiu/YoloV5sl_V4_prune/c0ff39c5a5b10cbca95beb597c722cdc02e81885/tk1_time.xls
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
--------------------------------------------------------------------------------
/utils/adabound.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | from torch.optim import Optimizer
5 |
6 |
7 | class AdaBound(Optimizer):
8 | """Implements AdaBound algorithm.
9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
10 | Arguments:
11 | params (iterable): iterable of parameters to optimize or dicts defining
12 | parameter groups
13 | lr (float, optional): Adam learning rate (default: 1e-3)
14 | betas (Tuple[float, float], optional): coefficients used for computing
15 | running averages of gradient and its square (default: (0.9, 0.999))
16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
18 | eps (float, optional): term added to the denominator to improve
19 | numerical stability (default: 1e-8)
20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
23 | https://openreview.net/forum?id=Bkg3g2R9FX
24 | """
25 |
26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
27 | eps=1e-8, weight_decay=0, amsbound=False):
28 | if not 0.0 <= lr:
29 | raise ValueError("Invalid learning rate: {}".format(lr))
30 | if not 0.0 <= eps:
31 | raise ValueError("Invalid epsilon value: {}".format(eps))
32 | if not 0.0 <= betas[0] < 1.0:
33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
34 | if not 0.0 <= betas[1] < 1.0:
35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
36 | if not 0.0 <= final_lr:
37 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
38 | if not 0.0 <= gamma < 1.0:
39 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
41 | weight_decay=weight_decay, amsbound=amsbound)
42 | super(AdaBound, self).__init__(params, defaults)
43 |
44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
45 |
46 | def __setstate__(self, state):
47 | super(AdaBound, self).__setstate__(state)
48 | for group in self.param_groups:
49 | group.setdefault('amsbound', False)
50 |
51 | def step(self, closure=None):
52 | """Performs a single optimization step.
53 | Arguments:
54 | closure (callable, optional): A closure that reevaluates the model
55 | and returns the loss.
56 | """
57 | loss = None
58 | if closure is not None:
59 | loss = closure()
60 |
61 | for group, base_lr in zip(self.param_groups, self.base_lrs):
62 | for p in group['params']:
63 | if p.grad is None:
64 | continue
65 | grad = p.grad.data
66 | if grad.is_sparse:
67 | raise RuntimeError(
68 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
69 | amsbound = group['amsbound']
70 |
71 | state = self.state[p]
72 |
73 | # State initialization
74 | if len(state) == 0:
75 | state['step'] = 0
76 | # Exponential moving average of gradient values
77 | state['exp_avg'] = torch.zeros_like(p.data)
78 | # Exponential moving average of squared gradient values
79 | state['exp_avg_sq'] = torch.zeros_like(p.data)
80 | if amsbound:
81 | # Maintains max of all exp. moving avg. of sq. grad. values
82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
83 |
84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
85 | if amsbound:
86 | max_exp_avg_sq = state['max_exp_avg_sq']
87 | beta1, beta2 = group['betas']
88 |
89 | state['step'] += 1
90 |
91 | if group['weight_decay'] != 0:
92 | grad = grad.add(group['weight_decay'], p.data)
93 |
94 | # Decay the first and second moment running average coefficient
95 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
97 | if amsbound:
98 | # Maintains the maximum of all 2nd moment running avg. till now
99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
100 | # Use the max. for normalizing running avg. of gradient
101 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
102 | else:
103 | denom = exp_avg_sq.sqrt().add_(group['eps'])
104 |
105 | bias_correction1 = 1 - beta1 ** state['step']
106 | bias_correction2 = 1 - beta2 ** state['step']
107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
108 |
109 | # Applies bounds on actual learning rate
110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
111 | final_lr = group['final_lr'] * group['lr'] / base_lr
112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
114 | step_size = torch.full_like(denom, step_size)
115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
116 |
117 | p.data.add_(-step_size)
118 |
119 | return loss
120 |
121 |
122 | class AdaBoundW(Optimizer):
123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
125 | Arguments:
126 | params (iterable): iterable of parameters to optimize or dicts defining
127 | parameter groups
128 | lr (float, optional): Adam learning rate (default: 1e-3)
129 | betas (Tuple[float, float], optional): coefficients used for computing
130 | running averages of gradient and its square (default: (0.9, 0.999))
131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
133 | eps (float, optional): term added to the denominator to improve
134 | numerical stability (default: 1e-8)
135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
138 | https://openreview.net/forum?id=Bkg3g2R9FX
139 | """
140 |
141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
142 | eps=1e-8, weight_decay=0, amsbound=False):
143 | if not 0.0 <= lr:
144 | raise ValueError("Invalid learning rate: {}".format(lr))
145 | if not 0.0 <= eps:
146 | raise ValueError("Invalid epsilon value: {}".format(eps))
147 | if not 0.0 <= betas[0] < 1.0:
148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
149 | if not 0.0 <= betas[1] < 1.0:
150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
151 | if not 0.0 <= final_lr:
152 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
153 | if not 0.0 <= gamma < 1.0:
154 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
156 | weight_decay=weight_decay, amsbound=amsbound)
157 | super(AdaBoundW, self).__init__(params, defaults)
158 |
159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
160 |
161 | def __setstate__(self, state):
162 | super(AdaBoundW, self).__setstate__(state)
163 | for group in self.param_groups:
164 | group.setdefault('amsbound', False)
165 |
166 | def step(self, closure=None):
167 | """Performs a single optimization step.
168 | Arguments:
169 | closure (callable, optional): A closure that reevaluates the model
170 | and returns the loss.
171 | """
172 | loss = None
173 | if closure is not None:
174 | loss = closure()
175 |
176 | for group, base_lr in zip(self.param_groups, self.base_lrs):
177 | for p in group['params']:
178 | if p.grad is None:
179 | continue
180 | grad = p.grad.data
181 | if grad.is_sparse:
182 | raise RuntimeError(
183 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
184 | amsbound = group['amsbound']
185 |
186 | state = self.state[p]
187 |
188 | # State initialization
189 | if len(state) == 0:
190 | state['step'] = 0
191 | # Exponential moving average of gradient values
192 | state['exp_avg'] = torch.zeros_like(p.data)
193 | # Exponential moving average of squared gradient values
194 | state['exp_avg_sq'] = torch.zeros_like(p.data)
195 | if amsbound:
196 | # Maintains max of all exp. moving avg. of sq. grad. values
197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
198 |
199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
200 | if amsbound:
201 | max_exp_avg_sq = state['max_exp_avg_sq']
202 | beta1, beta2 = group['betas']
203 |
204 | state['step'] += 1
205 |
206 | # Decay the first and second moment running average coefficient
207 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
209 | if amsbound:
210 | # Maintains the maximum of all 2nd moment running avg. till now
211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
212 | # Use the max. for normalizing running avg. of gradient
213 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
214 | else:
215 | denom = exp_avg_sq.sqrt().add_(group['eps'])
216 |
217 | bias_correction1 = 1 - beta1 ** state['step']
218 | bias_correction2 = 1 - beta2 ** state['step']
219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
220 |
221 | # Applies bounds on actual learning rate
222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
223 | final_lr = group['final_lr'] * group['lr'] / base_lr
224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
226 | step_size = torch.full_like(denom, step_size)
227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
228 |
229 | if group['weight_decay'] != 0:
230 | decayed_weights = torch.mul(p.data, group['weight_decay'])
231 | p.data.add_(-step_size)
232 | p.data.sub_(decayed_weights)
233 | else:
234 | p.data.add_(-step_size)
235 |
236 | return loss
237 |
--------------------------------------------------------------------------------
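A usage sketch for the AdaBound optimizer above, assuming a PyTorch version that still accepts the legacy add_/addcmul_ call signatures used in step():

    import torch
    from utils.adabound import AdaBound

    model = torch.nn.Linear(10, 2)
    optimizer = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)   # Adam-like start, SGD-like bound

    x, y = torch.randn(8, 10), torch.randn(8, 2)
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
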
/utils/gcp.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # New VM
4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec
5 | git clone https://github.com/ultralytics/yolov3
6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd ..
7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex
8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools
10 | python3 -c "
11 | from yolov3.utils.google_utils import gdrive_download
12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')"
13 | sudo shutdown
14 |
15 | # Re-clone
16 | rm -rf yolov3 # Warning: remove existing
17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master
18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch
19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1
20 |
21 | # Train
22 | python3 train.py
23 |
24 | # Resume
25 | python3 train.py --resume
26 |
27 | # Detect
28 | python3 detect.py
29 |
30 | # Test
31 | python3 test.py --save-json
32 |
33 | # Evolve
34 | for i in {0..500}
35 | do
36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4
37 | done
38 |
39 | # Git pull
40 | git pull https://github.com/ultralytics/yolov3 # master
41 | git pull https://github.com/ultralytics/yolov3 test # branch
42 |
43 | # Test Darknet training
44 | python3 test.py --weights ../darknet/backup/yolov3.backup
45 |
46 | # Copy last.pt TO bucket
47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics
48 |
49 | # Copy last.pt FROM bucket
50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt
51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt
52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt
53 |
54 | # Reproduce tutorials
55 | rm results*.txt # WARNING: removes existing results
56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt
57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt
58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt
59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt
60 | python3 -c "from utils import utils; utils.plot_results()"
61 | # gsutil cp results*.txt gs://ultralytics
62 | gsutil cp results.png gs://ultralytics
63 | sudo shutdown
64 |
65 | # Reproduce mAP
66 | python3 test.py --save-json --img-size 608
67 | python3 test.py --save-json --img-size 416
68 | python3 test.py --save-json --img-size 320
69 | sudo shutdown
70 |
71 | # Benchmark script
72 | git clone https://github.com/ultralytics/yolov3 # clone our repo
73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex
74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB)
75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min)
76 |
77 | # Unit tests
78 | python3 detect.py # detect 2 persons, 1 tie
79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8
80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs
81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs
82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs
83 |
84 | # AlexeyAB Darknet
85 | gsutil cp -r gs://sm6/supermarket2 . # dataset from bucket
86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make
87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation
88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp
89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco
90 |
91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp
92 | gsutil cp -r backup/*5000.weights gs://sm6/weights
93 | sudo shutdown
94 |
95 |
96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny
97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume
98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics
99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test
100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket
101 |
102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test
103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test
104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test
105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test
106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test
107 |
108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown
109 |
110 | # Debug/Development
111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou
112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320
113 |
114 | gsutil cp evolve.txt gs://ultralytics
115 | sudo shutdown
116 |
117 | #Docker
118 | sudo docker kill $(sudo docker ps -q)
119 | sudo docker pull ultralytics/yolov3:v1
120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1
121 |
122 | clear
123 | while true
124 | do
125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1
126 | done
127 |
128 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias
129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done
130 |
--------------------------------------------------------------------------------
/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
2 | # pip install --upgrade google-cloud-storage
3 |
4 | import os
5 | import time
6 |
7 |
8 | # from google.cloud import storage
9 |
10 |
11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'):
12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f
13 | # Downloads a file from Google Drive, accepting presented query
14 | # from utils.google_utils import *; gdrive_download()
15 | t = time.time()
16 |
17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
18 | if os.path.exists(name): # remove existing
19 | os.remove(name)
20 |
21 | # Attempt large file download
22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id,
23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
24 | id, name),
25 | 'rm ./cookie']
26 | [os.system(x) for x in s] # run commands
27 |
28 | # Attempt small file download
29 | if not os.path.exists(name): # file size < 40MB
30 | s = 'curl -f -L -o %s "https://drive.google.com/uc?export=download&id=%s"' % (name, id)  # quote URL so '&' is not treated as a shell operator
31 | os.system(s)
32 |
33 | # Unzip if archive
34 | if name.endswith('.zip'):
35 | print('unzipping... ', end='')
36 | os.system('unzip -q %s' % name) # unzip
37 | os.remove(name) # remove zip to free space
38 |
39 | print('Done (%.1fs)' % (time.time() - t))
40 |
41 |
42 | def upload_blob(bucket_name, source_file_name, destination_blob_name):
43 | # Uploads a file to a bucket
44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
45 |
46 | storage_client = storage.Client()
47 | bucket = storage_client.get_bucket(bucket_name)
48 | blob = bucket.blob(destination_blob_name)
49 |
50 | blob.upload_from_filename(source_file_name)
51 |
52 | print('File {} uploaded to {}.'.format(
53 | source_file_name,
54 | destination_blob_name))
55 |
56 |
57 | def download_blob(bucket_name, source_blob_name, destination_file_name):
58 | # Downloads a blob from a bucket
59 | storage_client = storage.Client()
60 | bucket = storage_client.get_bucket(bucket_name)
61 | blob = bucket.blob(source_blob_name)
62 |
63 | blob.download_to_filename(destination_file_name)
64 |
65 | print('Blob {} downloaded to {}.'.format(
66 | source_blob_name,
67 | destination_file_name))
68 |
--------------------------------------------------------------------------------
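A usage sketch for gdrive_download above; it shells out to curl and unzip, so both must be on PATH (the upload_blob/download_blob helpers additionally need the commented-out google-cloud-storage import re-enabled):

    from utils.google_utils import gdrive_download

    # fetches and unzips the COCO archive referenced elsewhere in this repo
    gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip')
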
/utils/parse_config.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | def parse_model_cfg(path):
5 | # Parses the yolo-v3 layer configuration file and returns module definitions
6 | file = open(path, 'r')
7 | lines = file.read().split('\n')
8 | lines = [x for x in lines if x and not x.startswith('#')]
9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
10 | mdefs = [] # module definitions
11 | for line in lines:
12 | if line.startswith('['): # This marks the start of a new block
13 | mdefs.append({})
14 | mdefs[-1]['type'] = line[1:-1].rstrip()
15 | if mdefs[-1]['type'] == 'convolutional':
16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later)
17 | else:
18 | key, val = line.split("=")
19 | key = key.rstrip()
20 |
21 | if 'anchors' in key:
22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors
23 | else:
24 | mdefs[-1][key] = val.strip()
25 |
26 | return mdefs
27 |
28 |
29 | def parse_data_cfg(path):
30 | # Parses the data configuration file
31 | options = dict()
32 | with open(path, 'r') as fp:
33 | lines = fp.readlines()
34 |
35 | for line in lines:
36 | line = line.strip()
37 | if line == '' or line.startswith('#'):
38 | continue
39 | key, val = line.split('=')
40 | options[key.strip()] = val.strip()
41 |
42 | return options
43 |
44 |
--------------------------------------------------------------------------------
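A short sketch of how the two parsers above are consumed (test.py reads the same keys from the .data file):

    from utils.parse_config import parse_data_cfg, parse_model_cfg

    data = parse_data_cfg('data/coco.data')            # dict of key=value pairs: 'classes', 'train', 'valid', 'names', ...
    nc = int(data['classes'])

    mdefs = parse_model_cfg('cfg/yolov5s_v4.cfg')      # one dict per [section] in the darknet-style cfg
    print(nc, mdefs[0]['type'], len(mdefs))
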
/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 | def init_seeds(seed=0):
9 | torch.manual_seed(seed)
10 | torch.cuda.manual_seed(seed)
11 | torch.cuda.manual_seed_all(seed)
12 |
13 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html
14 | if seed == 0:
15 | torch.backends.cudnn.deterministic = True
16 | torch.backends.cudnn.benchmark = False
17 |
18 |
19 | def select_device(device='', apex=False):
20 | # device = 'cpu' or '0' or '0,1,2,3'
21 | cpu_request = device.lower() == 'cpu'
22 | if device and not cpu_request: # if device requested other than 'cpu'
23 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
24 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity
25 |
26 | cuda = False if cpu_request else torch.cuda.is_available()
27 | if cuda:
28 | c = 1024 ** 2 # bytes to MB
29 | ng = torch.cuda.device_count()
30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)]
31 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex
32 | for i in range(0, ng):
33 | if i == 1:
34 | cuda_str = ' ' * len(cuda_str)
35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
36 | (cuda_str, i, x[i].name, x[i].total_memory / c))
37 | else:
38 | print('Using CPU')
39 |
40 | print('') # skip a line
41 | return torch.device('cuda:0' if cuda else 'cpu')
42 |
43 | def time_synchronized():
44 | torch.cuda.synchronize() if torch.cuda.is_available() else None
45 | return time.time()
46 |
47 |
48 | def is_parallel(model):
49 | # Returns True if the model is wrapped in DataParallel or DistributedDataParallel
50 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
51 |
52 | def initialize_weights(model):
53 | for m in model.modules():
54 | t = type(m)
55 | if t is nn.Conv2d:
56 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
57 | elif t is nn.BatchNorm2d:
58 | m.eps = 1e-3
59 | m.momentum = 0.03
60 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
61 | m.inplace = True
62 |
63 | def fuse_conv_and_bn(conv, bn):
64 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
65 | with torch.no_grad():
66 | # init
67 | fusedconv = torch.nn.Conv2d(conv.in_channels,
68 | conv.out_channels,
69 | kernel_size=conv.kernel_size,
70 | stride=conv.stride,
71 | padding=conv.padding,
72 | bias=True)
73 |
74 | # prepare filters
75 | w_conv = conv.weight.clone().view(conv.out_channels, -1)
76 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
77 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
78 |
79 | # prepare spatial bias
80 | if conv.bias is not None:
81 | b_conv = conv.bias
82 | else:
83 | b_conv = torch.zeros(conv.weight.size(0))
84 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
85 | fusedconv.bias.copy_(b_conv + b_bn)
86 |
87 | return fusedconv
88 |
89 |
90 | def model_info(model, report='summary'):
91 | # Plots a line-by-line description of a PyTorch model
92 | n_p = sum(x.numel() for x in model.parameters()) # number parameters
93 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
94 | if report == 'full':
95 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
96 | for i, (name, p) in enumerate(model.named_parameters()):
97 | name = name.replace('module_list.', '')
98 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
99 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
100 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))
101 |
--------------------------------------------------------------------------------
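A small check of fuse_conv_and_bn above: with BatchNorm in eval mode (so its running statistics are used), the fused convolution should reproduce conv + BN to within numerical tolerance:

    import torch
    import torch.nn as nn
    from utils.torch_utils import fuse_conv_and_bn

    conv = nn.Conv2d(3, 16, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(16).eval()
    fused = fuse_conv_and_bn(conv, bn)

    x = torch.randn(1, 3, 32, 32)
    with torch.no_grad():
        print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))   # True
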