├── .gitignore ├── LICENSE ├── README.md ├── cfg ├── 0514 │ ├── prune_0.7_keep_0.05_8x_yolov5l_v4.cfg │ ├── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg │ ├── prune_0.8_keep_0.05_8x_yolov5l_v4.cfg │ ├── yolov5s_v4.cfg │ └── yolov5s_v4_hand.cfg ├── last_prune │ ├── prune_0.3_keep_0.05_8x_yolov5l_v4.cfg │ └── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg ├── prune_0.7_keep_0.1_8x_yolov5l_v4.cfg ├── prune_0.8_keep_0.01_8x_yolov5l_v4.cfg └── yolov5l_v4.cfg ├── data ├── coco.data ├── coco.names ├── coco_128img.data ├── coco_128img.txt ├── get_coco_dataset.sh └── get_coco_dataset_gdrive.sh ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py └── yolov5s_v4.yaml ├── modelsori.py ├── prune_yolov5s.py ├── prune_yolov5s.sh ├── shortcut_prune_yolov5s.py ├── slim_prune_yolov5l_8x.py ├── slim_prune_yolov5s.py ├── slim_prune_yolov5s_8x.py ├── slim_prune_yolov5s_8x.sh ├── test.py ├── test_yolov5s.py ├── tk1_time.xls └── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── gcp.sh ├── general.py ├── google_utils.py ├── parse_config.py ├── prune_utils.py ├── torch_utils.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.jpeg 4 | *.png 5 | *.bmp 6 | *.tif 7 | *.tiff 8 | *.heic 9 | *.JPG 10 | *.JPEG 11 | *.PNG 12 | *.BMP 13 | *.TIF 14 | *.TIFF 15 | *.HEIC 16 | *.mp4 17 | *.mov 18 | *.MOV 19 | *.avi 20 | *.data 21 | *.json 22 | 23 | #data 24 | data/coco128 25 | data/hand_dataset 26 | 27 | 28 | # *.cfg 29 | !cfg/yolov3*.cfg 30 | 31 | storage.googleapis.com 32 | runs/* 33 | data/* 34 | !data/samples/zidane.jpg 35 | !data/samples/bus.jpg 36 | !data/coco.names 37 | !data/coco_paper.names 38 | !data/coco.data 39 | !data/coco_*.data 40 | !data/coco_*.txt 41 | !data/trainvalno5k.shapes 42 | !data/*.sh 43 | 44 | pycocotools/* 45 | results*.txt 46 | gcp_test*.sh 47 | 48 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 49 | *.m~ 50 | *.mat 51 | !targets*.mat 52 | 53 | # Neural Network weights ----------------------------------------------------------------------------------------------- 54 | *.weights 55 | *.pt 56 | *.onnx 57 | *.mlmodel 58 | *.torchscript 59 | darknet53.conv.74 60 | yolov3-tiny.conv.15 61 | 62 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 63 | # Byte-compiled / optimized / DLL files 64 | __pycache__/ 65 | *.py[cod] 66 | *$py.class 67 | 68 | # C extensions 69 | *.so 70 | 71 | # Distribution / packaging 72 | .Python 73 | env/ 74 | build/ 75 | develop-eggs/ 76 | dist/ 77 | downloads/ 78 | eggs/ 79 | .eggs/ 80 | lib/ 81 | lib64/ 82 | parts/ 83 | sdist/ 84 | var/ 85 | wheels/ 86 | *.egg-info/ 87 | .installed.cfg 88 | *.egg 89 | 90 | # PyInstaller 91 | # Usually these files are written by a python script from a template 92 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
93 | *.manifest 94 | *.spec 95 | 96 | # Installer logs 97 | pip-log.txt 98 | pip-delete-this-directory.txt 99 | 100 | # Unit test / coverage reports 101 | htmlcov/ 102 | .tox/ 103 | .coverage 104 | .coverage.* 105 | .cache 106 | nosetests.xml 107 | coverage.xml 108 | *.cover 109 | .hypothesis/ 110 | 111 | # Translations 112 | *.mo 113 | *.pot 114 | 115 | # Django stuff: 116 | *.log 117 | local_settings.py 118 | 119 | # Flask stuff: 120 | instance/ 121 | .webassets-cache 122 | 123 | # Scrapy stuff: 124 | .scrapy 125 | 126 | # Sphinx documentation 127 | docs/_build/ 128 | 129 | # PyBuilder 130 | target/ 131 | 132 | # Jupyter Notebook 133 | .ipynb_checkpoints 134 | 135 | # pyenv 136 | .python-version 137 | 138 | # celery beat schedule file 139 | celerybeat-schedule 140 | 141 | # SageMath parsed files 142 | *.sage.py 143 | 144 | # dotenv 145 | .env 146 | 147 | # virtualenv 148 | .venv 149 | venv/ 150 | ENV/ 151 | 152 | # Spyder project settings 153 | .spyderproject 154 | .spyproject 155 | 156 | # Rope project settings 157 | .ropeproject 158 | 159 | # mkdocs documentation 160 | /site 161 | 162 | # mypy 163 | .mypy_cache/ 164 | 165 | 166 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 167 | 168 | # General 169 | .DS_Store 170 | .AppleDouble 171 | .LSOverride 172 | 173 | # Icon must end with two \r 174 | Icon 175 | Icon? 176 | 177 | # Thumbnails 178 | ._* 179 | 180 | # Files that might appear in the root of a volume 181 | .DocumentRevisions-V100 182 | .fseventsd 183 | .Spotlight-V100 184 | .TemporaryItems 185 | .Trashes 186 | .VolumeIcon.icns 187 | .com.apple.timemachine.donotpresent 188 | 189 | # Directories potentially created on remote AFP share 190 | .AppleDB 191 | .AppleDesktop 192 | Network Trash Folder 193 | Temporary Items 194 | .apdisk 195 | 196 | 197 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 198 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 199 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 200 | 201 | # User-specific stuff: 202 | .idea/* 203 | .idea/**/workspace.xml 204 | .idea/**/tasks.xml 205 | .idea/dictionaries 206 | .html # Bokeh Plots 207 | .pg # TensorFlow Frozen Graphs 208 | .avi # videos 209 | 210 | # Sensitive or high-churn files: 211 | .idea/**/dataSources/ 212 | .idea/**/dataSources.ids 213 | .idea/**/dataSources.local.xml 214 | .idea/**/sqlDataSources.xml 215 | .idea/**/dynamic.xml 216 | .idea/**/uiDesigner.xml 217 | 218 | # Gradle: 219 | .idea/**/gradle.xml 220 | .idea/**/libraries 221 | 222 | # CMake 223 | cmake-build-debug/ 224 | cmake-build-release/ 225 | 226 | # Mongo Explorer plugin: 227 | .idea/**/mongoSettings.xml 228 | 229 | ## File-based project format: 230 | *.iws 231 | 232 | ## Plugin-specific files: 233 | 234 | # IntelliJ 235 | out/ 236 | 237 | # mpeltonen/sbt-idea plugin 238 | .idea_modules/ 239 | 240 | # JIRA plugin 241 | atlassian-ide-plugin.xml 242 | 243 | # Cursive Clojure plugin 244 | .idea/replstate.xml 245 | 246 | # Crashlytics plugin (for Android Studio and IntelliJ) 247 | com_crashlytics_export_strings.xml 248 | crashlytics.properties 249 | crashlytics-build.properties 250 | fabric.properties 251 | 252 | tensorboard/ 253 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov5_prune 2 | This project supplements [ZJU-lishuang](https://github.com/ZJU-lishuang)/**[yolov5_prune](https://github.com/ZJU-lishuang/yolov5_prune)** by adding pruning for the yolov5l model. For all other steps, refer to lishuang's repo. 3 | 4 | The project is based on [tanluren/yolov3-channel-and-layer-pruning](https://github.com/tanluren/yolov3-channel-and-layer-pruning) and extends that work to yolov5. 5 | 6 | The basic workflow: train on your own dataset with [ultralytics/yolov5](https://github.com/ultralytics/yolov5), then prune the model once its accuracy meets your requirements but its speed does not. Sparsity training comes first and is critical: if the model is not sparse enough, a large pruning ratio will push the pruned model's mAP close to 0. After pruning, fine-tune the model to recover accuracy. 7 | 8 | This project targets the fourth release (v4) of yolov5. 9 | For yolov5 v3, see [yolov5-v3-prune](https://github.com/ZJU-lishuang/yolov5_prune/tree/v3) 10 | For yolov5 v2, see [yolov5-v2-prune](https://github.com/ZJU-lishuang/yolov5_prune/tree/v2) 11 | 12 | TODO: add pruning for the m, l, and x models, time permitting. >-< 13 | 14 | PS: models have been pruned successfully on both open-source datasets and datasets that cannot be released. 15 | 16 | ## Example workflow 17 | Download the dataset: [dataset](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz)<br>
18 | ### STEP1: Base training 19 | Attachment: [training logs](https://drive.google.com/drive/folders/1v0HZYBhU6d4M2hvEfjia76wYbQlaFz_f?usp=sharing)<br>
20 | ### STEP2: Sparsity training 21 | Attachment: [sparsity-training logs](https://drive.google.com/drive/folders/1tJaeSOzQlyrx1l22hhop8G3ZuKshm8rp?usp=sharing)<br>
22 | ### STEP3: 8x channel pruning 23 | Attachment: [pruned model](https://drive.google.com/drive/folders/1V5nA6oGXX43bagpO3cJIFpI0zjAOzt0p?usp=sharing)<br>
24 | ### STEP4: Fine-tuning 25 | Attachment: [fine-tuning logs](https://drive.google.com/drive/folders/1vT_pN_XlMBniF9YXaPj2KeCNZitxYFLA?usp=sharing)<br>
26 | ### STEP4: Fine-tuning with knowledge distillation; the result is better than plain fine-tuning 27 | Attachment: [distillation fine-tuning logs](https://drive.google.com/drive/folders/1T3SGh0FjyjxDckFcKVxpxQHF2XzZ-gfN?usp=sharing)<br>
28 | 29 | ## Pruning steps 30 | #### STEP1: Base training 31 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)<br>
32 | Example:<br>
33 | ``` 34 | python train.py --img 640 --batch 8 --epochs 50 --weights weights/yolov5s_v4.pt --data data/coco_hand.yaml --cfg models/yolov5s.yaml --name s_hand 35 | ``` 36 | 37 | #### STEP2: Sparsity training 38 | Use --prune 0 for channel pruning strategy 1 and --prune 1 for the other pruning strategies.<br>
39 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)
40 | Example:<br>
41 | ``` 42 | python train_sparsity.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg models/yolov5s.yaml --weights runs/train/s_hand/weights/last.pt --name s_hand_sparsity -sr --s 0.001 --prune 1 43 | ``` 44 |
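As a rough illustration of what the `-sr --s 0.001` sparsity term does, the sketch below adds the sub-gradient of an L1 penalty on every BatchNorm scale (gamma) after the normal backward pass, which is the standard network-slimming recipe. This is a minimal sketch, not the code in `train_sparsity.py`; the helper names are illustrative.

```python
import torch
import torch.nn as nn

def add_bn_l1_grad(model: nn.Module, s: float = 0.001):
    """Add the sub-gradient of s * |gamma| to every BN scale after loss.backward()."""
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))

def sorted_bn_gammas(model: nn.Module) -> torch.Tensor:
    """Collect all |gamma| values so you can check how sparse the model really is."""
    return torch.cat([m.weight.data.abs().flatten()
                      for m in model.modules()
                      if isinstance(m, nn.BatchNorm2d)]).sort()[0]

# Inside a training step (illustrative):
#   loss.backward()
#   add_bn_l1_grad(model, s=0.001)  # push unimportant channels' gammas toward zero
#   optimizer.step()
# Before pruning, inspect sorted_bn_gammas(model): a long tail of near-zero values
# means a high pruning ratio is safe; otherwise expect the mAP collapse mentioned above.
```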
45 | #### STEP3: Channel pruning strategy 1 46 | Layers directly connected to a shortcut are not pruned, which avoids any dimension handling.<br> 47 | ``` 48 | python prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune0.pt --percent 0.8 49 | ``` 50 |
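For intuition, here is a minimal sketch of the channel selection behind `--percent`: collect the BN gammas of all prunable layers (shortcut-connected layers excluded), take a global threshold at the requested percentile, and build a 0/1 keep-mask per layer. Variable and function names are illustrative, not those used in `prune_yolov5s.py`.

```python
import torch
import torch.nn as nn

def build_channel_masks(prunable_bns: list, percent: float) -> list:
    """prunable_bns: BatchNorm2d layers of convs NOT feeding a shortcut."""
    gammas = torch.cat([bn.weight.data.abs().flatten() for bn in prunable_bns])
    thresh = gammas.sort()[0][int(len(gammas) * percent)]
    masks = []
    for bn in prunable_bns:
        mask = (bn.weight.data.abs() > thresh).float()   # 1 = keep, 0 = prune
        if mask.sum() == 0:                              # never drop a whole layer
            mask[bn.weight.data.abs().argmax()] = 1.0
        masks.append(mask)
    return masks
```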
51 | #### STEP3: Channel pruning strategy 2 52 | Shortcut layers are pruned as well; each shortcut group is pruned with the mask of its first convolutional layer.<br> 53 | ``` 54 | python shortcut_prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune1.pt --percent 0.3 55 | ``` 56 |
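Conceptually, strategy 2 keeps the channel dimensions consistent across a residual add by reusing one mask: the sketch below applies the first convolution's mask to every member of a shortcut group. It is only an illustration (`shortcut_groups` is a hypothetical name for lists of layer indices joined by adds), not the actual `shortcut_prune_yolov5s.py` code.

```python
def share_first_mask(masks: list, shortcut_groups: list) -> list:
    """Give every layer in a shortcut group the mask of the group's first conv."""
    for group in shortcut_groups:
        first = masks[group[0]]
        for idx in group[1:]:
            masks[idx] = first.clone()
    return masks
```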
57 | #### STEP3: Channel pruning strategy 3 58 | A global threshold first determines a pruning mask for every convolutional layer; then, for each shortcut group, the masks of the connected convolutional layers are merged by taking their union, and the merged mask is used for pruning.<br> 59 | ``` 60 | python slim_prune_yolov5s.py --cfg cfg/yolov5s.cfg --data data/fangweisui.data --weights weights/yolov5s_prune1.pt --global_percent 0.8 --layer_keep 0.01 61 | ``` 62 |
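The merge step of strategy 3 amounts to an element-wise OR over the masks inside each shortcut group, so a channel survives if any connected layer wants to keep it. Again a minimal sketch with illustrative names rather than the actual `slim_prune_yolov5s.py` code.

```python
import torch

def merge_group_masks(masks: list, shortcut_groups: list) -> list:
    """Union (element-wise max on 0/1 masks) across each shortcut group."""
    for group in shortcut_groups:
        merged = masks[group[0]].clone()
        for idx in group[1:]:
            merged = torch.max(merged, masks[idx])
        for idx in group:
            masks[idx] = merged.clone()
    return masks
```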
63 | #### STEP3: 8x channel pruning 64 | When deploying on hardware we found that, for the same pruning ratio, the model runs fastest when channel counts are multiples of 8. (Pitfall: make sure the hardware is switched to its maximum performance mode.)<br> 65 | Example:<br>
66 | ``` 67 | python slim_prune_yolov5s_8x.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --global_percent 0.5 --layer_keep 0.01 --img_size 640 68 | ``` 69 |
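The "8x" variant only changes how many channels are kept per layer: after thresholding, the keep count is rounded up to the next multiple of 8 and the highest-gamma channels are retained. A hedged sketch of that rounding (function and argument names are illustrative, not the repo's):

```python
import torch

def round_mask_to_8x(bn_gamma: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    """Round the kept-channel count up to a multiple of 8, keeping the largest gammas."""
    keep = int(mask.sum().item())
    keep = min(max(8, ((keep + 7) // 8) * 8), mask.numel())   # round up, clamp to layer size
    idx = bn_gamma.abs().argsort(descending=True)[:keep]
    new_mask = torch.zeros_like(mask)
    new_mask[idx] = 1.0
    return new_mask
```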
70 | #### STEP4: Fine-tuning 71 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)<br> 72 | Example:<br>
73 | ``` 74 | python prune_finetune.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune 75 | ``` 76 | 77 | #### STEP4: Fine-tuning with knowledge distillation 78 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)<br>
79 | Example:<br>
80 | ``` 81 | python prune_finetune.py --img 640 --batch 8 --epochs 50 --data data/coco_hand.yaml --cfg ./cfg/prune_0.5_keep_0.01_8x_yolov5s_v4_hand.cfg --weights ./weights/prune_0.5_keep_0.01_8x_last_v4s.pt --name s_hand_finetune_distill --distill 82 | ``` 83 |
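Conceptually, `--distill` adds a teacher-imitation term on top of the normal detection loss: the un-pruned model's predictions act as soft targets for the pruned student. The sketch below shows one common way to express this (plain L2 on raw outputs); it is only an illustration of the idea under that assumption, not the loss actually implemented in `prune_finetune.py`.

```python
import torch
import torch.nn.functional as F

def distillation_loss(student_out: torch.Tensor, teacher_out: torch.Tensor,
                      weight: float = 1.0) -> torch.Tensor:
    """Pull the student's raw predictions toward the (frozen) teacher's."""
    return weight * F.mse_loss(student_out, teacher_out.detach())

# Illustrative training step:
#   with torch.no_grad():
#       teacher_pred = teacher(imgs)    # original, un-pruned model
#   student_pred = student(imgs)        # pruned model being fine-tuned
#   loss = detection_loss + distillation_loss(student_pred, teacher_pred)
```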
84 | #### STEP5: Inference with the pruned model 85 | [yolov5](https://github.com/ZJU-lishuang/yolov5-v4)<br> 86 | Example:<br>
87 | ```shell 88 | python prune_detect.py --weights weights/last_s_hand_finetune.pt --img 640 --conf 0.7 --save-txt --source inference/images 89 | ``` 90 | 91 | 92 | -------------------------------------------------------------------------------- /cfg/0514/prune_0.7_keep_0.05_8x_yolov5l_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.949 8 | decay=0.0005 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | learning_rate=0.00261 14 | burn_in=1000 15 | max_batches=500500 16 | policy=steps 17 | steps=400000,450000 18 | scales=.1,.1 19 | mosaic=1 20 | 21 | [focus] 22 | filters=12 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=56 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=128 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=leaky 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [route] 49 | layers=-2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=64 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [shortcut] 76 | from=-3 77 | activation=linear 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=64 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [shortcut] 96 | from=-3 97 | activation=linear 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=64 102 | size=1 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=64 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [shortcut] 116 | from=-3 117 | activation=linear 118 | 119 | [route] 120 | layers=-1,-12 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=128 125 | size=1 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | batch_normalize=1 132 | filters=256 133 | size=3 134 | stride=2 135 | pad=1 136 | activation=leaky 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=128 141 | size=1 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | [route] 147 | layers=-2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=128 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=72 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=leaky 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=3 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [shortcut] 194 | from=-3 195 | activation=linear 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | filters=48 200 | size=1 201 | 
stride=1 202 | pad=1 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=3 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [shortcut] 214 | from=-3 215 | activation=linear 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=40 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=leaky 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=3 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [shortcut] 234 | from=-3 235 | activation=linear 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=64 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [shortcut] 254 | from=-3 255 | activation=linear 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=48 260 | size=1 261 | stride=1 262 | pad=1 263 | activation=leaky 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [shortcut] 274 | from=-3 275 | activation=linear 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=80 280 | size=1 281 | stride=1 282 | pad=1 283 | activation=leaky 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=128 288 | size=3 289 | stride=1 290 | pad=1 291 | activation=leaky 292 | 293 | [shortcut] 294 | from=-3 295 | activation=linear 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=88 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | filters=128 308 | size=3 309 | stride=1 310 | pad=1 311 | activation=leaky 312 | 313 | [shortcut] 314 | from=-3 315 | activation=linear 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=72 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=128 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | [route] 338 | layers=-1,-30 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=256 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=328 351 | size=3 352 | stride=2 353 | pad=1 354 | activation=leaky 355 | 356 | [convolutional] 357 | batch_normalize=1 358 | filters=232 359 | size=1 360 | stride=1 361 | pad=1 362 | activation=leaky 363 | 364 | [route] 365 | layers=-2 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=254 370 | size=1 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [convolutional] 376 | batch_normalize=1 377 | filters=80 378 | size=1 379 | stride=1 380 | pad=1 381 | activation=leaky 382 | 383 | [convolutional] 384 | batch_normalize=1 385 | filters=254 386 | size=3 387 | stride=1 388 | pad=1 389 | activation=leaky 390 | 391 | [shortcut] 392 | from=-3 393 | activation=linear 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=16 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=leaky 402 | 403 | [convolutional] 404 | batch_normalize=1 405 | filters=254 406 | size=3 407 | stride=1 408 | pad=1 409 | activation=leaky 410 | 411 | [shortcut] 412 | from=-3 413 | activation=linear 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=24 418 | size=1 419 | stride=1 420 
| pad=1 421 | activation=leaky 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=254 426 | size=3 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [shortcut] 432 | from=-3 433 | activation=linear 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=32 438 | size=1 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=254 446 | size=3 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [shortcut] 452 | from=-3 453 | activation=linear 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=32 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=254 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=leaky 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | [convolutional] 476 | batch_normalize=1 477 | filters=40 478 | size=1 479 | stride=1 480 | pad=1 481 | activation=leaky 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=254 486 | size=3 487 | stride=1 488 | pad=1 489 | activation=leaky 490 | 491 | [shortcut] 492 | from=-3 493 | activation=linear 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=24 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=leaky 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=254 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=leaky 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | [convolutional] 516 | batch_normalize=1 517 | filters=32 518 | size=1 519 | stride=1 520 | pad=1 521 | activation=leaky 522 | 523 | [convolutional] 524 | batch_normalize=1 525 | filters=254 526 | size=3 527 | stride=1 528 | pad=1 529 | activation=leaky 530 | 531 | [shortcut] 532 | from=-3 533 | activation=linear 534 | 535 | [convolutional] 536 | batch_normalize=1 537 | filters=32 538 | size=1 539 | stride=1 540 | pad=1 541 | activation=leaky 542 | 543 | [convolutional] 544 | batch_normalize=1 545 | filters=254 546 | size=3 547 | stride=1 548 | pad=1 549 | activation=leaky 550 | 551 | [shortcut] 552 | from=-3 553 | activation=linear 554 | 555 | [route] 556 | layers=-1,-30 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=224 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=leaky 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=128 569 | size=3 570 | stride=2 571 | pad=1 572 | activation=leaky 573 | 574 | [convolutional] 575 | batch_normalize=1 576 | filters=512 577 | size=1 578 | stride=1 579 | pad=1 580 | activation=leaky 581 | 582 | [maxpool] 583 | stride=1 584 | size=5 585 | 586 | [route] 587 | layers=-2 588 | 589 | [maxpool] 590 | stride=1 591 | size=9 592 | 593 | [route] 594 | layers=-4 595 | 596 | [maxpool] 597 | stride=1 598 | size=13 599 | 600 | [route] 601 | layers=-6,-5,-3,-1 602 | 603 | [convolutional] 604 | batch_normalize=1 605 | filters=112 606 | size=1 607 | stride=1 608 | pad=1 609 | activation=leaky 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=56 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=leaky 618 | 619 | [route] 620 | layers=-2 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=40 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=40 633 | size=1 634 | stride=1 635 | pad=1 636 | activation=leaky 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=40 641 | size=3 642 | stride=1 643 
| pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=32 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=48 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | filters=32 665 | size=1 666 | stride=1 667 | pad=1 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=40 673 | size=3 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [route] 679 | layers=-1,-9 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | filters=64 684 | size=1 685 | stride=1 686 | pad=1 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=512 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | [upsample] 698 | stride=2 699 | 700 | [route] 701 | layers=-1,-23 702 | 703 | [convolutional] 704 | batch_normalize=1 705 | filters=64 706 | size=1 707 | stride=1 708 | pad=1 709 | activation=leaky 710 | 711 | [route] 712 | layers=-2 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=96 717 | size=1 718 | stride=1 719 | pad=1 720 | activation=leaky 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=120 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | batch_normalize=1 732 | filters=152 733 | size=3 734 | stride=1 735 | pad=1 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=144 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | filters=144 749 | size=3 750 | stride=1 751 | pad=1 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=112 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | [convolutional] 763 | batch_normalize=1 764 | filters=120 765 | size=3 766 | stride=1 767 | pad=1 768 | activation=leaky 769 | 770 | [route] 771 | layers=-1,-9 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=120 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | filters=256 784 | size=1 785 | stride=1 786 | pad=1 787 | activation=leaky 788 | 789 | [upsample] 790 | stride=2 791 | 792 | [route] 793 | layers=-1,-70 794 | 795 | [convolutional] 796 | batch_normalize=1 797 | filters=72 798 | size=1 799 | stride=1 800 | pad=1 801 | activation=leaky 802 | 803 | [route] 804 | layers=-2 805 | 806 | [convolutional] 807 | batch_normalize=1 808 | filters=80 809 | size=1 810 | stride=1 811 | pad=1 812 | activation=leaky 813 | 814 | [convolutional] 815 | batch_normalize=1 816 | filters=88 817 | size=1 818 | stride=1 819 | pad=1 820 | activation=leaky 821 | 822 | [convolutional] 823 | batch_normalize=1 824 | filters=104 825 | size=3 826 | stride=1 827 | pad=1 828 | activation=leaky 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | filters=96 833 | size=1 834 | stride=1 835 | pad=1 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=96 841 | size=3 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | filters=80 849 | size=1 850 | stride=1 851 | pad=1 852 | activation=leaky 853 | 854 | [convolutional] 855 | batch_normalize=1 856 | filters=112 857 | size=3 858 | stride=1 859 | pad=1 860 | activation=leaky 861 | 862 | [route] 863 | layers=-1,-9 
864 | 865 | [convolutional] 866 | batch_normalize=1 867 | filters=208 868 | size=1 869 | stride=1 870 | pad=1 871 | activation=leaky 872 | 873 | [convolutional] 874 | size=1 875 | stride=1 876 | pad=1 877 | filters=24 878 | activation=linear 879 | 880 | [yolo] 881 | mask=0,1,2 882 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 883 | classes=3 884 | num=9 885 | jitter=.3 886 | ignore_thresh=.7 887 | truth_thresh=1 888 | scale_x_y=1.2 889 | iou_thresh=0.213 890 | cls_normalizer=1.0 891 | iou_normalizer=0.07 892 | iou_loss=ciou 893 | nms_kind=greedynms 894 | beta_nms=0.6 895 | 896 | [route] 897 | layers=-3 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=120 902 | size=3 903 | stride=2 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers=-1,-18 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=128 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [route] 919 | layers=-2 920 | 921 | [convolutional] 922 | batch_normalize=1 923 | filters=112 924 | size=1 925 | stride=1 926 | pad=1 927 | activation=leaky 928 | 929 | [convolutional] 930 | batch_normalize=1 931 | filters=104 932 | size=1 933 | stride=1 934 | pad=1 935 | activation=leaky 936 | 937 | [convolutional] 938 | batch_normalize=1 939 | filters=96 940 | size=3 941 | stride=1 942 | pad=1 943 | activation=leaky 944 | 945 | [convolutional] 946 | batch_normalize=1 947 | filters=80 948 | size=1 949 | stride=1 950 | pad=1 951 | activation=leaky 952 | 953 | [convolutional] 954 | batch_normalize=1 955 | filters=96 956 | size=3 957 | stride=1 958 | pad=1 959 | activation=leaky 960 | 961 | [convolutional] 962 | batch_normalize=1 963 | filters=88 964 | size=1 965 | stride=1 966 | pad=1 967 | activation=leaky 968 | 969 | [convolutional] 970 | batch_normalize=1 971 | filters=112 972 | size=3 973 | stride=1 974 | pad=1 975 | activation=leaky 976 | 977 | [route] 978 | layers=-1,-9 979 | 980 | [convolutional] 981 | batch_normalize=1 982 | filters=240 983 | size=1 984 | stride=1 985 | pad=1 986 | activation=leaky 987 | 988 | [convolutional] 989 | size=1 990 | stride=1 991 | pad=1 992 | filters=24 993 | activation=linear 994 | 995 | [yolo] 996 | mask=3,4,5 997 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 998 | classes=3 999 | num=9 1000 | jitter=.3 1001 | ignore_thresh=.7 1002 | truth_thresh=1 1003 | scale_x_y=1.2 1004 | iou_thresh=0.213 1005 | cls_normalizer=1.0 1006 | iou_normalizer=0.07 1007 | iou_loss=ciou 1008 | nms_kind=greedynms 1009 | beta_nms=0.6 1010 | 1011 | [route] 1012 | layers=-3 1013 | 1014 | [convolutional] 1015 | batch_normalize=1 1016 | filters=224 1017 | size=3 1018 | stride=2 1019 | pad=1 1020 | activation=leaky 1021 | 1022 | [route] 1023 | layers=-1,-48 1024 | 1025 | [convolutional] 1026 | batch_normalize=1 1027 | filters=104 1028 | size=1 1029 | stride=1 1030 | pad=1 1031 | activation=leaky 1032 | 1033 | [route] 1034 | layers=-2 1035 | 1036 | [convolutional] 1037 | batch_normalize=1 1038 | filters=40 1039 | size=1 1040 | stride=1 1041 | pad=1 1042 | activation=leaky 1043 | 1044 | [convolutional] 1045 | batch_normalize=1 1046 | filters=40 1047 | size=1 1048 | stride=1 1049 | pad=1 1050 | activation=leaky 1051 | 1052 | [convolutional] 1053 | batch_normalize=1 1054 | filters=40 1055 | size=3 1056 | stride=1 1057 | pad=1 1058 | activation=leaky 1059 | 1060 | [convolutional] 1061 | batch_normalize=1 1062 | filters=32 1063 | size=1 1064 | stride=1 1065 | pad=1 1066 | activation=leaky 1067 | 1068 | [convolutional] 
1069 | batch_normalize=1 1070 | filters=48 1071 | size=3 1072 | stride=1 1073 | pad=1 1074 | activation=leaky 1075 | 1076 | [convolutional] 1077 | batch_normalize=1 1078 | filters=48 1079 | size=1 1080 | stride=1 1081 | pad=1 1082 | activation=leaky 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=40 1087 | size=3 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [route] 1093 | layers=-1,-9 1094 | 1095 | [convolutional] 1096 | batch_normalize=1 1097 | filters=232 1098 | size=1 1099 | stride=1 1100 | pad=1 1101 | activation=leaky 1102 | 1103 | [convolutional] 1104 | size=1 1105 | stride=1 1106 | pad=1 1107 | filters=24 1108 | activation=linear 1109 | 1110 | [yolo] 1111 | mask=6,7,8 1112 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 1113 | classes=3 1114 | num=9 1115 | jitter=.3 1116 | ignore_thresh=.7 1117 | truth_thresh=1 1118 | scale_x_y=1.2 1119 | iou_thresh=0.213 1120 | cls_normalizer=1.0 1121 | iou_normalizer=0.07 1122 | iou_loss=ciou 1123 | nms_kind=greedynms 1124 | beta_nms=0.6 1125 | 1126 | -------------------------------------------------------------------------------- /cfg/0514/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.949 8 | decay=0.0005 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | learning_rate=0.00261 14 | burn_in=1000 15 | max_batches=500500 16 | policy=steps 17 | steps=400000,450000 18 | scales=.1,.1 19 | mosaic=1 20 | 21 | [focus] 22 | filters=12 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=40 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=112 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=leaky 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [route] 49 | layers=-2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=64 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [shortcut] 76 | from=-3 77 | activation=linear 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=64 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [shortcut] 96 | from=-3 97 | activation=linear 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=64 102 | size=1 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=64 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [shortcut] 116 | from=-3 117 | activation=linear 118 | 119 | [route] 120 | layers=-1,-12 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=128 125 | size=1 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | batch_normalize=1 132 | filters=248 133 | size=3 134 | stride=2 135 | pad=1 136 | activation=leaky 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=128 141 | size=1 142 | stride=1 143 | pad=1 144 | 
activation=leaky 145 | 146 | [route] 147 | layers=-2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=128 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=56 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=56 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=leaky 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=3 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [shortcut] 194 | from=-3 195 | activation=linear 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | filters=24 200 | size=1 201 | stride=1 202 | pad=1 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=3 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [shortcut] 214 | from=-3 215 | activation=linear 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=24 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=leaky 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=3 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [shortcut] 234 | from=-3 235 | activation=linear 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=40 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [shortcut] 254 | from=-3 255 | activation=linear 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=32 260 | size=1 261 | stride=1 262 | pad=1 263 | activation=leaky 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [shortcut] 274 | from=-3 275 | activation=linear 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=64 280 | size=1 281 | stride=1 282 | pad=1 283 | activation=leaky 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=128 288 | size=3 289 | stride=1 290 | pad=1 291 | activation=leaky 292 | 293 | [shortcut] 294 | from=-3 295 | activation=linear 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=64 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | filters=128 308 | size=3 309 | stride=1 310 | pad=1 311 | activation=leaky 312 | 313 | [shortcut] 314 | from=-3 315 | activation=linear 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=56 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=128 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | [route] 338 | layers=-1,-30 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=240 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=240 351 | size=3 352 | stride=2 353 | pad=1 354 | activation=leaky 355 | 356 | [convolutional] 357 | batch_normalize=1 358 | filters=96 359 | size=1 360 | stride=1 361 | pad=1 362 | 
activation=leaky 363 | 364 | [route] 365 | layers=-2 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=245 370 | size=1 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [convolutional] 376 | batch_normalize=1 377 | filters=56 378 | size=1 379 | stride=1 380 | pad=1 381 | activation=leaky 382 | 383 | [convolutional] 384 | batch_normalize=1 385 | filters=245 386 | size=3 387 | stride=1 388 | pad=1 389 | activation=leaky 390 | 391 | [shortcut] 392 | from=-3 393 | activation=linear 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=8 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=leaky 402 | 403 | [convolutional] 404 | batch_normalize=1 405 | filters=245 406 | size=3 407 | stride=1 408 | pad=1 409 | activation=leaky 410 | 411 | [shortcut] 412 | from=-3 413 | activation=linear 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=16 418 | size=1 419 | stride=1 420 | pad=1 421 | activation=leaky 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=245 426 | size=3 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [shortcut] 432 | from=-3 433 | activation=linear 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=24 438 | size=1 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=245 446 | size=3 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [shortcut] 452 | from=-3 453 | activation=linear 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=16 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=245 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=leaky 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | [convolutional] 476 | batch_normalize=1 477 | filters=24 478 | size=1 479 | stride=1 480 | pad=1 481 | activation=leaky 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=245 486 | size=3 487 | stride=1 488 | pad=1 489 | activation=leaky 490 | 491 | [shortcut] 492 | from=-3 493 | activation=linear 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=16 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=leaky 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=245 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=leaky 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | [convolutional] 516 | batch_normalize=1 517 | filters=32 518 | size=1 519 | stride=1 520 | pad=1 521 | activation=leaky 522 | 523 | [convolutional] 524 | batch_normalize=1 525 | filters=245 526 | size=3 527 | stride=1 528 | pad=1 529 | activation=leaky 530 | 531 | [shortcut] 532 | from=-3 533 | activation=linear 534 | 535 | [convolutional] 536 | batch_normalize=1 537 | filters=24 538 | size=1 539 | stride=1 540 | pad=1 541 | activation=leaky 542 | 543 | [convolutional] 544 | batch_normalize=1 545 | filters=245 546 | size=3 547 | stride=1 548 | pad=1 549 | activation=leaky 550 | 551 | [shortcut] 552 | from=-3 553 | activation=linear 554 | 555 | [route] 556 | layers=-1,-30 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=144 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=leaky 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=104 569 | size=3 570 | stride=2 571 | pad=1 572 | activation=leaky 573 | 574 | [convolutional] 575 | batch_normalize=1 576 | filters=512 577 | size=1 578 | stride=1 579 | pad=1 580 | 
activation=leaky 581 | 582 | [maxpool] 583 | stride=1 584 | size=5 585 | 586 | [route] 587 | layers=-2 588 | 589 | [maxpool] 590 | stride=1 591 | size=9 592 | 593 | [route] 594 | layers=-4 595 | 596 | [maxpool] 597 | stride=1 598 | size=13 599 | 600 | [route] 601 | layers=-6,-5,-3,-1 602 | 603 | [convolutional] 604 | batch_normalize=1 605 | filters=56 606 | size=1 607 | stride=1 608 | pad=1 609 | activation=leaky 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=40 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=leaky 618 | 619 | [route] 620 | layers=-2 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=8 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=8 633 | size=1 634 | stride=1 635 | pad=1 636 | activation=leaky 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=8 641 | size=3 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=8 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=16 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | filters=8 665 | size=1 666 | stride=1 667 | pad=1 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=8 673 | size=3 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [route] 679 | layers=-1,-9 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | filters=40 684 | size=1 685 | stride=1 686 | pad=1 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=512 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | [upsample] 698 | stride=2 699 | 700 | [route] 701 | layers=-1,-23 702 | 703 | [convolutional] 704 | batch_normalize=1 705 | filters=48 706 | size=1 707 | stride=1 708 | pad=1 709 | activation=leaky 710 | 711 | [route] 712 | layers=-2 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=80 717 | size=1 718 | stride=1 719 | pad=1 720 | activation=leaky 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=104 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | batch_normalize=1 732 | filters=128 733 | size=3 734 | stride=1 735 | pad=1 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=112 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | filters=112 749 | size=3 750 | stride=1 751 | pad=1 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=96 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | [convolutional] 763 | batch_normalize=1 764 | filters=104 765 | size=3 766 | stride=1 767 | pad=1 768 | activation=leaky 769 | 770 | [route] 771 | layers=-1,-9 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=88 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | filters=256 784 | size=1 785 | stride=1 786 | pad=1 787 | activation=leaky 788 | 789 | [upsample] 790 | stride=2 791 | 792 | [route] 793 | layers=-1,-70 794 | 795 | [convolutional] 796 | batch_normalize=1 797 | filters=32 798 | size=1 799 | stride=1 800 | pad=1 801 | activation=leaky 802 | 803 | [route] 804 | layers=-2 805 | 806 | 
[convolutional] 807 | batch_normalize=1 808 | filters=72 809 | size=1 810 | stride=1 811 | pad=1 812 | activation=leaky 813 | 814 | [convolutional] 815 | batch_normalize=1 816 | filters=72 817 | size=1 818 | stride=1 819 | pad=1 820 | activation=leaky 821 | 822 | [convolutional] 823 | batch_normalize=1 824 | filters=96 825 | size=3 826 | stride=1 827 | pad=1 828 | activation=leaky 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | filters=88 833 | size=1 834 | stride=1 835 | pad=1 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=88 841 | size=3 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | filters=80 849 | size=1 850 | stride=1 851 | pad=1 852 | activation=leaky 853 | 854 | [convolutional] 855 | batch_normalize=1 856 | filters=96 857 | size=3 858 | stride=1 859 | pad=1 860 | activation=leaky 861 | 862 | [route] 863 | layers=-1,-9 864 | 865 | [convolutional] 866 | batch_normalize=1 867 | filters=96 868 | size=1 869 | stride=1 870 | pad=1 871 | activation=leaky 872 | 873 | [convolutional] 874 | size=1 875 | stride=1 876 | pad=1 877 | filters=24 878 | activation=linear 879 | 880 | [yolo] 881 | mask=0,1,2 882 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 883 | classes=3 884 | num=9 885 | jitter=.3 886 | ignore_thresh=.7 887 | truth_thresh=1 888 | scale_x_y=1.2 889 | iou_thresh=0.213 890 | cls_normalizer=1.0 891 | iou_normalizer=0.07 892 | iou_loss=ciou 893 | nms_kind=greedynms 894 | beta_nms=0.6 895 | 896 | [route] 897 | layers=-3 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=72 902 | size=3 903 | stride=2 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers=-1,-18 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=64 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [route] 919 | layers=-2 920 | 921 | [convolutional] 922 | batch_normalize=1 923 | filters=56 924 | size=1 925 | stride=1 926 | pad=1 927 | activation=leaky 928 | 929 | [convolutional] 930 | batch_normalize=1 931 | filters=48 932 | size=1 933 | stride=1 934 | pad=1 935 | activation=leaky 936 | 937 | [convolutional] 938 | batch_normalize=1 939 | filters=64 940 | size=3 941 | stride=1 942 | pad=1 943 | activation=leaky 944 | 945 | [convolutional] 946 | batch_normalize=1 947 | filters=48 948 | size=1 949 | stride=1 950 | pad=1 951 | activation=leaky 952 | 953 | [convolutional] 954 | batch_normalize=1 955 | filters=64 956 | size=3 957 | stride=1 958 | pad=1 959 | activation=leaky 960 | 961 | [convolutional] 962 | batch_normalize=1 963 | filters=64 964 | size=1 965 | stride=1 966 | pad=1 967 | activation=leaky 968 | 969 | [convolutional] 970 | batch_normalize=1 971 | filters=80 972 | size=3 973 | stride=1 974 | pad=1 975 | activation=leaky 976 | 977 | [route] 978 | layers=-1,-9 979 | 980 | [convolutional] 981 | batch_normalize=1 982 | filters=104 983 | size=1 984 | stride=1 985 | pad=1 986 | activation=leaky 987 | 988 | [convolutional] 989 | size=1 990 | stride=1 991 | pad=1 992 | filters=24 993 | activation=linear 994 | 995 | [yolo] 996 | mask=3,4,5 997 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 998 | classes=3 999 | num=9 1000 | jitter=.3 1001 | ignore_thresh=.7 1002 | truth_thresh=1 1003 | scale_x_y=1.2 1004 | iou_thresh=0.213 1005 | cls_normalizer=1.0 1006 | iou_normalizer=0.07 1007 | iou_loss=ciou 1008 | nms_kind=greedynms 1009 | beta_nms=0.6 1010 | 1011 | [route] 1012 | layers=-3 
1013 | 1014 | [convolutional] 1015 | batch_normalize=1 1016 | filters=136 1017 | size=3 1018 | stride=2 1019 | pad=1 1020 | activation=leaky 1021 | 1022 | [route] 1023 | layers=-1,-48 1024 | 1025 | [convolutional] 1026 | batch_normalize=1 1027 | filters=64 1028 | size=1 1029 | stride=1 1030 | pad=1 1031 | activation=leaky 1032 | 1033 | [route] 1034 | layers=-2 1035 | 1036 | [convolutional] 1037 | batch_normalize=1 1038 | filters=8 1039 | size=1 1040 | stride=1 1041 | pad=1 1042 | activation=leaky 1043 | 1044 | [convolutional] 1045 | batch_normalize=1 1046 | filters=8 1047 | size=1 1048 | stride=1 1049 | pad=1 1050 | activation=leaky 1051 | 1052 | [convolutional] 1053 | batch_normalize=1 1054 | filters=8 1055 | size=3 1056 | stride=1 1057 | pad=1 1058 | activation=leaky 1059 | 1060 | [convolutional] 1061 | batch_normalize=1 1062 | filters=8 1063 | size=1 1064 | stride=1 1065 | pad=1 1066 | activation=leaky 1067 | 1068 | [convolutional] 1069 | batch_normalize=1 1070 | filters=8 1071 | size=3 1072 | stride=1 1073 | pad=1 1074 | activation=leaky 1075 | 1076 | [convolutional] 1077 | batch_normalize=1 1078 | filters=8 1079 | size=1 1080 | stride=1 1081 | pad=1 1082 | activation=leaky 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=8 1087 | size=3 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [route] 1093 | layers=-1,-9 1094 | 1095 | [convolutional] 1096 | batch_normalize=1 1097 | filters=72 1098 | size=1 1099 | stride=1 1100 | pad=1 1101 | activation=leaky 1102 | 1103 | [convolutional] 1104 | size=1 1105 | stride=1 1106 | pad=1 1107 | filters=24 1108 | activation=linear 1109 | 1110 | [yolo] 1111 | mask=6,7,8 1112 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 1113 | classes=3 1114 | num=9 1115 | jitter=.3 1116 | ignore_thresh=.7 1117 | truth_thresh=1 1118 | scale_x_y=1.2 1119 | iou_thresh=0.213 1120 | cls_normalizer=1.0 1121 | iou_normalizer=0.07 1122 | iou_loss=ciou 1123 | nms_kind=greedynms 1124 | beta_nms=0.6 1125 | 1126 | -------------------------------------------------------------------------------- /cfg/0514/yolov5s_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | [focus] 30 | filters=12 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=SiLU 39 | 40 | # Downsample 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=2 46 | pad=1 47 | activation=SiLU 48 | 49 | #C3 50 | [convolutional] 51 | batch_normalize=1 52 | filters=32 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=SiLU 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=32 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=SiLU 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=SiLU 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=32 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=SiLU 84 | 85 | [shortcut] 86 | 
from=-3 87 | activation=linear 88 | 89 | [route] 90 | layers = -1,-6 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=64 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=SiLU 99 | 100 | # Downsample 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=2 106 | pad=1 107 | activation=SiLU 108 | 109 | #C3 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=SiLU 117 | 118 | [route] 119 | layers = -2 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=64 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=SiLU 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=SiLU 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=SiLU 144 | 145 | [shortcut] 146 | from=-3 147 | activation=linear 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=64 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=SiLU 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=SiLU 164 | 165 | [shortcut] 166 | from=-3 167 | activation=linear 168 | 169 | [convolutional] 170 | batch_normalize=1 171 | filters=64 172 | size=1 173 | stride=1 174 | pad=1 175 | activation=SiLU 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=3 181 | stride=1 182 | pad=1 183 | activation=SiLU 184 | 185 | [shortcut] 186 | from=-3 187 | activation=linear 188 | 189 | [route] 190 | layers = -1,-12 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=128 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=SiLU 199 | 200 | # Downsample 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=3 205 | stride=2 206 | pad=1 207 | activation=SiLU 208 | 209 | #C3 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=1 214 | stride=1 215 | pad=1 216 | activation=SiLU 217 | 218 | [route] 219 | layers = -2 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | filters=128 224 | size=1 225 | stride=1 226 | pad=1 227 | activation=SiLU 228 | 229 | [convolutional] 230 | batch_normalize=1 231 | filters=128 232 | size=1 233 | stride=1 234 | pad=1 235 | activation=SiLU 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=3 241 | stride=1 242 | pad=1 243 | activation=SiLU 244 | 245 | [shortcut] 246 | from=-3 247 | activation=linear 248 | 249 | [convolutional] 250 | batch_normalize=1 251 | filters=128 252 | size=1 253 | stride=1 254 | pad=1 255 | activation=SiLU 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=128 260 | size=3 261 | stride=1 262 | pad=1 263 | activation=SiLU 264 | 265 | [shortcut] 266 | from=-3 267 | activation=linear 268 | 269 | [convolutional] 270 | batch_normalize=1 271 | filters=128 272 | size=1 273 | stride=1 274 | pad=1 275 | activation=SiLU 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=128 280 | size=3 281 | stride=1 282 | pad=1 283 | activation=SiLU 284 | 285 | [shortcut] 286 | from=-3 287 | activation=linear 288 | 289 | [route] 290 | layers = -1,-12 291 | 292 | [convolutional] 293 | batch_normalize=1 294 | filters=256 295 | size=1 296 | stride=1 297 | pad=1 298 | activation=SiLU 299 | 300 | # Downsample 301 | [convolutional] 302 | batch_normalize=1 303 | filters=512 304 | size=3 305 | stride=2 306 | pad=1 307 | 
activation=SiLU 308 | 309 | [convolutional] 310 | batch_normalize=1 311 | filters=256 312 | size=1 313 | stride=1 314 | pad=1 315 | activation=SiLU 316 | 317 | ### SPP ### 318 | [maxpool] 319 | stride=1 320 | size=5 321 | 322 | [route] 323 | layers=-2 324 | 325 | [maxpool] 326 | stride=1 327 | size=9 328 | 329 | [route] 330 | layers=-4 331 | 332 | [maxpool] 333 | stride=1 334 | size=13 335 | 336 | [route] 337 | ###layers=-1,-3,-5,-6 338 | layers=-6,-5,-3,-1 339 | ### End SPP ### 340 | 341 | [convolutional] 342 | batch_normalize=1 343 | filters=512 344 | size=1 345 | stride=1 346 | pad=1 347 | activation=SiLU 348 | 349 | #C3 350 | [convolutional] 351 | batch_normalize=1 352 | filters=256 353 | size=1 354 | stride=1 355 | pad=1 356 | activation=SiLU 357 | 358 | [route] 359 | layers = -2 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 364 | size=1 365 | stride=1 366 | pad=1 367 | activation=SiLU 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=256 372 | size=1 373 | stride=1 374 | pad=1 375 | activation=SiLU 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=3 381 | stride=1 382 | pad=1 383 | activation=SiLU 384 | 385 | [route] 386 | layers = -1,-5 387 | 388 | [convolutional] 389 | batch_normalize=1 390 | filters=512 391 | size=1 392 | stride=1 393 | pad=1 394 | activation=SiLU 395 | 396 | [convolutional] 397 | batch_normalize=1 398 | filters=256 399 | size=1 400 | stride=1 401 | pad=1 402 | activation=SiLU 403 | 404 | [upsample] 405 | stride=2 406 | 407 | [route] 408 | layers = -1,-19 409 | 410 | #C3 411 | [convolutional] 412 | batch_normalize=1 413 | filters=128 414 | size=1 415 | stride=1 416 | pad=1 417 | activation=SiLU 418 | 419 | [route] 420 | layers = -2 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=128 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=SiLU 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | filters=128 433 | size=1 434 | stride=1 435 | pad=1 436 | activation=SiLU 437 | 438 | [convolutional] 439 | batch_normalize=1 440 | filters=128 441 | size=3 442 | stride=1 443 | pad=1 444 | activation=SiLU 445 | 446 | [route] 447 | layers = -1,-5 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=256 452 | size=1 453 | stride=1 454 | pad=1 455 | activation=SiLU 456 | 457 | [convolutional] 458 | batch_normalize=1 459 | filters=128 460 | size=1 461 | stride=1 462 | pad=1 463 | activation=SiLU 464 | 465 | [upsample] 466 | stride=2 467 | 468 | [route] 469 | layers = -1,-44 470 | 471 | #C3 472 | [convolutional] 473 | batch_normalize=1 474 | filters=64 475 | size=1 476 | stride=1 477 | pad=1 478 | activation=SiLU 479 | 480 | [route] 481 | layers = -2 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=64 486 | size=1 487 | stride=1 488 | pad=1 489 | activation=SiLU 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=64 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=SiLU 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=64 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=SiLU 506 | 507 | [route] 508 | layers = -1,-5 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=128 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=SiLU 517 | 518 | ###################### 519 | [convolutional] 520 | size=1 521 | stride=1 522 | pad=1 523 | filters=255 524 | activation=linear 525 | 526 | [yolo] 527 | mask = 0,1,2 528 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 529 | 
classes=80 530 | num=9 531 | jitter=.3 532 | ignore_thresh = .7 533 | truth_thresh = 1 534 | scale_x_y = 1.2 535 | iou_thresh=0.213 536 | cls_normalizer=1.0 537 | iou_normalizer=0.07 538 | iou_loss=ciou 539 | nms_kind=greedynms 540 | beta_nms=0.6 541 | 542 | [route] 543 | layers = -3 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=128 548 | size=3 549 | stride=2 550 | pad=1 551 | activation=SiLU 552 | 553 | [route] 554 | layers = -1,-14 555 | 556 | #C3 557 | [convolutional] 558 | batch_normalize=1 559 | filters=128 560 | size=1 561 | stride=1 562 | pad=1 563 | activation=SiLU 564 | 565 | [route] 566 | layers = -2 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=128 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=SiLU 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | filters=128 579 | size=1 580 | stride=1 581 | pad=1 582 | activation=SiLU 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=128 587 | size=3 588 | stride=1 589 | pad=1 590 | activation=SiLU 591 | 592 | [route] 593 | layers = -1,-5 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | filters=256 598 | size=1 599 | stride=1 600 | pad=1 601 | activation=SiLU 602 | 603 | ###################### 604 | [convolutional] 605 | size=1 606 | stride=1 607 | pad=1 608 | filters=255 609 | activation=linear 610 | 611 | [yolo] 612 | mask = 3,4,5 613 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 614 | classes=80 615 | num=9 616 | jitter=.3 617 | ignore_thresh = .7 618 | truth_thresh = 1 619 | scale_x_y = 1.2 620 | iou_thresh=0.213 621 | cls_normalizer=1.0 622 | iou_normalizer=0.07 623 | iou_loss=ciou 624 | nms_kind=greedynms 625 | beta_nms=0.6 626 | 627 | [route] 628 | layers = -3 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=256 633 | size=3 634 | stride=2 635 | pad=1 636 | activation=SiLU 637 | 638 | [route] 639 | layers = -1,-36 640 | 641 | #C3 642 | [convolutional] 643 | batch_normalize=1 644 | filters=256 645 | size=1 646 | stride=1 647 | pad=1 648 | activation=SiLU 649 | 650 | [route] 651 | layers = -2 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=SiLU 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | filters=256 664 | size=1 665 | stride=1 666 | pad=1 667 | activation=SiLU 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=3 673 | stride=1 674 | pad=1 675 | activation=SiLU 676 | 677 | [route] 678 | layers = -1,-5 679 | 680 | [convolutional] 681 | batch_normalize=1 682 | filters=512 683 | size=1 684 | stride=1 685 | pad=1 686 | activation=SiLU 687 | 688 | ###################### 689 | [convolutional] 690 | size=1 691 | stride=1 692 | pad=1 693 | filters=255 694 | activation=linear 695 | 696 | [yolo] 697 | mask = 6,7,8 698 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 699 | classes=80 700 | num=9 701 | jitter=.3 702 | ignore_thresh = .7 703 | truth_thresh = 1 704 | scale_x_y = 1.2 705 | iou_thresh=0.213 706 | cls_normalizer=1.0 707 | iou_normalizer=0.07 708 | iou_loss=ciou 709 | nms_kind=greedynms 710 | beta_nms=0.6 711 | 712 | 713 | -------------------------------------------------------------------------------- /cfg/0514/yolov5s_v4_hand.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | 
channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | [focus] 30 | filters=12 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=SiLU 39 | 40 | # Downsample 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=2 46 | pad=1 47 | activation=SiLU 48 | 49 | #C3 50 | [convolutional] 51 | batch_normalize=1 52 | filters=32 53 | size=1 54 | stride=1 55 | pad=1 56 | activation=SiLU 57 | 58 | [route] 59 | layers = -2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=32 64 | size=1 65 | stride=1 66 | pad=1 67 | activation=SiLU 68 | 69 | [convolutional] 70 | batch_normalize=1 71 | filters=32 72 | size=1 73 | stride=1 74 | pad=1 75 | activation=SiLU 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=32 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=SiLU 84 | 85 | [shortcut] 86 | from=-3 87 | activation=linear 88 | 89 | [route] 90 | layers = -1,-6 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=64 95 | size=1 96 | stride=1 97 | pad=1 98 | activation=SiLU 99 | 100 | # Downsample 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=2 106 | pad=1 107 | activation=SiLU 108 | 109 | #C3 110 | [convolutional] 111 | batch_normalize=1 112 | filters=64 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=SiLU 117 | 118 | [route] 119 | layers = -2 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=64 124 | size=1 125 | stride=1 126 | pad=1 127 | activation=SiLU 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=64 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=SiLU 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=64 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=SiLU 144 | 145 | [shortcut] 146 | from=-3 147 | activation=linear 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=64 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=SiLU 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=64 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=SiLU 164 | 165 | [shortcut] 166 | from=-3 167 | activation=linear 168 | 169 | [convolutional] 170 | batch_normalize=1 171 | filters=64 172 | size=1 173 | stride=1 174 | pad=1 175 | activation=SiLU 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=64 180 | size=3 181 | stride=1 182 | pad=1 183 | activation=SiLU 184 | 185 | [shortcut] 186 | from=-3 187 | activation=linear 188 | 189 | [route] 190 | layers = -1,-12 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=128 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=SiLU 199 | 200 | # Downsample 201 | [convolutional] 202 | batch_normalize=1 203 | filters=256 204 | size=3 205 | stride=2 206 | pad=1 207 | activation=SiLU 208 | 209 | #C3 210 | [convolutional] 211 | batch_normalize=1 212 | filters=128 213 | size=1 214 | stride=1 215 | pad=1 216 | activation=SiLU 217 | 218 | [route] 219 | layers = -2 220 | 221 | [convolutional] 222 | batch_normalize=1 223 | filters=128 224 | size=1 225 | stride=1 226 | pad=1 227 | activation=SiLU 228 | 229 | [convolutional] 230 | batch_normalize=1 231 | filters=128 232 | size=1 233 | stride=1 
234 | pad=1 235 | activation=SiLU 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=3 241 | stride=1 242 | pad=1 243 | activation=SiLU 244 | 245 | [shortcut] 246 | from=-3 247 | activation=linear 248 | 249 | [convolutional] 250 | batch_normalize=1 251 | filters=128 252 | size=1 253 | stride=1 254 | pad=1 255 | activation=SiLU 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=128 260 | size=3 261 | stride=1 262 | pad=1 263 | activation=SiLU 264 | 265 | [shortcut] 266 | from=-3 267 | activation=linear 268 | 269 | [convolutional] 270 | batch_normalize=1 271 | filters=128 272 | size=1 273 | stride=1 274 | pad=1 275 | activation=SiLU 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=128 280 | size=3 281 | stride=1 282 | pad=1 283 | activation=SiLU 284 | 285 | [shortcut] 286 | from=-3 287 | activation=linear 288 | 289 | [route] 290 | layers = -1,-12 291 | 292 | [convolutional] 293 | batch_normalize=1 294 | filters=256 295 | size=1 296 | stride=1 297 | pad=1 298 | activation=SiLU 299 | 300 | # Downsample 301 | [convolutional] 302 | batch_normalize=1 303 | filters=512 304 | size=3 305 | stride=2 306 | pad=1 307 | activation=SiLU 308 | 309 | [convolutional] 310 | batch_normalize=1 311 | filters=256 312 | size=1 313 | stride=1 314 | pad=1 315 | activation=SiLU 316 | 317 | ### SPP ### 318 | [maxpool] 319 | stride=1 320 | size=5 321 | 322 | [route] 323 | layers=-2 324 | 325 | [maxpool] 326 | stride=1 327 | size=9 328 | 329 | [route] 330 | layers=-4 331 | 332 | [maxpool] 333 | stride=1 334 | size=13 335 | 336 | [route] 337 | ###layers=-1,-3,-5,-6 338 | layers=-6,-5,-3,-1 339 | ### End SPP ### 340 | 341 | [convolutional] 342 | batch_normalize=1 343 | filters=512 344 | size=1 345 | stride=1 346 | pad=1 347 | activation=SiLU 348 | 349 | #C3 350 | [convolutional] 351 | batch_normalize=1 352 | filters=256 353 | size=1 354 | stride=1 355 | pad=1 356 | activation=SiLU 357 | 358 | [route] 359 | layers = -2 360 | 361 | [convolutional] 362 | batch_normalize=1 363 | filters=256 364 | size=1 365 | stride=1 366 | pad=1 367 | activation=SiLU 368 | 369 | [convolutional] 370 | batch_normalize=1 371 | filters=256 372 | size=1 373 | stride=1 374 | pad=1 375 | activation=SiLU 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=3 381 | stride=1 382 | pad=1 383 | activation=SiLU 384 | 385 | [route] 386 | layers = -1,-5 387 | 388 | [convolutional] 389 | batch_normalize=1 390 | filters=512 391 | size=1 392 | stride=1 393 | pad=1 394 | activation=SiLU 395 | 396 | [convolutional] 397 | batch_normalize=1 398 | filters=256 399 | size=1 400 | stride=1 401 | pad=1 402 | activation=SiLU 403 | 404 | [upsample] 405 | stride=2 406 | 407 | [route] 408 | layers = -1,-19 409 | 410 | #C3 411 | [convolutional] 412 | batch_normalize=1 413 | filters=128 414 | size=1 415 | stride=1 416 | pad=1 417 | activation=SiLU 418 | 419 | [route] 420 | layers = -2 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=128 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=SiLU 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | filters=128 433 | size=1 434 | stride=1 435 | pad=1 436 | activation=SiLU 437 | 438 | [convolutional] 439 | batch_normalize=1 440 | filters=128 441 | size=3 442 | stride=1 443 | pad=1 444 | activation=SiLU 445 | 446 | [route] 447 | layers = -1,-5 448 | 449 | [convolutional] 450 | batch_normalize=1 451 | filters=256 452 | size=1 453 | stride=1 454 | pad=1 455 | activation=SiLU 456 | 457 | [convolutional] 458 | 
batch_normalize=1 459 | filters=128 460 | size=1 461 | stride=1 462 | pad=1 463 | activation=SiLU 464 | 465 | [upsample] 466 | stride=2 467 | 468 | [route] 469 | layers = -1,-44 470 | 471 | #C3 472 | [convolutional] 473 | batch_normalize=1 474 | filters=64 475 | size=1 476 | stride=1 477 | pad=1 478 | activation=SiLU 479 | 480 | [route] 481 | layers = -2 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=64 486 | size=1 487 | stride=1 488 | pad=1 489 | activation=SiLU 490 | 491 | [convolutional] 492 | batch_normalize=1 493 | filters=64 494 | size=1 495 | stride=1 496 | pad=1 497 | activation=SiLU 498 | 499 | [convolutional] 500 | batch_normalize=1 501 | filters=64 502 | size=3 503 | stride=1 504 | pad=1 505 | activation=SiLU 506 | 507 | [route] 508 | layers = -1,-5 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=128 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=SiLU 517 | 518 | ###################### 519 | [convolutional] 520 | size=1 521 | stride=1 522 | pad=1 523 | filters=18 524 | activation=linear 525 | 526 | [yolo] 527 | mask = 0,1,2 528 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 529 | classes=1 530 | num=9 531 | jitter=.3 532 | ignore_thresh = .7 533 | truth_thresh = 1 534 | scale_x_y = 1.2 535 | iou_thresh=0.213 536 | cls_normalizer=1.0 537 | iou_normalizer=0.07 538 | iou_loss=ciou 539 | nms_kind=greedynms 540 | beta_nms=0.6 541 | 542 | [route] 543 | layers = -3 544 | 545 | [convolutional] 546 | batch_normalize=1 547 | filters=128 548 | size=3 549 | stride=2 550 | pad=1 551 | activation=SiLU 552 | 553 | [route] 554 | layers = -1,-14 555 | 556 | #C3 557 | [convolutional] 558 | batch_normalize=1 559 | filters=128 560 | size=1 561 | stride=1 562 | pad=1 563 | activation=SiLU 564 | 565 | [route] 566 | layers = -2 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=128 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=SiLU 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | filters=128 579 | size=1 580 | stride=1 581 | pad=1 582 | activation=SiLU 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=128 587 | size=3 588 | stride=1 589 | pad=1 590 | activation=SiLU 591 | 592 | [route] 593 | layers = -1,-5 594 | 595 | [convolutional] 596 | batch_normalize=1 597 | filters=256 598 | size=1 599 | stride=1 600 | pad=1 601 | activation=SiLU 602 | 603 | ###################### 604 | [convolutional] 605 | size=1 606 | stride=1 607 | pad=1 608 | filters=18 609 | activation=linear 610 | 611 | [yolo] 612 | mask = 3,4,5 613 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 614 | classes=1 615 | num=9 616 | jitter=.3 617 | ignore_thresh = .7 618 | truth_thresh = 1 619 | scale_x_y = 1.2 620 | iou_thresh=0.213 621 | cls_normalizer=1.0 622 | iou_normalizer=0.07 623 | iou_loss=ciou 624 | nms_kind=greedynms 625 | beta_nms=0.6 626 | 627 | [route] 628 | layers = -3 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=256 633 | size=3 634 | stride=2 635 | pad=1 636 | activation=SiLU 637 | 638 | [route] 639 | layers = -1,-36 640 | 641 | #C3 642 | [convolutional] 643 | batch_normalize=1 644 | filters=256 645 | size=1 646 | stride=1 647 | pad=1 648 | activation=SiLU 649 | 650 | [route] 651 | layers = -2 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=SiLU 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | filters=256 664 | size=1 665 | stride=1 666 | pad=1 667 | 
activation=SiLU 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=3 673 | stride=1 674 | pad=1 675 | activation=SiLU 676 | 677 | [route] 678 | layers = -1,-5 679 | 680 | [convolutional] 681 | batch_normalize=1 682 | filters=512 683 | size=1 684 | stride=1 685 | pad=1 686 | activation=SiLU 687 | 688 | ###################### 689 | [convolutional] 690 | size=1 691 | stride=1 692 | pad=1 693 | filters=18 694 | activation=linear 695 | 696 | [yolo] 697 | mask = 6,7,8 698 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 699 | classes=1 700 | num=9 701 | jitter=.3 702 | ignore_thresh = .7 703 | truth_thresh = 1 704 | scale_x_y = 1.2 705 | iou_thresh=0.213 706 | cls_normalizer=1.0 707 | iou_normalizer=0.07 708 | iou_loss=ciou 709 | nms_kind=greedynms 710 | beta_nms=0.6 711 | 712 | 713 | -------------------------------------------------------------------------------- /cfg/last_prune/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.949 8 | decay=0.0005 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | learning_rate=0.00261 14 | burn_in=1000 15 | max_batches=500500 16 | policy=steps 17 | steps=400000,450000 18 | scales=.1,.1 19 | mosaic=1 20 | 21 | [focus] 22 | filters=12 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=40 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=112 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=leaky 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [route] 49 | layers=-2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=64 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [shortcut] 76 | from=-3 77 | activation=linear 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=64 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [shortcut] 96 | from=-3 97 | activation=linear 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=64 102 | size=1 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=64 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [shortcut] 116 | from=-3 117 | activation=linear 118 | 119 | [route] 120 | layers=-1,-12 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=128 125 | size=1 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | batch_normalize=1 132 | filters=248 133 | size=3 134 | stride=2 135 | pad=1 136 | activation=leaky 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=128 141 | size=1 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | [route] 147 | layers=-2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=128 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 
159 | filters=56 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=56 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=leaky 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=128 188 | size=3 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [shortcut] 194 | from=-3 195 | activation=linear 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | filters=24 200 | size=1 201 | stride=1 202 | pad=1 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=3 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [shortcut] 214 | from=-3 215 | activation=linear 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=24 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=leaky 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=3 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [shortcut] 234 | from=-3 235 | activation=linear 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=40 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [shortcut] 254 | from=-3 255 | activation=linear 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=32 260 | size=1 261 | stride=1 262 | pad=1 263 | activation=leaky 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [shortcut] 274 | from=-3 275 | activation=linear 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=64 280 | size=1 281 | stride=1 282 | pad=1 283 | activation=leaky 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=128 288 | size=3 289 | stride=1 290 | pad=1 291 | activation=leaky 292 | 293 | [shortcut] 294 | from=-3 295 | activation=linear 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=64 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | filters=128 308 | size=3 309 | stride=1 310 | pad=1 311 | activation=leaky 312 | 313 | [shortcut] 314 | from=-3 315 | activation=linear 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=56 320 | size=1 321 | stride=1 322 | pad=1 323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=128 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | [route] 338 | layers=-1,-30 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=240 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=240 351 | size=3 352 | stride=2 353 | pad=1 354 | activation=leaky 355 | 356 | [convolutional] 357 | batch_normalize=1 358 | filters=96 359 | size=1 360 | stride=1 361 | pad=1 362 | activation=leaky 363 | 364 | [route] 365 | layers=-2 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=245 370 | size=1 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [convolutional] 376 | batch_normalize=1 377 | 
filters=56 378 | size=1 379 | stride=1 380 | pad=1 381 | activation=leaky 382 | 383 | [convolutional] 384 | batch_normalize=1 385 | filters=245 386 | size=3 387 | stride=1 388 | pad=1 389 | activation=leaky 390 | 391 | [shortcut] 392 | from=-3 393 | activation=linear 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=8 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=leaky 402 | 403 | [convolutional] 404 | batch_normalize=1 405 | filters=245 406 | size=3 407 | stride=1 408 | pad=1 409 | activation=leaky 410 | 411 | [shortcut] 412 | from=-3 413 | activation=linear 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=16 418 | size=1 419 | stride=1 420 | pad=1 421 | activation=leaky 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=245 426 | size=3 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [shortcut] 432 | from=-3 433 | activation=linear 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=24 438 | size=1 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=245 446 | size=3 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [shortcut] 452 | from=-3 453 | activation=linear 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=16 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=245 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=leaky 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | [convolutional] 476 | batch_normalize=1 477 | filters=24 478 | size=1 479 | stride=1 480 | pad=1 481 | activation=leaky 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=245 486 | size=3 487 | stride=1 488 | pad=1 489 | activation=leaky 490 | 491 | [shortcut] 492 | from=-3 493 | activation=linear 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=16 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=leaky 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=245 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=leaky 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | [convolutional] 516 | batch_normalize=1 517 | filters=32 518 | size=1 519 | stride=1 520 | pad=1 521 | activation=leaky 522 | 523 | [convolutional] 524 | batch_normalize=1 525 | filters=245 526 | size=3 527 | stride=1 528 | pad=1 529 | activation=leaky 530 | 531 | [shortcut] 532 | from=-3 533 | activation=linear 534 | 535 | [convolutional] 536 | batch_normalize=1 537 | filters=24 538 | size=1 539 | stride=1 540 | pad=1 541 | activation=leaky 542 | 543 | [convolutional] 544 | batch_normalize=1 545 | filters=245 546 | size=3 547 | stride=1 548 | pad=1 549 | activation=leaky 550 | 551 | [shortcut] 552 | from=-3 553 | activation=linear 554 | 555 | [route] 556 | layers=-1,-30 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=144 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=leaky 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=104 569 | size=3 570 | stride=2 571 | pad=1 572 | activation=leaky 573 | 574 | [convolutional] 575 | batch_normalize=1 576 | filters=512 577 | size=1 578 | stride=1 579 | pad=1 580 | activation=leaky 581 | 582 | [maxpool] 583 | stride=1 584 | size=5 585 | 586 | [route] 587 | layers=-2 588 | 589 | [maxpool] 590 | stride=1 591 | size=9 592 | 593 | [route] 594 | layers=-4 595 | 596 | [maxpool] 597 | stride=1 598 | size=13 599 | 600 | 
[route] 601 | layers=-6,-5,-3,-1 602 | 603 | [convolutional] 604 | batch_normalize=1 605 | filters=56 606 | size=1 607 | stride=1 608 | pad=1 609 | activation=leaky 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=40 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=leaky 618 | 619 | [route] 620 | layers=-2 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=8 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=8 633 | size=1 634 | stride=1 635 | pad=1 636 | activation=leaky 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=8 641 | size=3 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=8 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=16 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | filters=8 665 | size=1 666 | stride=1 667 | pad=1 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=8 673 | size=3 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [route] 679 | layers=-1,-9 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | filters=40 684 | size=1 685 | stride=1 686 | pad=1 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=512 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | [upsample] 698 | stride=2 699 | 700 | [route] 701 | layers=-1,-23 702 | 703 | [convolutional] 704 | batch_normalize=1 705 | filters=48 706 | size=1 707 | stride=1 708 | pad=1 709 | activation=leaky 710 | 711 | [route] 712 | layers=-2 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=80 717 | size=1 718 | stride=1 719 | pad=1 720 | activation=leaky 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=104 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | batch_normalize=1 732 | filters=128 733 | size=3 734 | stride=1 735 | pad=1 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=112 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | filters=112 749 | size=3 750 | stride=1 751 | pad=1 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=96 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | [convolutional] 763 | batch_normalize=1 764 | filters=104 765 | size=3 766 | stride=1 767 | pad=1 768 | activation=leaky 769 | 770 | [route] 771 | layers=-1,-9 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=88 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | filters=256 784 | size=1 785 | stride=1 786 | pad=1 787 | activation=leaky 788 | 789 | [upsample] 790 | stride=2 791 | 792 | [route] 793 | layers=-1,-70 794 | 795 | [convolutional] 796 | batch_normalize=1 797 | filters=32 798 | size=1 799 | stride=1 800 | pad=1 801 | activation=leaky 802 | 803 | [route] 804 | layers=-2 805 | 806 | [convolutional] 807 | batch_normalize=1 808 | filters=72 809 | size=1 810 | stride=1 811 | pad=1 812 | activation=leaky 813 | 814 | [convolutional] 815 | batch_normalize=1 816 | filters=72 817 | size=1 818 | stride=1 819 | pad=1 820 | activation=leaky 821 | 822 
| [convolutional] 823 | batch_normalize=1 824 | filters=96 825 | size=3 826 | stride=1 827 | pad=1 828 | activation=leaky 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | filters=88 833 | size=1 834 | stride=1 835 | pad=1 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=88 841 | size=3 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | filters=80 849 | size=1 850 | stride=1 851 | pad=1 852 | activation=leaky 853 | 854 | [convolutional] 855 | batch_normalize=1 856 | filters=96 857 | size=3 858 | stride=1 859 | pad=1 860 | activation=leaky 861 | 862 | [route] 863 | layers=-1,-9 864 | 865 | [convolutional] 866 | batch_normalize=1 867 | filters=96 868 | size=1 869 | stride=1 870 | pad=1 871 | activation=leaky 872 | 873 | [convolutional] 874 | size=1 875 | stride=1 876 | pad=1 877 | filters=24 878 | activation=linear 879 | 880 | [yolo] 881 | mask=0,1,2 882 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 883 | classes=3 884 | num=9 885 | jitter=.3 886 | ignore_thresh=.7 887 | truth_thresh=1 888 | scale_x_y=1.2 889 | iou_thresh=0.213 890 | cls_normalizer=1.0 891 | iou_normalizer=0.07 892 | iou_loss=ciou 893 | nms_kind=greedynms 894 | beta_nms=0.6 895 | 896 | [route] 897 | layers=-3 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=72 902 | size=3 903 | stride=2 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers=-1,-18 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=64 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [route] 919 | layers=-2 920 | 921 | [convolutional] 922 | batch_normalize=1 923 | filters=56 924 | size=1 925 | stride=1 926 | pad=1 927 | activation=leaky 928 | 929 | [convolutional] 930 | batch_normalize=1 931 | filters=48 932 | size=1 933 | stride=1 934 | pad=1 935 | activation=leaky 936 | 937 | [convolutional] 938 | batch_normalize=1 939 | filters=64 940 | size=3 941 | stride=1 942 | pad=1 943 | activation=leaky 944 | 945 | [convolutional] 946 | batch_normalize=1 947 | filters=48 948 | size=1 949 | stride=1 950 | pad=1 951 | activation=leaky 952 | 953 | [convolutional] 954 | batch_normalize=1 955 | filters=64 956 | size=3 957 | stride=1 958 | pad=1 959 | activation=leaky 960 | 961 | [convolutional] 962 | batch_normalize=1 963 | filters=64 964 | size=1 965 | stride=1 966 | pad=1 967 | activation=leaky 968 | 969 | [convolutional] 970 | batch_normalize=1 971 | filters=80 972 | size=3 973 | stride=1 974 | pad=1 975 | activation=leaky 976 | 977 | [route] 978 | layers=-1,-9 979 | 980 | [convolutional] 981 | batch_normalize=1 982 | filters=104 983 | size=1 984 | stride=1 985 | pad=1 986 | activation=leaky 987 | 988 | [convolutional] 989 | size=1 990 | stride=1 991 | pad=1 992 | filters=24 993 | activation=linear 994 | 995 | [yolo] 996 | mask=3,4,5 997 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 998 | classes=3 999 | num=9 1000 | jitter=.3 1001 | ignore_thresh=.7 1002 | truth_thresh=1 1003 | scale_x_y=1.2 1004 | iou_thresh=0.213 1005 | cls_normalizer=1.0 1006 | iou_normalizer=0.07 1007 | iou_loss=ciou 1008 | nms_kind=greedynms 1009 | beta_nms=0.6 1010 | 1011 | [route] 1012 | layers=-3 1013 | 1014 | [convolutional] 1015 | batch_normalize=1 1016 | filters=136 1017 | size=3 1018 | stride=2 1019 | pad=1 1020 | activation=leaky 1021 | 1022 | [route] 1023 | layers=-1,-48 1024 | 1025 | [convolutional] 1026 | batch_normalize=1 1027 | filters=64 1028 | size=1 
1029 | stride=1 1030 | pad=1 1031 | activation=leaky 1032 | 1033 | [route] 1034 | layers=-2 1035 | 1036 | [convolutional] 1037 | batch_normalize=1 1038 | filters=8 1039 | size=1 1040 | stride=1 1041 | pad=1 1042 | activation=leaky 1043 | 1044 | [convolutional] 1045 | batch_normalize=1 1046 | filters=8 1047 | size=1 1048 | stride=1 1049 | pad=1 1050 | activation=leaky 1051 | 1052 | [convolutional] 1053 | batch_normalize=1 1054 | filters=8 1055 | size=3 1056 | stride=1 1057 | pad=1 1058 | activation=leaky 1059 | 1060 | [convolutional] 1061 | batch_normalize=1 1062 | filters=8 1063 | size=1 1064 | stride=1 1065 | pad=1 1066 | activation=leaky 1067 | 1068 | [convolutional] 1069 | batch_normalize=1 1070 | filters=8 1071 | size=3 1072 | stride=1 1073 | pad=1 1074 | activation=leaky 1075 | 1076 | [convolutional] 1077 | batch_normalize=1 1078 | filters=8 1079 | size=1 1080 | stride=1 1081 | pad=1 1082 | activation=leaky 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=8 1087 | size=3 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [route] 1093 | layers=-1,-9 1094 | 1095 | [convolutional] 1096 | batch_normalize=1 1097 | filters=72 1098 | size=1 1099 | stride=1 1100 | pad=1 1101 | activation=leaky 1102 | 1103 | [convolutional] 1104 | size=1 1105 | stride=1 1106 | pad=1 1107 | filters=24 1108 | activation=linear 1109 | 1110 | [yolo] 1111 | mask=6,7,8 1112 | anchors=40,39, 51,50, 61,59, 75,69, 62,92, 88,98, 115,77, 93,129, 128,115 1113 | classes=3 1114 | num=9 1115 | jitter=.3 1116 | ignore_thresh=.7 1117 | truth_thresh=1 1118 | scale_x_y=1.2 1119 | iou_thresh=0.213 1120 | cls_normalizer=1.0 1121 | iou_normalizer=0.07 1122 | iou_loss=ciou 1123 | nms_kind=greedynms 1124 | beta_nms=0.6 1125 | 1126 | -------------------------------------------------------------------------------- /cfg/prune_0.8_keep_0.01_8x_yolov5l_v4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.949 8 | decay=0.0005 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | learning_rate=0.00261 14 | burn_in=1000 15 | max_batches=500500 16 | policy=steps 17 | steps=400000,450000 18 | scales=.1,.1 19 | mosaic=1 20 | 21 | [focus] 22 | filters=12 23 | 24 | [convolutional] 25 | batch_normalize=1 26 | filters=56 27 | size=3 28 | stride=1 29 | pad=1 30 | activation=leaky 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=128 35 | size=3 36 | stride=2 37 | pad=1 38 | activation=leaky 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=56 43 | size=1 44 | stride=1 45 | pad=1 46 | activation=leaky 47 | 48 | [route] 49 | layers=-2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=56 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=leaky 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=64 70 | size=3 71 | stride=1 72 | pad=1 73 | activation=leaky 74 | 75 | [shortcut] 76 | from=-3 77 | activation=linear 78 | 79 | [convolutional] 80 | batch_normalize=1 81 | filters=48 82 | size=1 83 | stride=1 84 | pad=1 85 | activation=leaky 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [shortcut] 96 | from=-3 97 | activation=linear 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=16 102 | size=1 103 | 
stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=64 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [shortcut] 116 | from=-3 117 | activation=linear 118 | 119 | [route] 120 | layers=-1,-12 121 | 122 | [convolutional] 123 | batch_normalize=1 124 | filters=64 125 | size=1 126 | stride=1 127 | pad=1 128 | activation=leaky 129 | 130 | [convolutional] 131 | batch_normalize=1 132 | filters=16 133 | size=3 134 | stride=2 135 | pad=1 136 | activation=leaky 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=128 141 | size=1 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | [route] 147 | layers=-2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=109 152 | size=1 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=32 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=109 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [shortcut] 174 | from=-3 175 | activation=linear 176 | 177 | [convolutional] 178 | batch_normalize=1 179 | filters=16 180 | size=1 181 | stride=1 182 | pad=1 183 | activation=leaky 184 | 185 | [convolutional] 186 | batch_normalize=1 187 | filters=109 188 | size=3 189 | stride=1 190 | pad=1 191 | activation=leaky 192 | 193 | [shortcut] 194 | from=-3 195 | activation=linear 196 | 197 | [convolutional] 198 | batch_normalize=1 199 | filters=8 200 | size=1 201 | stride=1 202 | pad=1 203 | activation=leaky 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=109 208 | size=3 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [shortcut] 214 | from=-3 215 | activation=linear 216 | 217 | [convolutional] 218 | batch_normalize=1 219 | filters=8 220 | size=1 221 | stride=1 222 | pad=1 223 | activation=leaky 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=109 228 | size=3 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [shortcut] 234 | from=-3 235 | activation=linear 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=8 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=109 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [shortcut] 254 | from=-3 255 | activation=linear 256 | 257 | [convolutional] 258 | batch_normalize=1 259 | filters=8 260 | size=1 261 | stride=1 262 | pad=1 263 | activation=leaky 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=109 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [shortcut] 274 | from=-3 275 | activation=linear 276 | 277 | [convolutional] 278 | batch_normalize=1 279 | filters=8 280 | size=1 281 | stride=1 282 | pad=1 283 | activation=leaky 284 | 285 | [convolutional] 286 | batch_normalize=1 287 | filters=109 288 | size=3 289 | stride=1 290 | pad=1 291 | activation=leaky 292 | 293 | [shortcut] 294 | from=-3 295 | activation=linear 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=8 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | filters=109 308 | size=3 309 | stride=1 310 | pad=1 311 | activation=leaky 312 | 313 | [shortcut] 314 | from=-3 315 | activation=linear 316 | 317 | [convolutional] 318 | batch_normalize=1 319 | filters=8 320 | size=1 321 | stride=1 322 | pad=1 
323 | activation=leaky 324 | 325 | [convolutional] 326 | batch_normalize=1 327 | filters=109 328 | size=3 329 | stride=1 330 | pad=1 331 | activation=leaky 332 | 333 | [shortcut] 334 | from=-3 335 | activation=linear 336 | 337 | [route] 338 | layers=-1,-30 339 | 340 | [convolutional] 341 | batch_normalize=1 342 | filters=32 343 | size=1 344 | stride=1 345 | pad=1 346 | activation=leaky 347 | 348 | [convolutional] 349 | batch_normalize=1 350 | filters=32 351 | size=3 352 | stride=2 353 | pad=1 354 | activation=leaky 355 | 356 | [convolutional] 357 | batch_normalize=1 358 | filters=256 359 | size=1 360 | stride=1 361 | pad=1 362 | activation=leaky 363 | 364 | [route] 365 | layers=-2 366 | 367 | [convolutional] 368 | batch_normalize=1 369 | filters=231 370 | size=1 371 | stride=1 372 | pad=1 373 | activation=leaky 374 | 375 | [convolutional] 376 | batch_normalize=1 377 | filters=32 378 | size=1 379 | stride=1 380 | pad=1 381 | activation=leaky 382 | 383 | [convolutional] 384 | batch_normalize=1 385 | filters=231 386 | size=3 387 | stride=1 388 | pad=1 389 | activation=leaky 390 | 391 | [shortcut] 392 | from=-3 393 | activation=linear 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=32 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=leaky 402 | 403 | [convolutional] 404 | batch_normalize=1 405 | filters=231 406 | size=3 407 | stride=1 408 | pad=1 409 | activation=leaky 410 | 411 | [shortcut] 412 | from=-3 413 | activation=linear 414 | 415 | [convolutional] 416 | batch_normalize=1 417 | filters=32 418 | size=1 419 | stride=1 420 | pad=1 421 | activation=leaky 422 | 423 | [convolutional] 424 | batch_normalize=1 425 | filters=231 426 | size=3 427 | stride=1 428 | pad=1 429 | activation=leaky 430 | 431 | [shortcut] 432 | from=-3 433 | activation=linear 434 | 435 | [convolutional] 436 | batch_normalize=1 437 | filters=16 438 | size=1 439 | stride=1 440 | pad=1 441 | activation=leaky 442 | 443 | [convolutional] 444 | batch_normalize=1 445 | filters=231 446 | size=3 447 | stride=1 448 | pad=1 449 | activation=leaky 450 | 451 | [shortcut] 452 | from=-3 453 | activation=linear 454 | 455 | [convolutional] 456 | batch_normalize=1 457 | filters=24 458 | size=1 459 | stride=1 460 | pad=1 461 | activation=leaky 462 | 463 | [convolutional] 464 | batch_normalize=1 465 | filters=231 466 | size=3 467 | stride=1 468 | pad=1 469 | activation=leaky 470 | 471 | [shortcut] 472 | from=-3 473 | activation=linear 474 | 475 | [convolutional] 476 | batch_normalize=1 477 | filters=16 478 | size=1 479 | stride=1 480 | pad=1 481 | activation=leaky 482 | 483 | [convolutional] 484 | batch_normalize=1 485 | filters=231 486 | size=3 487 | stride=1 488 | pad=1 489 | activation=leaky 490 | 491 | [shortcut] 492 | from=-3 493 | activation=linear 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=8 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=leaky 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=231 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=leaky 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | [convolutional] 516 | batch_normalize=1 517 | filters=8 518 | size=1 519 | stride=1 520 | pad=1 521 | activation=leaky 522 | 523 | [convolutional] 524 | batch_normalize=1 525 | filters=231 526 | size=3 527 | stride=1 528 | pad=1 529 | activation=leaky 530 | 531 | [shortcut] 532 | from=-3 533 | activation=linear 534 | 535 | [convolutional] 536 | batch_normalize=1 537 | filters=8 538 | size=1 539 | stride=1 540 | pad=1 541 | 
activation=leaky 542 | 543 | [convolutional] 544 | batch_normalize=1 545 | filters=231 546 | size=3 547 | stride=1 548 | pad=1 549 | activation=leaky 550 | 551 | [shortcut] 552 | from=-3 553 | activation=linear 554 | 555 | [route] 556 | layers=-1,-30 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=96 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=leaky 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=408 569 | size=3 570 | stride=2 571 | pad=1 572 | activation=leaky 573 | 574 | [convolutional] 575 | batch_normalize=1 576 | filters=512 577 | size=1 578 | stride=1 579 | pad=1 580 | activation=leaky 581 | 582 | [maxpool] 583 | stride=1 584 | size=5 585 | 586 | [route] 587 | layers=-2 588 | 589 | [maxpool] 590 | stride=1 591 | size=9 592 | 593 | [route] 594 | layers=-4 595 | 596 | [maxpool] 597 | stride=1 598 | size=13 599 | 600 | [route] 601 | layers=-6,-5,-3,-1 602 | 603 | [convolutional] 604 | batch_normalize=1 605 | filters=320 606 | size=1 607 | stride=1 608 | pad=1 609 | activation=leaky 610 | 611 | [convolutional] 612 | batch_normalize=1 613 | filters=432 614 | size=1 615 | stride=1 616 | pad=1 617 | activation=leaky 618 | 619 | [route] 620 | layers=-2 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=152 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [convolutional] 631 | batch_normalize=1 632 | filters=56 633 | size=1 634 | stride=1 635 | pad=1 636 | activation=leaky 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=48 641 | size=3 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=32 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=56 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | filters=48 665 | size=1 666 | stride=1 667 | pad=1 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=16 673 | size=3 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [route] 679 | layers=-1,-9 680 | 681 | [convolutional] 682 | batch_normalize=1 683 | filters=464 684 | size=1 685 | stride=1 686 | pad=1 687 | activation=leaky 688 | 689 | [convolutional] 690 | batch_normalize=1 691 | filters=512 692 | size=1 693 | stride=1 694 | pad=1 695 | activation=leaky 696 | 697 | [upsample] 698 | stride=2 699 | 700 | [route] 701 | layers=-1,-23 702 | 703 | [convolutional] 704 | batch_normalize=1 705 | filters=96 706 | size=1 707 | stride=1 708 | pad=1 709 | activation=leaky 710 | 711 | [route] 712 | layers=-2 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=56 717 | size=1 718 | stride=1 719 | pad=1 720 | activation=leaky 721 | 722 | [convolutional] 723 | batch_normalize=1 724 | filters=80 725 | size=1 726 | stride=1 727 | pad=1 728 | activation=leaky 729 | 730 | [convolutional] 731 | batch_normalize=1 732 | filters=24 733 | size=3 734 | stride=1 735 | pad=1 736 | activation=leaky 737 | 738 | [convolutional] 739 | batch_normalize=1 740 | filters=64 741 | size=1 742 | stride=1 743 | pad=1 744 | activation=leaky 745 | 746 | [convolutional] 747 | batch_normalize=1 748 | filters=40 749 | size=3 750 | stride=1 751 | pad=1 752 | activation=leaky 753 | 754 | [convolutional] 755 | batch_normalize=1 756 | filters=48 757 | size=1 758 | stride=1 759 | pad=1 760 | activation=leaky 761 | 762 | [convolutional] 763 | batch_normalize=1 764 | 
filters=32 765 | size=3 766 | stride=1 767 | pad=1 768 | activation=leaky 769 | 770 | [route] 771 | layers=-1,-9 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=168 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | filters=256 784 | size=1 785 | stride=1 786 | pad=1 787 | activation=leaky 788 | 789 | [upsample] 790 | stride=2 791 | 792 | [route] 793 | layers=-1,-70 794 | 795 | [convolutional] 796 | batch_normalize=1 797 | filters=24 798 | size=1 799 | stride=1 800 | pad=1 801 | activation=leaky 802 | 803 | [route] 804 | layers=-2 805 | 806 | [convolutional] 807 | batch_normalize=1 808 | filters=8 809 | size=1 810 | stride=1 811 | pad=1 812 | activation=leaky 813 | 814 | [convolutional] 815 | batch_normalize=1 816 | filters=8 817 | size=1 818 | stride=1 819 | pad=1 820 | activation=leaky 821 | 822 | [convolutional] 823 | batch_normalize=1 824 | filters=8 825 | size=3 826 | stride=1 827 | pad=1 828 | activation=leaky 829 | 830 | [convolutional] 831 | batch_normalize=1 832 | filters=8 833 | size=1 834 | stride=1 835 | pad=1 836 | activation=leaky 837 | 838 | [convolutional] 839 | batch_normalize=1 840 | filters=8 841 | size=3 842 | stride=1 843 | pad=1 844 | activation=leaky 845 | 846 | [convolutional] 847 | batch_normalize=1 848 | filters=8 849 | size=1 850 | stride=1 851 | pad=1 852 | activation=leaky 853 | 854 | [convolutional] 855 | batch_normalize=1 856 | filters=24 857 | size=3 858 | stride=1 859 | pad=1 860 | activation=leaky 861 | 862 | [route] 863 | layers=-1,-9 864 | 865 | [convolutional] 866 | batch_normalize=1 867 | filters=176 868 | size=1 869 | stride=1 870 | pad=1 871 | activation=leaky 872 | 873 | [convolutional] 874 | size=1 875 | stride=1 876 | pad=1 877 | filters=24 878 | activation=linear 879 | 880 | [yolo] 881 | mask=0,1,2 882 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 883 | classes=3 884 | num=9 885 | jitter=.3 886 | ignore_thresh=.7 887 | truth_thresh=1 888 | scale_x_y=1.2 889 | iou_thresh=0.213 890 | cls_normalizer=1.0 891 | iou_normalizer=0.07 892 | iou_loss=ciou 893 | nms_kind=greedynms 894 | beta_nms=0.6 895 | 896 | [route] 897 | layers=-3 898 | 899 | [convolutional] 900 | batch_normalize=1 901 | filters=32 902 | size=3 903 | stride=2 904 | pad=1 905 | activation=leaky 906 | 907 | [route] 908 | layers=-1,-18 909 | 910 | [convolutional] 911 | batch_normalize=1 912 | filters=56 913 | size=1 914 | stride=1 915 | pad=1 916 | activation=leaky 917 | 918 | [route] 919 | layers=-2 920 | 921 | [convolutional] 922 | batch_normalize=1 923 | filters=48 924 | size=1 925 | stride=1 926 | pad=1 927 | activation=leaky 928 | 929 | [convolutional] 930 | batch_normalize=1 931 | filters=32 932 | size=1 933 | stride=1 934 | pad=1 935 | activation=leaky 936 | 937 | [convolutional] 938 | batch_normalize=1 939 | filters=32 940 | size=3 941 | stride=1 942 | pad=1 943 | activation=leaky 944 | 945 | [convolutional] 946 | batch_normalize=1 947 | filters=24 948 | size=1 949 | stride=1 950 | pad=1 951 | activation=leaky 952 | 953 | [convolutional] 954 | batch_normalize=1 955 | filters=24 956 | size=3 957 | stride=1 958 | pad=1 959 | activation=leaky 960 | 961 | [convolutional] 962 | batch_normalize=1 963 | filters=40 964 | size=1 965 | stride=1 966 | pad=1 967 | activation=leaky 968 | 969 | [convolutional] 970 | batch_normalize=1 971 | filters=64 972 | size=3 973 | stride=1 974 | pad=1 975 | activation=leaky 976 | 977 | [route] 978 | layers=-1,-9 979 | 980 | 
[convolutional] 981 | batch_normalize=1 982 | filters=240 983 | size=1 984 | stride=1 985 | pad=1 986 | activation=leaky 987 | 988 | [convolutional] 989 | size=1 990 | stride=1 991 | pad=1 992 | filters=24 993 | activation=linear 994 | 995 | [yolo] 996 | mask=3,4,5 997 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 998 | classes=3 999 | num=9 1000 | jitter=.3 1001 | ignore_thresh=.7 1002 | truth_thresh=1 1003 | scale_x_y=1.2 1004 | iou_thresh=0.213 1005 | cls_normalizer=1.0 1006 | iou_normalizer=0.07 1007 | iou_loss=ciou 1008 | nms_kind=greedynms 1009 | beta_nms=0.6 1010 | 1011 | [route] 1012 | layers=-3 1013 | 1014 | [convolutional] 1015 | batch_normalize=1 1016 | filters=176 1017 | size=3 1018 | stride=2 1019 | pad=1 1020 | activation=leaky 1021 | 1022 | [route] 1023 | layers=-1,-48 1024 | 1025 | [convolutional] 1026 | batch_normalize=1 1027 | filters=144 1028 | size=1 1029 | stride=1 1030 | pad=1 1031 | activation=leaky 1032 | 1033 | [route] 1034 | layers=-2 1035 | 1036 | [convolutional] 1037 | batch_normalize=1 1038 | filters=32 1039 | size=1 1040 | stride=1 1041 | pad=1 1042 | activation=leaky 1043 | 1044 | [convolutional] 1045 | batch_normalize=1 1046 | filters=16 1047 | size=1 1048 | stride=1 1049 | pad=1 1050 | activation=leaky 1051 | 1052 | [convolutional] 1053 | batch_normalize=1 1054 | filters=32 1055 | size=3 1056 | stride=1 1057 | pad=1 1058 | activation=leaky 1059 | 1060 | [convolutional] 1061 | batch_normalize=1 1062 | filters=16 1063 | size=1 1064 | stride=1 1065 | pad=1 1066 | activation=leaky 1067 | 1068 | [convolutional] 1069 | batch_normalize=1 1070 | filters=64 1071 | size=3 1072 | stride=1 1073 | pad=1 1074 | activation=leaky 1075 | 1076 | [convolutional] 1077 | batch_normalize=1 1078 | filters=88 1079 | size=1 1080 | stride=1 1081 | pad=1 1082 | activation=leaky 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=96 1087 | size=3 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [route] 1093 | layers=-1,-9 1094 | 1095 | [convolutional] 1096 | batch_normalize=1 1097 | filters=272 1098 | size=1 1099 | stride=1 1100 | pad=1 1101 | activation=leaky 1102 | 1103 | [convolutional] 1104 | size=1 1105 | stride=1 1106 | pad=1 1107 | filters=24 1108 | activation=linear 1109 | 1110 | [yolo] 1111 | mask=6,7,8 1112 | anchors=39,38, 49,67, 74,49, 74,86, 113,71, 97,119, 163,108, 134,155, 210,199 1113 | classes=3 1114 | num=9 1115 | jitter=.3 1116 | ignore_thresh=.7 1117 | truth_thresh=1 1118 | scale_x_y=1.2 1119 | iou_thresh=0.213 1120 | cls_normalizer=1.0 1121 | iou_normalizer=0.07 1122 | iou_loss=ciou 1123 | nms_kind=greedynms 1124 | beta_nms=0.6 1125 | 1126 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 
32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco_128img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_128img.txt 3 | valid=./data/coco_128img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_128img.txt: -------------------------------------------------------------------------------- 1 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000009.jpg 2 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000025.jpg 3 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000030.jpg 4 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000034.jpg 5 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000036.jpg 6 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000042.jpg 7 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000049.jpg 8 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000061.jpg 9 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000064.jpg 10 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000071.jpg 11 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000072.jpg 12 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000073.jpg 13 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000074.jpg 14 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000077.jpg 15 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000078.jpg 16 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000081.jpg 17 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000086.jpg 18 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000089.jpg 19 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000092.jpg 20 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000094.jpg 21 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000109.jpg 22 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000110.jpg 23 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000113.jpg 24 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000127.jpg 25 
| /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000133.jpg 26 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000136.jpg 27 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000138.jpg 28 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000142.jpg 29 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000143.jpg 30 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000144.jpg 31 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000149.jpg 32 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000151.jpg 33 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000154.jpg 34 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000164.jpg 35 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000165.jpg 36 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000192.jpg 37 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000194.jpg 38 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000196.jpg 39 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000201.jpg 40 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000208.jpg 41 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000241.jpg 42 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000247.jpg 43 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000250.jpg 44 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000257.jpg 45 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000260.jpg 46 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000263.jpg 47 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000283.jpg 48 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000294.jpg 49 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000307.jpg 50 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000308.jpg 51 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000309.jpg 52 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000312.jpg 53 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000315.jpg 54 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000321.jpg 55 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000322.jpg 56 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000326.jpg 57 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000328.jpg 58 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000332.jpg 59 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000338.jpg 60 | 
/home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000349.jpg 61 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000357.jpg 62 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000359.jpg 63 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000360.jpg 64 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000368.jpg 65 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000370.jpg 66 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000382.jpg 67 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000384.jpg 68 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000387.jpg 69 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000389.jpg 70 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000394.jpg 71 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000395.jpg 72 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000397.jpg 73 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000400.jpg 74 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000404.jpg 75 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000415.jpg 76 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000419.jpg 77 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000428.jpg 78 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000431.jpg 79 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000436.jpg 80 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000438.jpg 81 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000443.jpg 82 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000446.jpg 83 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000450.jpg 84 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000459.jpg 85 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000471.jpg 86 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000472.jpg 87 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000474.jpg 88 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000486.jpg 89 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000488.jpg 90 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000490.jpg 91 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000491.jpg 92 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000502.jpg 93 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000508.jpg 94 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000510.jpg 95 | 
/home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000514.jpg 96 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000520.jpg 97 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000529.jpg 98 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000531.jpg 99 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000532.jpg 100 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000536.jpg 101 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000540.jpg 102 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000542.jpg 103 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000544.jpg 104 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000560.jpg 105 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000562.jpg 106 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000564.jpg 107 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000569.jpg 108 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000572.jpg 109 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000575.jpg 110 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000581.jpg 111 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000584.jpg 112 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000589.jpg 113 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000590.jpg 114 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000595.jpg 115 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000597.jpg 116 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000599.jpg 117 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000605.jpg 118 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000612.jpg 119 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000620.jpg 120 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000623.jpg 121 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000625.jpg 122 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000626.jpg 123 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000629.jpg 124 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000634.jpg 125 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000636.jpg 126 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000641.jpg 127 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000643.jpg 128 | /home/lishuang/Disk/remote/pycharm/yolov5_prune/data/coco128/images/train2017/000000000650.jpg -------------------------------------------------------------------------------- /data/get_coco_dataset.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image 40 | -------------------------------------------------------------------------------- /data/get_coco_dataset_gdrive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859 3 | 4 | # Zip coco folder 5 | # zip -r coco.zip coco 6 | # tar -czvf coco.tar.gz coco 7 | 8 | # Set fileid and filename 9 | filename="coco.zip" 10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip 11 | 12 | # Download from Google Drive, accepting presented query 13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 15 | rm ./cookie 16 | 17 | # Unzip 18 | unzip -q ${filename} # for coco.zip 19 | # tar -xzf ${filename} # for coco.tar.gz 20 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BokyLiu/YoloV5sl_V4_prune/c0ff39c5a5b10cbca95beb597c722cdc02e81885/models/__init__.py -------------------------------------------------------------------------------- /models/common.py: -------------------------------------------------------------------------------- 1 | # This file contains modules common to various models 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | def autopad(k, p=None): # kernel, padding 9 | # Pad to 'same' 10 | if p is None: 11 | p = k // 2 if
isinstance(k, int) else [x // 2 for x in k] # auto-pad 12 | return p 13 | 14 | 15 | def DWConv(c1, c2, k=1, s=1, act=True): 16 | # Depthwise convolution 17 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 18 | 19 | 20 | class Conv(nn.Module): 21 | # Standard convolution 22 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 23 | super(Conv, self).__init__() 24 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 25 | self.bn = nn.BatchNorm2d(c2) 26 | #self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity() #yolov5_v2 27 | #self.act = nn.Hardswish() if act else nn.Identity() #yolov5_v3 28 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) #yolov5_v4 29 | 30 | def forward(self, x): 31 | return self.act(self.bn(self.conv(x))) 32 | 33 | def fuseforward(self, x): 34 | return self.act(self.conv(x)) 35 | 36 | 37 | class Bottleneck(nn.Module): 38 | # Standard bottleneck 39 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 40 | super(Bottleneck, self).__init__() 41 | c_ = int(c2 * e) # hidden channels 42 | self.cv1 = Conv(c1, c_, 1, 1) 43 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 44 | self.add = shortcut and c1 == c2 45 | 46 | def forward(self, x): 47 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 48 | 49 | 50 | class BottleneckCSP(nn.Module): 51 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 52 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 53 | super(BottleneckCSP, self).__init__() 54 | c_ = int(c2 * e) # hidden channels 55 | self.cv1 = Conv(c1, c_, 1, 1) 56 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 57 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 58 | self.cv4 = Conv(2 * c_, c2, 1, 1) 59 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 60 | self.act = nn.LeakyReLU(0.1, inplace=True) 61 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 62 | 63 | def forward(self, x): 64 | y1 = self.cv3(self.m(self.cv1(x))) 65 | y2 = self.cv2(x) 66 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 67 | 68 | class C3(nn.Module): 69 | # CSP Bottleneck with 3 convolutions 70 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 71 | super(C3, self).__init__() 72 | c_ = int(c2 * e) # hidden channels 73 | self.cv1 = Conv(c1, c_, 1, 1) 74 | self.cv2 = Conv(c1, c_, 1, 1) 75 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 76 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 77 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 78 | 79 | def forward(self, x): 80 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 81 | 82 | class SPP(nn.Module): 83 | # Spatial pyramid pooling layer used in YOLOv3-SPP 84 | def __init__(self, c1, c2, k=(5, 9, 13)): 85 | super(SPP, self).__init__() 86 | c_ = c1 // 2 # hidden channels 87 | self.cv1 = Conv(c1, c_, 1, 1) 88 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 89 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 90 | 91 | def forward(self, x): 92 | x = self.cv1(x) 93 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 94 | 95 | 96 | class Focus(nn.Module): 97 | # Focus wh information into 
c-space 98 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 99 | super(Focus, self).__init__() 100 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 101 | 102 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 103 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 104 | 105 | 106 | class Concat(nn.Module): 107 | # Concatenate a list of tensors along dimension 108 | def __init__(self, dimension=1): 109 | super(Concat, self).__init__() 110 | self.d = dimension 111 | 112 | def forward(self, x): 113 | return torch.cat(x, self.d) 114 | 115 | 116 | class Flatten(nn.Module): 117 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 118 | @staticmethod 119 | def forward(x): 120 | return x.view(x.size(0), -1) 121 | 122 | 123 | class Classify(nn.Module): 124 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 125 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 126 | super(Classify, self).__init__() 127 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 128 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) # to x(b,c2,1,1) 129 | self.flat = Flatten() 130 | 131 | def forward(self, x): 132 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 133 | return self.flat(self.conv(z)) # flatten to x(b,c2) 134 | -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | # from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | class Sum(nn.Module): 25 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 26 | def __init__(self, n, weight=False): # n: number of inputs 27 | super(Sum, self).__init__() 28 | self.weight = weight # apply weights boolean 29 | self.iter = range(n - 1) # iter object 30 | if weight: 31 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 32 | 33 | def forward(self, x): 34 | y = x[0] # no weight 35 | if self.weight: 36 | w = torch.sigmoid(self.w) * 2 37 | for i in self.iter: 38 | y = y + x[i + 1] * w[i] 39 | else: 40 | for i in self.iter: 41 | y = y + x[i + 1] 42 | return y 43 | 44 | 45 | class GhostConv(nn.Module): 46 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 47 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 48 | super(GhostConv, self).__init__() 49 | c_ = c2 // 2 # hidden channels 50 | self.cv1 = Conv(c1, c_, k, s, g, act) 51 | self.cv2 = Conv(c_, c_, 5, 1, c_, act) 52 | 53 | def forward(self, x): 54 | y = self.cv1(x) 55 | return torch.cat([y, self.cv2(y)], 1) 56 | 57 | 58 | class 
GhostBottleneck(nn.Module): 59 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 60 | def __init__(self, c1, c2, k, s): 61 | super(GhostBottleneck, self).__init__() 62 | c_ = c2 // 2 63 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 64 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 65 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 66 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 67 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 68 | 69 | def forward(self, x): 70 | return self.conv(x) + self.shortcut(x) 71 | 72 | 73 | class MixConv2d(nn.Module): 74 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 75 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 76 | super(MixConv2d, self).__init__() 77 | groups = len(k) 78 | if equal_ch: # equal c_ per group 79 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 80 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 81 | else: # equal weight.numel() per group 82 | b = [c2] + [0] * groups 83 | a = np.eye(groups + 1, groups, k=-1) 84 | a -= np.roll(a, 1, axis=1) 85 | a *= np.array(k) ** 2 86 | a[0] = 1 87 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 88 | 89 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 90 | self.bn = nn.BatchNorm2d(c2) 91 | self.act = nn.LeakyReLU(0.1, inplace=True) 92 | 93 | def forward(self, x): 94 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 95 | 96 | 97 | class Ensemble(nn.ModuleList): 98 | # Ensemble of models 99 | def __init__(self): 100 | super(Ensemble, self).__init__() 101 | 102 | def forward(self, x, augment=False): 103 | y = [] 104 | for module in self: 105 | y.append(module(x, augment)[0]) 106 | # y = torch.stack(y).max(0)[0] # max ensemble 107 | # y = torch.cat(y, 1) # nms ensemble 108 | y = torch.stack(y).mean(0) # mean ensemble 109 | return y, None # inference, train output 110 | 111 | 112 | # def attempt_load(weights, map_location=None): 113 | # # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 114 | # model = Ensemble() 115 | # for w in weights if isinstance(weights, list) else [weights]: 116 | # attempt_download(w) 117 | # model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 118 | # 119 | # if len(model) == 1: 120 | # return model[-1] # return model 121 | # else: 122 | # print('Ensemble created with %s\n' % weights) 123 | # for k in ['names', 'stride']: 124 | # setattr(model, k, getattr(model[-1], k)) 125 | # return model # return ensemble 126 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | 9 | import torch 10 | 11 | from utils.google_utils import attempt_download 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') 16 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') 17 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 18 | opt 
= parser.parse_args() 19 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 20 | print(opt) 21 | 22 | # Input 23 | img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection 24 | 25 | # Load PyTorch model 26 | attempt_download(opt.weights) 27 | model = torch.load(opt.weights, map_location=torch.device('cpu'))['model'].float() 28 | model.eval() 29 | model.model[-1].export = True # set Detect() layer export=True 30 | y = model(img) # dry run 31 | 32 | # TorchScript export 33 | try: 34 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 35 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 36 | ts = torch.jit.trace(model, img) 37 | ts.save(f) 38 | print('TorchScript export success, saved as %s' % f) 39 | except Exception as e: 40 | print('TorchScript export failure: %s' % e) 41 | 42 | # ONNX export 43 | try: 44 | import onnx 45 | 46 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 47 | f = opt.weights.replace('.pt', '.onnx') # filename 48 | model.fuse() # only for ONNX 49 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 50 | output_names=['classes', 'boxes'] if y is None else ['output']) 51 | 52 | # Checks 53 | onnx_model = onnx.load(f) # load onnx model 54 | onnx.checker.check_model(onnx_model) # check onnx model 55 | print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 56 | print('ONNX export success, saved as %s' % f) 57 | except Exception as e: 58 | print('ONNX export failure: %s' % e) 59 | 60 | # CoreML export 61 | try: 62 | import coremltools as ct 63 | 64 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 65 | # convert model from torchscript and apply pixel scaling as per detect.py 66 | model = ct.convert(ts, inputs=[ct.ImageType(name='images', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 67 | f = opt.weights.replace('.pt', '.mlmodel') # filename 68 | model.save(f) 69 | print('CoreML export success, saved as %s' % f) 70 | except Exception as e: 71 | print('CoreML export failure: %s' % e) 72 | 73 | # Finish 74 | print('\nExport complete. 
Visualize with https://github.com/lutzroeder/netron.') 75 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- 
/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [116,90, 156,198, 373,326] # P5/32 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [10,13, 16,30, 33,23] # P3/8 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import math 3 | from copy import deepcopy 4 | from pathlib import Path 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat,C3 10 | from models.experimental import MixConv2d, CrossConv 11 | from utils.general import check_anchor_order, make_divisible, check_file 12 | from utils.torch_utils import ( 13 | time_synchronized, fuse_conv_and_bn, model_info, initialize_weights, select_device) 14 | 15 | 16 | class Detect(nn.Module): 17 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 18 | super(Detect, self).__init__() 19 | self.stride = None # strides computed during build 20 | self.nc = nc # number of classes 21 | self.no = nc + 5 # number of outputs per anchor 22 | self.nl = len(anchors) # number of detection layers 23 | self.na = len(anchors[0]) // 2 # number of anchors 24 | self.grid = [torch.zeros(1)] * self.nl # init grid 25 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 26 | self.register_buffer('anchors', a) # shape(nl,na,2) 27 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 28 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 29 | self.export = False # onnx export 30 | 31 | def forward(self, x): 32 | # x = x.copy() # for profiling 33 | z = [] # inference output 34 | self.training |= self.export 35 | for i in range(self.nl): 36 | x[i] = self.m[i](x[i]) # conv 37 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 38 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 39 | 40 | if 
not self.training: # inference 41 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 42 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 43 | 44 | y = x[i].sigmoid() 45 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 46 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 47 | z.append(y.view(bs, -1, self.no)) 48 | 49 | return x if self.training else (torch.cat(z, 1), x) 50 | 51 | @staticmethod 52 | def _make_grid(nx=20, ny=20): 53 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 54 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 55 | 56 | 57 | class Model(nn.Module): 58 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 59 | super(Model, self).__init__() 60 | if isinstance(cfg, dict): 61 | self.yaml = cfg # model dict 62 | else: # is *.yaml 63 | import yaml # for torch hub 64 | self.yaml_file = Path(cfg).name 65 | with open(cfg) as f: 66 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 67 | 68 | # Define model 69 | if nc and nc != self.yaml['nc']: 70 | print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc)) 71 | self.yaml['nc'] = nc # override yaml value 72 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out 73 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 74 | 75 | # Build strides, anchors 76 | m = self.model[-1] # Detect() 77 | if isinstance(m, Detect): 78 | s = 128 # 2x min stride 79 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 80 | m.anchors /= m.stride.view(-1, 1, 1) 81 | check_anchor_order(m) 82 | self.stride = m.stride 83 | self._initialize_biases() # only run once 84 | # print('Strides: %s' % m.stride.tolist()) 85 | 86 | # Init weights, biases 87 | initialize_weights(self) 88 | self.info() 89 | print('') 90 | 91 | def forward(self, x, augment=False, profile=False): 92 | if augment: 93 | img_size = x.shape[-2:] # height, width 94 | s = [1, 0.83, 0.67] # scales 95 | f = [None, 3, None] # flips (2-ud, 3-lr) 96 | y = [] # outputs 97 | for si, fi in zip(s, f): 98 | xi = scale_img(x.flip(fi) if fi else x, si) 99 | yi = self.forward_once(xi)[0] # forward 100 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 101 | yi[..., :4] /= si # de-scale 102 | if fi == 2: 103 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 104 | elif fi == 3: 105 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 106 | y.append(yi) 107 | return torch.cat(y, 1), None # augmented inference, train 108 | else: 109 | return self.forward_once(x, profile) # single-scale inference, train 110 | 111 | def forward_once(self, x, profile=False): 112 | y, dt = [], [] # outputs 113 | for m in self.model: 114 | if m.f != -1: # if not from previous layer 115 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 116 | 117 | if profile: 118 | try: 119 | import thop 120 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # FLOPS 121 | except: 122 | o = 0 123 | t = time_synchronized() 124 | for _ in range(10): 125 | _ = m(x) 126 | dt.append((time_synchronized() - t) * 100) 127 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 128 | 129 | x = m(x) # run 130 | y.append(x if m.i in self.save else None) # save output 131 | 132 | if profile: 133 | print('%.1fms total' % sum(dt)) 134 | return x 135 | 136 | def 
_initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 137 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 138 | m = self.model[-1] # Detect() module 139 | for mi, s in zip(m.m, m.stride): # from 140 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 141 | b[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 142 | b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 143 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 144 | 145 | def _print_biases(self): 146 | m = self.model[-1] # Detect() module 147 | for mi in m.m: # from 148 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 149 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 150 | 151 | # def _print_weights(self): 152 | # for m in self.model.modules(): 153 | # if type(m) is Bottleneck: 154 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 155 | 156 | # def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 157 | # print('Fusing layers... ', end='') 158 | # for m in self.model.modules(): 159 | # if type(m) is Conv: 160 | # m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatability 161 | # m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 162 | # m.bn = None # remove batchnorm 163 | # m.forward = m.fuseforward # update forward 164 | # self.info() 165 | # return self 166 | 167 | def info(self): # print model information 168 | model_info(self) 169 | 170 | 171 | def parse_model(d, ch): # model_dict, input_channels(3) 172 | print('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 173 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 174 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 175 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 176 | 177 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 178 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 179 | m = eval(m) if isinstance(m, str) else m # eval strings 180 | for j, a in enumerate(args): 181 | try: 182 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 183 | except: 184 | pass 185 | 186 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 187 | if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 188 | c1, c2 = ch[f], args[0] 189 | 190 | # Normal 191 | # if i > 0 and args[0] != no: # channel expansion factor 192 | # ex = 1.75 # exponential (default 2.0) 193 | # e = math.log(c2 / ch[1]) / math.log(2) 194 | # c2 = int(ch[1] * ex ** e) 195 | # if m != Focus: 196 | 197 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 198 | 199 | # Experimental 200 | # if i > 0 and args[0] != no: # channel expansion factor 201 | # ex = 1 + gw # exponential (default 2.0) 202 | # ch1 = 32 # ch[1] 203 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 204 | # c2 = int(ch1 * ex ** e) 205 | # if m != Focus: 206 | # c2 = make_divisible(c2, 8) if c2 != no else c2 207 | 208 | args = [c1, c2, *args[1:]] 209 | if m in [BottleneckCSP, C3]: 210 | args.insert(2, n) 211 | n = 1 212 | elif m is nn.BatchNorm2d: 213 | args = [ch[f]] 214 | elif m is Concat: 215 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 216 | elif m is Detect: 217 | args.append([ch[x + 1] for x in f]) 218 | if 
isinstance(args[1], int): # number of anchors 219 | args[1] = [list(range(args[1] * 2))] * len(f) 220 | else: 221 | c2 = ch[f] 222 | 223 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 224 | t = str(m)[8:-2].replace('__main__.', '') # module type 225 | np = sum([x.numel() for x in m_.parameters()]) # number params 226 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 227 | print('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 228 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 229 | layers.append(m_) 230 | ch.append(c2) 231 | return nn.Sequential(*layers), sorted(save) 232 | 233 | 234 | if __name__ == '__main__': 235 | parser = argparse.ArgumentParser() 236 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 237 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 238 | opt = parser.parse_args() 239 | opt.cfg = check_file(opt.cfg) # check file 240 | device = select_device(opt.device) 241 | 242 | # Create model 243 | model = Model(opt.cfg).to(device) 244 | model.train() 245 | 246 | # Profile 247 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 248 | # y = model(img, profile=True) 249 | 250 | # ONNX export 251 | # model.model[-1].export = True 252 | # torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11) 253 | 254 | # Tensorboard 255 | # from torch.utils.tensorboard import SummaryWriter 256 | # tb_writer = SummaryWriter() 257 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 258 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 259 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 260 | -------------------------------------------------------------------------------- /models/yolov5s_v4.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, C3, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # 
Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /prune_yolov5s.sh: -------------------------------------------------------------------------------- 1 | python prune_yolov5s.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --percent 0.8 --img_size 640 -------------------------------------------------------------------------------- /slim_prune_yolov5s_8x.sh: -------------------------------------------------------------------------------- 1 | python slim_prune_yolov5s_8x.py --cfg cfg/yolov5s_v4_hand.cfg --data data/oxfordhand.data --weights weights/last_v4s.pt --global_percent 0.5 --layer_keep 0.01 --img_size 640 -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from modelsori import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | import torchvision 10 | 11 | def box_iouv5(box1, box2): 12 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 13 | """ 14 | Return intersection-over-union (Jaccard index) of boxes. 15 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 16 | Arguments: 17 | box1 (Tensor[N, 4]) 18 | box2 (Tensor[M, 4]) 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | 24 | def box_area(box): 25 | # box = 4xn 26 | return (box[2] - box[0]) * (box[3] - box[1]) 27 | 28 | area1 = box_area(box1.T) 29 | area2 = box_area(box2.T) 30 | 31 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 32 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 33 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 34 | 35 | def non_max_suppressionv5(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): 36 | """Performs Non-Maximum Suppression (NMS) on inference results 37 | 38 | Returns: 39 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 40 | """ 41 | if prediction.dtype is torch.float16: 42 | prediction = prediction.float() # to FP32 43 | 44 | nc = prediction[0].shape[1] - 5 # number of classes 45 | xc = prediction[..., 4] > conf_thres # candidates 46 | 47 | # Settings 48 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 49 | max_det = 300 # maximum number of detections per image 50 | time_limit = 10.0 # seconds to quit after 51 | redundant = True # require redundant detections 52 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 53 | 54 | t = time.time() 55 | output = [None] * prediction.shape[0] 56 | for xi, x in enumerate(prediction): # image index, image inference 57 | # Apply constraints 58 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 59 | x = x[xc[xi]] # confidence 60 | 61 | # If none remain process next image 62 | if not x.shape[0]: 63 | continue 64 | 65 | # Compute conf 66 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 67 | 68 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 69 | box = xywh2xyxy(x[:, :4]) 70 | 71 | # Detections matrix nx6 (xyxy, conf, cls) 72 | if multi_label: 73 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 74 | x = torch.cat((box[i], x[i, j + 5, 
None], j[:, None].float()), 1) 75 | else: # best class only 76 | conf, j = x[:, 5:].max(1, keepdim=True) 77 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 78 | 79 | # Filter by class 80 | if classes: 81 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 82 | 83 | # Apply finite constraint 84 | # if not torch.isfinite(x).all(): 85 | # x = x[torch.isfinite(x).all(1)] 86 | 87 | # If none remain process next image 88 | n = x.shape[0] # number of boxes 89 | if not n: 90 | continue 91 | 92 | # Sort by confidence 93 | # x = x[x[:, 4].argsort(descending=True)] 94 | 95 | # Batched NMS 96 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 97 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 98 | i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) 99 | if i.shape[0] > max_det: # limit detections 100 | i = i[:max_det] 101 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 102 | try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 103 | iou = box_iouv5(boxes[i], boxes) > iou_thres # iou matrix 104 | weights = iou * scores[None] # box weights 105 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 106 | if redundant: 107 | i = i[iou.sum(1) > 1] # require redundancy 108 | except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139 109 | print(x, i, x.shape, i.shape) 110 | pass 111 | 112 | output[xi] = x[i] 113 | if (time.time() - t) > time_limit: 114 | break # time limit exceeded 115 | 116 | return output 117 | 118 | def test(cfg, 119 | data, 120 | weights=None, 121 | batch_size=16, 122 | img_size=416, 123 | iou_thres=0.5, 124 | conf_thres=0.001, 125 | nms_thres=0.5, 126 | save_json=False, 127 | model=None): 128 | 129 | # Initialize/load model and set device 130 | if model is None: 131 | device = torch_utils.select_device(opt.device) 132 | verbose = True 133 | 134 | # Initialize model 135 | model = Darknet(cfg, img_size).to(device) 136 | 137 | # Load weights 138 | attempt_download(weights) 139 | if weights.endswith('.pt'): # pytorch format 140 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 141 | else: # darknet format 142 | _ = load_darknet_weights(model, weights) 143 | 144 | if torch.cuda.device_count() > 1: 145 | model = nn.DataParallel(model) 146 | else: 147 | device = next(model.parameters()).device # get model device 148 | verbose = False 149 | 150 | # Configure run 151 | data = parse_data_cfg(data) 152 | nc = int(data['classes']) # number of classes 153 | test_path = data['valid'] # path to test images 154 | names = load_classes(data['names']) # class names 155 | 156 | # Dataloader 157 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 158 | dataloader = DataLoader(dataset, 159 | batch_size=batch_size, 160 | num_workers=min([os.cpu_count(), batch_size, 16]), 161 | pin_memory=True, 162 | collate_fn=dataset.collate_fn) 163 | 164 | seen = 0 165 | model.eval() 166 | coco91class = 3 167 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 168 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 
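    # Each image appends a tuple (correct, conf, pred_cls, target_cls) to `stats`; after the loop these are
    # concatenated and reduced to the per-class P, R, mAP and F1 columns named in the header string `s` above.
    # Caution: `coco91class` is assigned the plain int 3 a few lines up, so the optional save_json branch below
    # (which indexes coco91class[...]) would fail if enabled; it looks like leftover from the COCO version of
    # this script and is harmless while save_json stays False.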
169 | loss = torch.zeros(3) 170 | jdict, stats, ap, ap_class = [], [], [], [] 171 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 172 | targets = targets.to(device) 173 | imgs = imgs.to(device) 174 | _, _, height, width = imgs.shape # batch size, channels, height, width 175 | 176 | # Plot images with bounding boxes 177 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 178 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 179 | 180 | # Run model 181 | inf_out, train_out = model(imgs) # inference and training outputs 182 | 183 | # Compute loss 184 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 185 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 186 | 187 | # Run NMS 188 | # output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 189 | output = non_max_suppressionv5(inf_out,conf_thres=conf_thres, iou_thres=nms_thres, classes=None,agnostic=False) 190 | 191 | # Statistics per image 192 | for si, pred in enumerate(output): 193 | labels = targets[targets[:, 0] == si, 1:] 194 | nl = len(labels) 195 | tcls = labels[:, 0].tolist() if nl else [] # target class 196 | seen += 1 197 | 198 | if pred is None: 199 | if nl: 200 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 201 | continue 202 | 203 | # Append to text file 204 | # with open('test.txt', 'a') as file: 205 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 206 | 207 | # Append to pycocotools JSON dictionary 208 | if save_json: 209 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 210 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 211 | box = pred[:, :4].clone() # xyxy 212 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 213 | box = xyxy2xywh(box) # xywh 214 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 215 | for di, d in enumerate(pred): 216 | jdict.append({'image_id': image_id, 217 | 'category_id': coco91class[int(d[6])], 218 | 'bbox': [floatn(x, 3) for x in box[di]], 219 | 'score': floatn(d[4], 5)}) 220 | 221 | # Clip boxes to image bounds 222 | clip_coords(pred, (height, width)) 223 | 224 | # Assign all predictions as incorrect 225 | correct = [0] * len(pred) 226 | if nl: 227 | detected = [] 228 | tcls_tensor = labels[:, 0] 229 | 230 | # target boxes 231 | tbox = xywh2xyxy(labels[:, 1:5]) 232 | tbox[:, [0, 2]] *= width 233 | tbox[:, [1, 3]] *= height 234 | 235 | # Search for correct predictions 236 | for i, (*pbox, pconf, pcls) in enumerate(pred): 237 | # for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 238 | 239 | # Break if all targets already located in image 240 | if len(detected) == nl: 241 | break 242 | 243 | # Continue if predicted class not among image classes 244 | if pcls.item() not in tcls: 245 | continue 246 | 247 | # Best iou, index between pred and targets 248 | m = (pcls == tcls_tensor).nonzero().view(-1) 249 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 250 | 251 | # If iou > threshold and class is correct mark as correct 252 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 253 | correct[i] = 1 254 | detected.append(m[bi]) 255 | 256 | # Append statistics (correct, conf, pcls, tcls) 257 | # stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 258 | stats.append((correct, pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) 259 | 260 | # Compute statistics 261 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to 
numpy 262 | if len(stats): 263 | p, r, ap, f1, ap_class = ap_per_class(*stats) 264 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 265 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 266 | else: 267 | nt = torch.zeros(1) 268 | 269 | # Print results 270 | pf = '%20s' + '%10.3g' * 6 # print format 271 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 272 | 273 | # Print results per class 274 | if verbose and nc > 1 and len(stats): 275 | for i, c in enumerate(ap_class): 276 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 277 | 278 | # Save JSON 279 | if save_json and map and len(jdict): 280 | try: 281 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 282 | with open('results.json', 'w') as file: 283 | json.dump(jdict, file) 284 | 285 | from pycocotools.coco import COCO 286 | from pycocotools.cocoeval import COCOeval 287 | 288 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 289 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 290 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 291 | 292 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 293 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 294 | cocoEval.evaluate() 295 | cocoEval.accumulate() 296 | cocoEval.summarize() 297 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 298 | except: 299 | print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.') 300 | 301 | # Return results 302 | maps = np.zeros(nc) + map 303 | for i, c in enumerate(ap_class): 304 | maps[c] = ap[i] 305 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps 306 | 307 | 308 | if __name__ == '__main__': 309 | parser = argparse.ArgumentParser(prog='test.py') 310 | parser.add_argument('--cfg', type=str, default='cfg/yolov5s.cfg', help='cfg file path') 311 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 312 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 313 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 314 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 315 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 316 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 317 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 318 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 319 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 320 | opt = parser.parse_args() 321 | print(opt) 322 | 323 | with torch.no_grad(): 324 | test(opt.cfg, 325 | opt.data, 326 | opt.weights, 327 | opt.batch_size, 328 | opt.img_size, 329 | opt.iou_thres, 330 | opt.conf_thres, 331 | opt.nms_thres, 332 | opt.save_json) 333 | -------------------------------------------------------------------------------- /test_yolov5s.py: -------------------------------------------------------------------------------- 1 | from modelsori import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | from models.yolo import Model 12 | 13 | import torchvision 14 | 15 | def letterboxv5(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True): 16 | # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232 17 | shape = img.shape[:2] # current shape [height, width] 18 | if isinstance(new_shape, int): 19 | new_shape = (new_shape, new_shape) 20 | 21 | # Scale ratio (new / old) 22 | r = min(new_shape[0] / shape[0], new_shape[1] / shape[1]) 23 | if not scaleup: # only scale down, do not scale up (for better test mAP) 24 | r = min(r, 1.0) 25 | 26 | # Compute padding 27 | ratio = r, r # width, height ratios 28 | new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) 29 | dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding 30 | if auto: # minimum rectangle 31 | # dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding 32 | dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding 33 | elif scaleFill: # stretch 34 | dw, dh = 0.0, 0.0 35 | new_unpad = (new_shape[1], new_shape[0]) 36 | ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios 37 | 38 | dw /= 2 # divide padding into 2 sides 39 | dh /= 2 40 | 41 | if shape[::-1] != new_unpad: # resize 42 | img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR) 43 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 44 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 45 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 46 | return img, ratio, (dw, dh) 47 | 48 | def box_iou(box1, box2): 49 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 50 | """ 51 | Return intersection-over-union (Jaccard index) of boxes. 52 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
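    Example (illustrative, not part of the original docstring): two 10x10 boxes
    overlapping in a 5x5 region give IoU = 25 / (100 + 100 - 25) ≈ 0.143:
        >>> a = torch.tensor([[0., 0., 10., 10.]])
        >>> b = torch.tensor([[5., 5., 15., 15.]])
        >>> box_iou(a, b)  # tensor([[0.1429]])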
53 | Arguments: 54 | box1 (Tensor[N, 4]) 55 | box2 (Tensor[M, 4]) 56 | Returns: 57 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 58 | IoU values for every element in boxes1 and boxes2 59 | """ 60 | 61 | def box_area(box): 62 | # box = 4xn 63 | return (box[2] - box[0]) * (box[3] - box[1]) 64 | 65 | area1 = box_area(box1.T) 66 | area2 = box_area(box2.T) 67 | 68 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 69 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 70 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 71 | 72 | def non_max_suppressionv5(prediction, conf_thres=0.1, iou_thres=0.6, merge=False, classes=None, agnostic=False): 73 | """Performs Non-Maximum Suppression (NMS) on inference results 74 | 75 | Returns: 76 | detections with shape: nx6 (x1, y1, x2, y2, conf, cls) 77 | """ 78 | if prediction.dtype is torch.float16: 79 | prediction = prediction.float() # to FP32 80 | 81 | nc = prediction[0].shape[1] - 5 # number of classes 82 | xc = prediction[..., 4] > conf_thres # candidates 83 | 84 | # Settings 85 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 86 | max_det = 300 # maximum number of detections per image 87 | time_limit = 10.0 # seconds to quit after 88 | redundant = True # require redundant detections 89 | multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img) 90 | 91 | t = time.time() 92 | output = [None] * prediction.shape[0] 93 | for xi, x in enumerate(prediction): # image index, image inference 94 | # Apply constraints 95 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 96 | x = x[xc[xi]] # confidence 97 | 98 | # If none remain process next image 99 | if not x.shape[0]: 100 | continue 101 | 102 | # Compute conf 103 | x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf 104 | 105 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 106 | box = xywh2xyxy(x[:, :4]) 107 | 108 | # Detections matrix nx6 (xyxy, conf, cls) 109 | if multi_label: 110 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 111 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 112 | else: # best class only 113 | conf, j = x[:, 5:].max(1, keepdim=True) 114 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 115 | 116 | # Filter by class 117 | if classes: 118 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 119 | 120 | # Apply finite constraint 121 | # if not torch.isfinite(x).all(): 122 | # x = x[torch.isfinite(x).all(1)] 123 | 124 | # If none remain process next image 125 | n = x.shape[0] # number of boxes 126 | if not n: 127 | continue 128 | 129 | # Sort by confidence 130 | # x = x[x[:, 4].argsort(descending=True)] 131 | 132 | # Batched NMS 133 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 134 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 135 | i = torchvision.ops.boxes.nms(boxes, scores, iou_thres) 136 | if i.shape[0] > max_det: # limit detections 137 | i = i[:max_det] 138 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 139 | try: # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 140 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 141 | weights = iou * scores[None] # box weights 142 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 143 | if redundant: 144 | i = i[iou.sum(1) > 1] # require redundancy 
145 | except: # possible CUDA error https://github.com/ultralytics/yolov3/issues/1139 146 | print(x, i, x.shape, i.shape) 147 | pass 148 | 149 | output[xi] = x[i] 150 | if (time.time() - t) > time_limit: 151 | break # time limit exceeded 152 | 153 | return output 154 | 155 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 156 | # Plots one bounding box on image img 157 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 158 | color = color or [random.randint(0, 255) for _ in range(3)] 159 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 160 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 161 | if label: 162 | tf = max(tl - 1, 1) # font thickness 163 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 164 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 165 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 166 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) 167 | 168 | def copy_conv(conv_src,conv_dst): 169 | conv_dst[0] = conv_src.conv 170 | conv_dst[1] = conv_src.bn 171 | conv_dst[2] = conv_src.act 172 | 173 | def copy_weight_v4(modelyolov5,model): 174 | focus = list(modelyolov5.model.children())[0] 175 | copy_conv(focus.conv, model.module_list[1]) 176 | conv1 = list(modelyolov5.model.children())[1] 177 | copy_conv(conv1, model.module_list[2]) 178 | cspnet1 = list(modelyolov5.model.children())[2] 179 | copy_conv(cspnet1.cv2, model.module_list[3]) 180 | copy_conv(cspnet1.cv1, model.module_list[5]) 181 | copy_conv(cspnet1.m[0].cv1, model.module_list[6]) 182 | copy_conv(cspnet1.m[0].cv2, model.module_list[7]) 183 | copy_conv(cspnet1.cv3, model.module_list[10]) 184 | conv2 = list(modelyolov5.model.children())[3] 185 | copy_conv(conv2, model.module_list[11]) 186 | cspnet2 = list(modelyolov5.model.children())[4] 187 | copy_conv(cspnet2.cv2, model.module_list[12]) 188 | copy_conv(cspnet2.cv1, model.module_list[14]) 189 | copy_conv(cspnet2.m[0].cv1, model.module_list[15]) 190 | copy_conv(cspnet2.m[0].cv2, model.module_list[16]) 191 | copy_conv(cspnet2.m[1].cv1, model.module_list[18]) 192 | copy_conv(cspnet2.m[1].cv2, model.module_list[19]) 193 | copy_conv(cspnet2.m[2].cv1, model.module_list[21]) 194 | copy_conv(cspnet2.m[2].cv2, model.module_list[22]) 195 | copy_conv(cspnet2.cv3, model.module_list[25]) 196 | conv3 = list(modelyolov5.model.children())[5] 197 | copy_conv(conv3, model.module_list[26]) 198 | cspnet3 = list(modelyolov5.model.children())[6] 199 | copy_conv(cspnet3.cv2, model.module_list[27]) 200 | copy_conv(cspnet3.cv1, model.module_list[29]) 201 | copy_conv(cspnet3.m[0].cv1, model.module_list[30]) 202 | copy_conv(cspnet3.m[0].cv2, model.module_list[31]) 203 | copy_conv(cspnet3.m[1].cv1, model.module_list[33]) 204 | copy_conv(cspnet3.m[1].cv2, model.module_list[34]) 205 | copy_conv(cspnet3.m[2].cv1, model.module_list[36]) 206 | copy_conv(cspnet3.m[2].cv2, model.module_list[37]) 207 | copy_conv(cspnet3.cv3, model.module_list[40]) 208 | conv4 = list(modelyolov5.model.children())[7] 209 | copy_conv(conv4, model.module_list[41]) 210 | spp = list(modelyolov5.model.children())[8] 211 | copy_conv(spp.cv1, model.module_list[42]) 212 | model.module_list[43] = spp.m[0] 213 | model.module_list[45] = spp.m[1] 214 | model.module_list[47] = spp.m[2] 215 | copy_conv(spp.cv2, model.module_list[49]) 216 | cspnet4 = list(modelyolov5.model.children())[9] 217 | copy_conv(cspnet4.cv2, model.module_list[50]) 218 | 
copy_conv(cspnet4.cv1, model.module_list[52]) 219 | copy_conv(cspnet4.m[0].cv1, model.module_list[53]) 220 | copy_conv(cspnet4.m[0].cv2, model.module_list[54]) 221 | copy_conv(cspnet4.cv3, model.module_list[56]) 222 | conv5 = list(modelyolov5.model.children())[10] 223 | copy_conv(conv5, model.module_list[57]) 224 | upsample1 = list(modelyolov5.model.children())[11] 225 | model.module_list[58] = upsample1 226 | cspnet5 = list(modelyolov5.model.children())[13] 227 | copy_conv(cspnet5.cv2, model.module_list[60]) 228 | copy_conv(cspnet5.cv1, model.module_list[62]) 229 | copy_conv(cspnet5.m[0].cv1, model.module_list[63]) 230 | copy_conv(cspnet5.m[0].cv2, model.module_list[64]) 231 | copy_conv(cspnet5.cv3, model.module_list[66]) 232 | conv6 = list(modelyolov5.model.children())[14] 233 | copy_conv(conv6, model.module_list[67]) 234 | upsample2 = list(modelyolov5.model.children())[15] 235 | model.module_list[68] = upsample2 236 | cspnet6 = list(modelyolov5.model.children())[17] 237 | copy_conv(cspnet6.cv2, model.module_list[70]) 238 | copy_conv(cspnet6.cv1, model.module_list[72]) 239 | copy_conv(cspnet6.m[0].cv1, model.module_list[73]) 240 | copy_conv(cspnet6.m[0].cv2, model.module_list[74]) 241 | copy_conv(cspnet6.cv3, model.module_list[76]) 242 | conv7 = list(modelyolov5.model.children())[18] 243 | copy_conv(conv7, model.module_list[80]) 244 | cspnet7 = list(modelyolov5.model.children())[20] 245 | copy_conv(cspnet7.cv2, model.module_list[82]) 246 | copy_conv(cspnet7.cv1, model.module_list[84]) 247 | copy_conv(cspnet7.m[0].cv1, model.module_list[85]) 248 | copy_conv(cspnet7.m[0].cv2, model.module_list[86]) 249 | copy_conv(cspnet7.cv3, model.module_list[88]) 250 | conv8 = list(modelyolov5.model.children())[21] 251 | copy_conv(conv8, model.module_list[92]) 252 | cspnet8 = list(modelyolov5.model.children())[23] 253 | copy_conv(cspnet8.cv2, model.module_list[94]) 254 | copy_conv(cspnet8.cv1, model.module_list[96]) 255 | copy_conv(cspnet8.m[0].cv1, model.module_list[97]) 256 | copy_conv(cspnet8.m[0].cv2, model.module_list[98]) 257 | copy_conv(cspnet8.cv3, model.module_list[100]) 258 | detect = list(modelyolov5.model.children())[24] 259 | model.module_list[77][0] = detect.m[0] 260 | model.module_list[89][0] = detect.m[1] 261 | model.module_list[101][0] = detect.m[2] 262 | 263 | def initialize_weights(model): 264 | for m in model.modules(): 265 | t = type(m) 266 | if t is nn.Conv2d: 267 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 268 | elif t is nn.BatchNorm2d: 269 | m.eps = 1e-3 270 | m.momentum = 0.03 271 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 272 | m.inplace = True 273 | 274 | if __name__ == '__main__': 275 | parser = argparse.ArgumentParser() 276 | parser.add_argument('--cfg', type=str, default='cfg/yolov5s_v4.cfg', help='cfg file path') 277 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 278 | parser.add_argument('--weights', type=str, default='weights/yolov5s_v4.pt', help='sparse model weights') 279 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)') 280 | opt = parser.parse_args() 281 | print(opt) 282 | 283 | img_size = opt.img_size 284 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 285 | 286 | #the way of loading yolov5s 287 | # ckpt = torch.load(opt.weights, map_location=device) # load checkpoint 288 | # modelyolov5 = Model('models/yolov5s_v4.yaml', nc=80).to(device) 289 | # exclude = ['anchor'] # exclude keys 290 | # ckpt['model'] = {k: v for 
k, v in ckpt['model'].float().state_dict().items() 291 | # if k in modelyolov5.state_dict() and not any(x in k for x in exclude) 292 | # and modelyolov5.state_dict()[k].shape == v.shape} 293 | # modelyolov5.load_state_dict(ckpt['model'], strict=False) 294 | 295 | #another way of loading yolov5s 296 | modelyolov5=torch.load(opt.weights, map_location=device)['model'].float().eval() 297 | modelyolov5.model[24].export = False # onnx export 298 | 299 | # model=modelyolov5 300 | 301 | #load yolov5s from cfg 302 | model = Darknet(opt.cfg, (img_size, img_size)).to(device) 303 | copy_weight_v4(modelyolov5,model) 304 | 305 | path='data/samples/bus.jpg' 306 | img0 = cv2.imread(path) # BGR 307 | # Padded resize 308 | img = letterboxv5(img0, new_shape=416)[0] 309 | 310 | # Convert 311 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 312 | img = np.ascontiguousarray(img) 313 | img = torch.from_numpy(img).to(device) 314 | img = img.float() 315 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 316 | if img.ndimension() == 3: 317 | img = img.unsqueeze(0) 318 | 319 | # modelyolov5.eval() 320 | 321 | 322 | model.eval() 323 | pred = model(img)[0] 324 | 325 | pred = non_max_suppressionv5(pred, 0.4, 0.5, classes=None, 326 | agnostic=False) 327 | # Process detections 328 | for i, det in enumerate(pred): # detections per image 329 | if det is not None and len(det): 330 | # Rescale boxes from img_size to im0 size 331 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round() 332 | 333 | # Write results 334 | for *xyxy, conf, cls in det: 335 | label = '%s %.2f' % (str(int(cls)), conf) 336 | plot_one_box(xyxy, img0, label=label, color=[random.randint(0, 255) for _ in range(3)], line_thickness=3) 337 | cv2.imwrite("v5_cfg.jpg", img0) 338 | 339 | modelyolov5.eval() 340 | pred = modelyolov5(img)[0] 341 | 342 | pred = non_max_suppressionv5(pred, 0.4, 0.5, classes=None, 343 | agnostic=False) 344 | # Process detections 345 | for i, det in enumerate(pred): # detections per image 346 | if det is not None and len(det): 347 | # Rescale boxes from img_size to im0 size 348 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round() 349 | 350 | # Write results 351 | for *xyxy, conf, cls in det: 352 | label = '%s %.2f' % (str(int(cls)), conf) 353 | plot_one_box(xyxy, img0, label=label, color=[random.randint(0, 255) for _ in range(3)], 354 | line_thickness=3) 355 | cv2.imwrite("v5.jpg", img0) 356 | -------------------------------------------------------------------------------- /tk1_time.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BokyLiu/YoloV5sl_V4_prune/c0ff39c5a5b10cbca95beb597c722cdc02e81885/tk1_time.xls -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 
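    Example (illustrative usage, not in the original docstring; `model`,
    `criterion`, `inputs` and `targets` are placeholders):
        >>> optimizer = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)
        >>> loss = criterion(model(inputs), targets)
        >>> optimizer.zero_grad(); loss.backward(); optimizer.step()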
10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" 13 | sudo shutdown 14 | 15 | # Re-clone 16 | rm -rf yolov3 # Warning: remove existing 17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1 20 | 21 | # Train 22 | python3 train.py 23 | 24 | # Resume 25 | python3 train.py --resume 26 | 27 | # Detect 28 | python3 detect.py 29 | 30 | # Test 31 | python3 test.py --save-json 32 | 33 | # Evolve 34 | for i in {0..500} 35 | do 36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 37 | done 38 | 39 | # Git pull 40 | git pull https://github.com/ultralytics/yolov3 # master 41 | git pull https://github.com/ultralytics/yolov3 test # branch 42 | 43 | # Test Darknet training 44 | python3 test.py --weights ../darknet/backup/yolov3.backup 45 | 46 | # Copy last.pt TO bucket 47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 48 | 49 | # Copy last.pt FROM bucket 50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 53 | 54 | # Reproduce tutorials 55 | rm results*.txt # WARNING: removes existing results 56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 60 | python3 -c "from utils import utils; utils.plot_results()" 61 | # gsutil cp results*.txt gs://ultralytics 62 | gsutil cp results.png gs://ultralytics 63 | sudo shutdown 64 | 65 | # Reproduce mAP 66 | python3 test.py --save-json --img-size 608 67 | python3 test.py --save-json --img-size 416 68 | python3 test.py --save-json --img-size 320 69 | sudo shutdown 70 | 71 | # Benchmark script 72 | git clone https://github.com/ultralytics/yolov3 # clone our repo 73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 76 | 77 | # Unit tests 78 | python3 detect.py # detect 2 persons, 1 tie 79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 83 | 84 | # AlexyAB Darknet 85 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 90 | 91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 92 | gsutil cp -r backup/*5000.weights gs://sm6/weights 93 | sudo shutdown 94 | 95 | 96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 101 | 102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 107 | 108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 109 | 110 | # Debug/Development 111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 113 | 114 | gsutil cp evolve.txt gs://ultralytics 115 | sudo shutdown 116 | 117 | #Docker 118 | sudo docker kill $(sudo docker ps -q) 119 | sudo docker pull ultralytics/yolov3:v1 120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1 121 | 122 | clear 123 | while true 124 | do 125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1 126 | done 127 | 128 | python3 train.py --data 
data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias 129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done 130 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... 
', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Uploads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | 44 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | def init_seeds(seed=0): 9 | torch.manual_seed(seed) 10 | torch.cuda.manual_seed(seed) 11 | torch.cuda.manual_seed_all(seed) 12 | 13 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 14 | if seed == 0: 15 | torch.backends.cudnn.deterministic = True 16 | torch.backends.cudnn.benchmark = False 17 | 18 | 19 | def select_device(device='', apex=False): 20 | # device = 'cpu' or '0' or '0,1,2,3' 21 | cpu_request = device.lower() == 'cpu' 22 | if device and not cpu_request: # if device requested other than 'cpu' 23 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 24 | assert 
torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 25 | 26 | cuda = False if cpu_request else torch.cuda.is_available() 27 | if cuda: 28 | c = 1024 ** 2 # bytes to MB 29 | ng = torch.cuda.device_count() 30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 31 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 32 | for i in range(0, ng): 33 | if i == 1: 34 | cuda_str = ' ' * len(cuda_str) 35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 36 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 37 | else: 38 | print('Using CPU') 39 | 40 | print('') # skip a line 41 | return torch.device('cuda:0' if cuda else 'cpu') 42 | 43 | def time_synchronized(): 44 | torch.cuda.synchronize() if torch.cuda.is_available() else None 45 | return time.time() 46 | 47 | 48 | def is_parallel(model): 49 | # is model is parallel with DP or DDP 50 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 51 | 52 | def initialize_weights(model): 53 | for m in model.modules(): 54 | t = type(m) 55 | if t is nn.Conv2d: 56 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 57 | elif t is nn.BatchNorm2d: 58 | m.eps = 1e-3 59 | m.momentum = 0.03 60 | elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 61 | m.inplace = True 62 | 63 | def fuse_conv_and_bn(conv, bn): 64 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 65 | with torch.no_grad(): 66 | # init 67 | fusedconv = torch.nn.Conv2d(conv.in_channels, 68 | conv.out_channels, 69 | kernel_size=conv.kernel_size, 70 | stride=conv.stride, 71 | padding=conv.padding, 72 | bias=True) 73 | 74 | # prepare filters 75 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 76 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 77 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 78 | 79 | # prepare spatial bias 80 | if conv.bias is not None: 81 | b_conv = conv.bias 82 | else: 83 | b_conv = torch.zeros(conv.weight.size(0)) 84 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 85 | fusedconv.bias.copy_(b_conv + b_bn) 86 | 87 | return fusedconv 88 | 89 | 90 | def model_info(model, report='summary'): 91 | # Plots a line-by-line description of a PyTorch model 92 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 93 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 94 | if report is 'full': 95 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 96 | for i, (name, p) in enumerate(model.named_parameters()): 97 | name = name.replace('module_list.', '') 98 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 99 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 100 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 101 | --------------------------------------------------------------------------------
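The fuse_conv_and_bn() helper above folds a BatchNorm2d layer into the preceding Conv2d for inference. A minimal sketch of how the fusion can be sanity-checked, assuming the repository root is on PYTHONPATH; the Conv2d/BatchNorm2d pair here is a toy stand-in, not a layer taken from the repo:

import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn

# Toy Conv2d + BatchNorm2d pair in eval mode, so BatchNorm uses its running
# statistics (the same assumption the fusion formula relies on).
conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False).eval()
bn = nn.BatchNorm2d(16).eval()

fused = fuse_conv_and_bn(conv, bn)  # single Conv2d with BN folded into weight/bias

x = torch.randn(1, 3, 64, 64)
with torch.no_grad():
    diff = (bn(conv(x)) - fused(x)).abs().max().item()
print('max abs difference after fusion: %.2e' % diff)  # expected to be ~1e-6 or smaller

The same kind of check can be applied to each Conv+BN pair of a pruned model before exporting it, to confirm that fusion has not changed the network's outputs.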