├── Dockerfile ├── LICENSE ├── README.md ├── cfg ├── yolov3-1cls.cfg ├── yolov3-hand.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-hand.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny.cfg ├── yolov3.cfg ├── yolov3s-18a320.cfg ├── yolov3s-30a320.cfg ├── yolov3s-3a320.cfg ├── yolov3s-9a320.cfg ├── yolov4-tiny.cfg └── yolov4.cfg ├── data ├── 5k.shapes ├── 5k.txt ├── coco.data ├── coco.names ├── coco_1000img.data ├── coco_1000img.txt ├── coco_1000val.data ├── coco_1000val.txt ├── coco_16img.data ├── coco_16img.txt ├── coco_1cls.data ├── coco_1cls.txt ├── coco_1img.data ├── coco_1img.txt ├── coco_1k5k.data ├── coco_32img.data ├── coco_32img.txt ├── coco_500img.txt ├── coco_500val.data ├── coco_500val.txt ├── coco_64img.data ├── coco_64img.shapes ├── coco_64img.txt ├── coco_paper.names ├── converter.py ├── get_coco_dataset.sh ├── get_coco_dataset_gdrive.sh ├── hand.data ├── img │ ├── 1.jpg │ ├── 2.jpg │ ├── baseline_and_sparse.jpg │ ├── bn.jpg │ ├── finetune_and_bn.jpg │ └── prune9316.png ├── oxfordhand.data ├── oxfordhand.names ├── samples │ ├── bus.jpg │ └── zidane.jpg ├── trainvalno5k.shapes └── valid_.shapes ├── detect.py ├── layer_channel_prune.py ├── layer_prune.py ├── models.py ├── prune.py ├── requirements.txt ├── shortcut_prune.py ├── slim_prune.py ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── parse_config.py ├── prune_utils.py ├── torch_utils.py └── utils.py └── weights └── download_yolov3_weights.sh /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:19.08-py3 3 | 4 | # Install dependencies (pip or conda) 5 | RUN pip install -U gsutil 6 | # RUN pip install -U -r requirements.txt 7 | # RUN conda update -n base -c defaults conda 8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | 11 | ## Install OpenCV with Gstreamer support 12 | #WORKDIR /usr/src 13 | #RUN pip uninstall -y opencv-python 14 | #RUN apt-get update 15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev 16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build 17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1 18 | #RUN cd opencv/build && cmake ../ \ 19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | # -D BUILD_OPENCV_PYTHON3=ON \ 21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \ 22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \ 23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \ 24 | # -D WITH_GSTREAMER=ON \ 25 | # -D WITH_FFMPEG=OFF \ 26 | # && make && make install && ldconfig 27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so 28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so 29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())" 30 | 31 | # Create working directory 32 | RUN mkdir -p /usr/src/app 33 | WORKDIR /usr/src/app 34 | 35 | # Copy contents 36 | COPY . 
/usr/src/app 37 | 38 | # Copy weights 39 | #RUN python3 -c "from utils.google_utils import *; \ 40 | # gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name='weights/darknet53.conv.74'); \ 41 | # gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name='weights/yolov3-spp.weights'); \ 42 | # gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name='weights/yolov3-spp.pt)" 43 | 44 | 45 | # --------------------------------------------------- Extras Below --------------------------------------------------- 46 | 47 | # Build 48 | # rm -rf yolov3 # Warning: remove existing 49 | # git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 detect.py 50 | # sudo docker image prune -af && sudo docker build -t ultralytics/yolov3:v0 . 51 | 52 | # Run 53 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 python3 detect.py 54 | 55 | # Run with local directory access 56 | # sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v0 python3 train.py 57 | 58 | # Build and Push 59 | # export tag=ultralytics/yolov3:v0 && sudo docker build -t $tag . && docker push $tag 60 | 61 | # Kill all 62 | # sudo docker kill $(sudo docker ps -q) 63 | 64 | # Run bash for loop 65 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 while true; do python3 train.py --evolve; done 66 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # yolov3-channel-and-layer-pruning 2 | This project builds on [ultralytics/yolov3](https://github.com/ultralytics/yolov3). Following [Learning Efficient Convolutional Networks Through Network Slimming (ICCV 2017)](http://openaccess.thecvf.com/content_iccv_2017/html/Liu_Learning_Efficient_Convolutional_ICCV_2017_paper.html), it prunes channels based on the gamma coefficients of the BN layers. Several different channel-pruning strategies are referenced below, and the original strategies have been improved to raise both the pruning ratio and the accuracy. On top of this work, layer pruning was derived: channel pruning alone already greatly reduces the model's parameters, computation, and resource footprint, while layer pruning further cuts computation and significantly speeds up inference. Combining layer pruning with channel pruning compresses both the depth and the width of the model, which in a sense amounts to searching for a small model tailored to each dataset.
3 |
4 | The basic workflow of the project: train yolov3 on your own dataset; once it reaches a satisfactory accuracy, run sparsity training, the most critical step, which heavily compresses the BN gamma coefficients of the layers to be pruned (an ideal outcome is shown in the figure below); then prune the unimportant channels or layers; after pruning, fine-tune the model to recover accuracy. A blog post documenting the experiments and tuning experience will follow. Many thanks to [行云](https://github.com/zbyuan) for the discussions and collaboration!
5 |
6 | ![sparsity](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/1.jpg) 7 | 8 |
9 | 10 | #### Updates 11 | 1. Added support for the **yolov3-spp** architecture; base training can be initialized directly with yolov3-spp.weights, and the layer-pruning and channel-pruning scripts are used the same way as with yolov3.
12 | 2. Added multi-scale inference support; train.py and every pruning script accept the command-line argument, e.g. --img_size 608.
13 | 3. 2019/12/06: Changed the layer-selection rule for layer pruning from ranking by maximum gamma to ranking by mean gamma.
14 | 4. 2019/12/08 **Important** update: added a **knowledge distillation** strategy. Distillation uses a large, high-accuracy model to guide a small, lower-accuracy one, and it works especially well when the two architectures are similar; a pruned model is highly similar in structure to the original, so it is a natural fit. The strategy added here follows Hinton's Distilling the Knowledge in a Neural Network; the original method targets classification models, but it also performs well here. To use it, simply specify the teacher model's cfg and weights during fine-tuning: --t_cfg --t_weights. A second distillation strategy tailored to YOLO detection will be added soon.
15 | 5. 2019/12/10: There are more questions than I can answer one by one; you can join group 734912150 to discuss.
16 | 6. 2019/12/14: Added mixed-precision training support for distillation. All training in this project can be accelerated with [apex](https://github.com/NVIDIA/apex), which must be installed first. Mixed precision speeds up training and reduces GPU memory usage, though the final accuracy may drop very slightly. It is enabled by default; to turn it off, set mixed_precision to False in train.py.
17 | 7. 2019/12/23: Added **knowledge distillation strategy two**, which is now the default. Strategy two follows the paper "Learning Efficient Object Detection Models with Knowledge Distillation". Unlike strategy one, it handles classification and regression separately: the classification part is distilled much like strategy one, while for regression the L2 distances of the student and the teacher to the target are computed, and if the student is farther away it learns towards the target again rather than towards the teacher (a minimal sketch of this idea is given right after this list). Usage is the same: just specify the teacher's cfg and weights. Note that distillation here is only an aid to fine-tuning: if accuracy is the priority, prune only as much as costs no mAP, in which case distillation adds little; if speed is the priority and a large pruning ratio causes a noticeable accuracy drop, distillation can help recover it.
18 | 8. 2019/12/27: Added two more **sparsity strategies**; see the sparsity-training section below for details.
19 | 9. 2020/01/02: Fixed multi-resolution inference testing in all pruning scripts, mainly by passing the command-line argument img_size through to the test function.
20 | 10. 2020/01/04: Added a [blog post](https://blog.csdn.net/weixin_41397123/article/details/103828931) sharing a case study on the **visdrone UAV dataset**, demonstrating how to compress a drone-view object detection model to 12 MB (clickbait title).
21 | 11. 2020/04/10: Added pruning support for **yolov3-tiny**: sparsity training works as before, channel pruning uses slim_prune.py, and layer pruning is not supported.
22 | 12. 2020/4/24: Added **yolov4** pruning support.
23 | 13. 2020/4/30: Added support for training with negative samples at line 592 of datasets.py; it is commented out by default.
24 | 14. 2020/7/8: Added channel-pruning support for **yolov4-tiny**.
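The following is a minimal sketch of the strategy-two idea from update 7, not the repository's actual implementation: a Hinton-style soft-target loss for classification plus a regression term that only pulls the student towards the target when the teacher is already closer. The function name `distillation_loss_sketch`, the temperature `T`, and the weighting `alpha` are illustrative assumptions.

```python
import torch.nn.functional as F

def distillation_loss_sketch(s_cls, t_cls, s_box, t_box, gt_box, T=3.0, alpha=0.5):
    # Classification: Hinton-style KD with temperature T (soft teacher targets).
    kd_cls = F.kl_div(F.log_softmax(s_cls / T, dim=1),
                      F.softmax(t_cls / T, dim=1),
                      reduction='batchmean') * (T * T)
    # Regression: L2 distance of student/teacher predictions to the ground-truth box.
    s_dist = ((s_box - gt_box) ** 2).sum(dim=1)
    t_dist = ((t_box - gt_box) ** 2).sum(dim=1)
    # Penalise the student only where the teacher is closer to the target,
    # and pull it towards the target rather than towards the teacher.
    kd_reg = (s_dist * (s_dist > t_dist).float()).mean()
    return alpha * kd_cls + (1 - alpha) * kd_reg
```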
25 | 26 | 27 | 28 | #### Base training 29 | See requirements.txt for the environment setup, refer to [this guide](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data) for data preparation, and download the pretrained weights from the official darknet site.
30 | Train yolov3 on your own dataset: modify the cfg, set up the data files, and initialize the weights from yolov3.weights (a typical .data file is sketched below).
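For reference, a darknet-style .data file (the key=value format used by data/coco.data and data/oxfordhand.data in this repo) typically looks like the example below; the class count and paths are placeholders for your own dataset, and extra keys such as backup may also appear.

```
classes=1
train=data/train.txt
valid=data/valid.txt
names=data/my_data.names
```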
31 |
32 | `python train.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/yolov3.weights --epochs 100 --batch-size 32` 33 | 34 | #### Sparsity training 35 | The scale parameter s defaults to 0.001 and should be adjusted according to the dataset, the mAP, and the BN distribution; reduce s for datasets with a broad distribution and many classes, or when accuracy drops sharply during sparsification. -sr enables sparsity training; --prune 0 is for prune.py, and --prune 1 is for the other pruning strategies. Sparsity training is a trade-off between accuracy and sparsity, and finding a schedule that keeps the sparsified model accurate while reaching high sparsity is worth studying: a large s sparsifies quickly but loses accuracy quickly, a small s sparsifies slowly but loses accuracy slowly; a large learning rate accelerates sparsification, and a small learning rate late in training helps accuracy recover. The sketch below illustrates what -sr adds to the BN gradients.
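As a rough illustration (not the exact code in train.py / prune_utils.py), `-sr --s 0.001` conceptually adds the subgradient of an L1 penalty s * |gamma| on the BN scale factors after the normal backward pass; the helper name `add_bn_sparsity` and penalising every BatchNorm2d are simplifying assumptions here, since the real code only touches the layers listed in prune_idx.

```python
import torch

def add_bn_sparsity(model, s=0.001):
    # Add the subgradient of s * |gamma| to the BN weight gradients.
    for m in model.modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))

# inside the training loop:
#   loss.backward()
#   add_bn_sparsity(model, s=opt.s)
#   optimizer.step()
```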
36 | Note: the saved .pt checkpoints contain epoch information. Convert them to darknet weights with `python -c "from models import *; convert('cfg/yolov3.cfg', 'weights/last.pt')"` to strip the epoch information, and use the darknet weights so that sparsity training starts from epoch 0.
37 |
38 | `python train.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.weights --epochs 300 --batch-size 32 -sr --s 0.001 --prune 1` 39 | * ##### Sparsity strategy one: constant s 40 | This is the original strategy and the default. Throughout sparsification a constant s adds an extra gradient to the model; because the pressure is uniform, it usually achieves a high compression rate. But sparsification is a balancing act: we want high compression and also enough accuracy recovery after the learning rate drops. Different values of s give different final results, and finding a suitable s can take considerable time.
41 |
42 | `bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data))` 43 | * ##### Sparsity strategy two: global s decay 44 | The key line is shown below: at 50% of the total epochs, s is decayed by a factor of 100. The premise is that the weights have already been heavily compressed before that point; decaying s then helps accuracy recover quickly, but the BN gammas swell back somewhat and the compression rate drops. It trades a larger loss of compression for higher accuracy while also reducing the time spent searching for s. The 0.5 and the factor of 100 are of course adjustable. Note that you should not crank up s in the first half just to compress the BN layers faster: an overly large s hurts accuracy badly, and the loss cannot be recovered after s decays. To use this strategy, uncomment the line below in BNOptimizer in prune_utils.py.
45 |
46 | `# s = s if epoch <= opt.epochs * 0.5 else s * 0.01` 47 | * ##### Sparsity strategy three: local s decay 48 | The key code is the two lines below: from 50% of the total epochs onward, 85% of the channels keep the original sparsity pressure while the remaining 15% have s decayed by a factor of 100. The 85% is a prior: it is the largest channel-pruning ratio that costs almost no accuracy after strategy-one sparsification, where "almost no accuracy" is measured relative to the sparsified model. If fine-tuning still cannot match the baseline, or cannot reach your accuracy target, use strategy three to decay s locally and resume sparsification from the middle; this recovers a lot of accuracy at a small cost in compression. To use it, uncomment the two lines below in train.py, replace 0.85 with the ratio found from your own strategy-one runs, and adjust the 0.5 and the factor of 100 as needed. Combining strategies two and three is not recommended unless you deliberately want a hybrid.
49 |
50 | `#if opt.sr and opt.prune==1 and epoch > opt.epochs * 0.5:`
51 | `# idx2mask = get_mask2(model, prune_idx, 0.85)` 52 | 53 | #### Channel pruning strategy one 54 | This strategy comes from [Lam1360/YOLOv3-model-pruning](https://github.com/Lam1360/YOLOv3-model-pruning). It is conservative: yolov3 has five groups with 23 shortcut connections in total, which correspond to add operations, so after channel pruning the two inputs of each shortcut must keep matching dimensions. Lam1360/YOLOv3-model-pruning avoids that problem by not pruning the layers directly connected to shortcuts, yet it still reaches a fairly high pruning rate and greatly reduces the parameter count. Although it has the lowest pruning rate of the three strategies, its handling of the pruning details is very elegant, and much of the later code here is based on that project. This project also changes its threshold rule so that a higher pruning threshold can be set; the sketch below illustrates how a threshold follows from --percent.
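A minimal sketch of how a global threshold can be derived from `--percent` under this strategy (the real prune.py differs in detail): collect the |gamma| values of the prunable BN layers, sort them, and cut at the given fraction. `prunable_bn_layers` is an assumed list of the BN modules that do not feed a shortcut.

```python
import torch

def gamma_threshold(prunable_bn_layers, percent=0.85):
    gammas = torch.cat([bn.weight.data.abs().clone() for bn in prunable_bn_layers])
    sorted_gammas, _ = torch.sort(gammas)
    idx = min(int(len(sorted_gammas) * percent), len(sorted_gammas) - 1)
    thresh = sorted_gammas[idx]
    # channels with |gamma| below thresh become pruning candidates
    return thresh
```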
55 |
56 | `python prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --percent 0.85` 57 | 58 | #### Channel pruning strategy two 59 | This strategy comes from [coldlarry/YOLOv3-complete-pruning](https://github.com/coldlarry/YOLOv3-complete-pruning). It also prunes the convolutional layers involved in shortcuts, using the mask of the first convolutional layer in each shortcut group; five masks in total handle the five groups of shortcut-related layers, which raises the pruning rate further. This project improves the handling of the post-pruning activation offsets for shortcut-related layers and modifies the threshold rule so a higher pruning rate can be set. Of course, the achievable pruning rate and the accuracy change after pruning depend heavily on sparsity training, whose importance is stressed once more. The sketch below illustrates the shared-mask idea.
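A hedged sketch of the shared-mask idea: within one shortcut group, the keep-mask is computed from the first conv layer's BN gammas and reused for every shortcut-connected conv layer in that group, so both operands of each add keep identical channel counts. `group_bn_layers` is an assumed list of the BN modules in one group.

```python
def shared_group_masks(group_bn_layers, thresh):
    first = group_bn_layers[0]
    mask = (first.weight.data.abs() >= thresh).float()
    # the same mask is applied to every shortcut-connected layer in the group
    return [mask] * len(group_bn_layers)
```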
60 |
61 | `python shortcut_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --percent 0.6` 62 | 63 | #### Channel pruning strategy three 64 | This strategy is based on [PengyiZhang/SlimYOLOv3](https://github.com/PengyiZhang/SlimYOLOv3) and achieves the highest channel-pruning rate. It first derives each convolutional layer's mask from a global threshold; then, for every shortcut group, it takes the union of the masks of the connected layers and prunes with the merged mask, so every related layer is taken into account. It also enforces a minimum number of retained channels per layer, and in experiments it gives the best pruning results. This project additionally handles the activation offsets to reduce the accuracy loss at pruning time. A sketch of the merged mask follows.
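A sketch, under the same assumptions as above, of the merged-mask idea used by slim_prune.py: each layer in a shortcut group gets its own mask from the global threshold, a minimum fraction of channels (`--layer_keep`) is forced to survive in every layer, and the per-layer masks are OR-ed into one mask for the whole group.

```python
import torch

def merged_group_mask(group_bn_layers, global_thresh, layer_keep=0.01):
    merged = None
    for bn in group_bn_layers:
        gamma = bn.weight.data.abs()
        mask = (gamma >= global_thresh).float()
        min_keep = max(1, int(len(gamma) * layer_keep))
        if int(mask.sum()) < min_keep:
            # keep at least the min_keep largest-gamma channels of this layer
            keep = torch.topk(gamma, min_keep).indices
            mask = torch.zeros_like(gamma)
            mask[keep] = 1.0
        merged = mask if merged is None else torch.max(merged, mask)  # union
    return merged  # applied to every layer in the group
```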
65 |
66 | `python slim_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --global_percent 0.8 --layer_keep 0.01` 67 | 68 | #### Layer pruning 69 | This strategy is derived from the channel-pruning strategies above. It evaluates the CBL block in front of each shortcut layer, ranks the layers by the mean of their gamma values, and prunes the ones with the smallest means. To keep the yolov3 structure intact, pruning one shortcut block removes the shortcut layer together with the two convolutional layers in front of it; only the shortcut blocks in the backbone are considered. Since yolov3 has 23 shortcuts, removing 8 shortcuts removes 24 layers and removing 16 removes 48, for a total layer-pruning space of 69 layers. In experiments, many shortcuts could be removed on simple datasets with very little accuracy loss. A sketch of the selection rule follows.
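A minimal sketch of the selection rule described above, with the mapping `shortcut_to_prev_bn` (shortcut index to the BN module of the preceding CBL) assumed to exist: rank shortcuts by the mean |gamma| of that CBL and drop the lowest-scoring `--shortcuts` of them, remembering that each dropped shortcut also removes the two conv layers in front of it.

```python
def select_shortcuts_to_prune(shortcut_to_prev_bn, n_shortcuts=12):
    scores = {idx: bn.weight.data.abs().mean().item()
              for idx, bn in shortcut_to_prev_bn.items()}
    # prune the shortcuts whose preceding CBL has the smallest mean gamma
    return sorted(scores, key=scores.get)[:n_shortcuts]
```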
70 |
71 | `python layer_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --shortcuts 12` 72 | 73 | #### Pruning layers and channels together 74 | Channel pruning and layer pruning compress the width and the depth of the model respectively; they can be freely combined, even applied iteratively, to tailor a recipe to your own dataset. A script that prunes layers and channels at the same time is provided here to make it easier to compare pruning results; use it if it suits your needs.
75 |
76 | `python layer_channel_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --shortcuts 12 --global_percent 0.8 --layer_keep 0.1` 77 | 78 | #### Fine-tuning 79 | How well pruning works depends first of all on how well the model was sparsified, and different pruning strategies and thresholds behave differently after pruning. Occasionally accuracy even rises after pruning, but in general pruning hurts accuracy, and the pruned model then needs fine-tuning to recover it. The training code warms up during the first 6 epochs by default, which benefits fine-tuning; adjust the learning-rate hyperparameters yourself if needed. A generic warmup sketch follows.
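For illustration only, an epoch-level linear warmup like the sketch below captures the idea; the repository's actual warmup schedule in train.py may ramp differently (e.g. per batch), so treat this as an assumption rather than the exact behaviour.

```python
def warmup_lr(optimizer, base_lr, epoch, warmup_epochs=6):
    # ramp the learning rate linearly over the first warmup_epochs, then hold base_lr
    lr = base_lr * (epoch + 1) / warmup_epochs if epoch < warmup_epochs else base_lr
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr
```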
80 |
81 | `python train.py --cfg cfg/prune_0.85_my_cfg.cfg --data data/my_data.data --weights weights/prune_0.85_last.weights --epochs 100 --batch-size 32` 82 | 83 | #### Monitoring training with tensorboard 84 | `tensorboard --logdir runs`
85 |
86 | ![tensorboard](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/2.jpg) 87 |
88 | You are welcome to use and test the project; for questions or to discuss experiments, open an issue or join group 734912150.
89 | 90 | 91 | #### Example 92 | Train yolov3-spp on the oxfordhand dataset and prune it. Download the [dataset](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz), extract it into the data folder, run converter.py, and update oxfordhand.data with the paths of the generated train.txt and valid.txt. Then run base training and sparsity training with the following commands:
93 | `python train.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/yolov3-spp.weights --batch-size 20 --epochs 100`
94 |
95 | `python -c "from models import *; convert('cfg/yolov3-spp-hand.cfg', 'weights/last.pt')"`
96 | `python train.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/converted.weights --batch-size 20 --epochs 300 -sr --s 0.001 --prune 1`
97 |
98 | The training curves are shown below: the blue line is base training and the red line is sparsity training. Base training ran for 100 epochs and started overfitting in the second half, giving a baseline mAP of 0.84. Sparsity training ran for 300 epochs with s = 0.001 using prune 1 (global sparsification) to prepare for pruning that includes the shortcut layers, with learning-rate decays of gamma 0.1 at 70% and 90% of the total epochs. Accuracy fluctuated considerably during sparsification and recovered after the learning-rate drops, ending with a sparsified-model mAP of 0.797.
99 | ![baseline_and_sparse](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/baseline_and_sparse.jpg) 100 |
101 | Now look at the BN sparsity. The code logs the gamma weights of the sparsified BN layers to tensorboard. On the left of the figure below, during normal training the gammas are roughly normally distributed around 1; on the right, during sparsification most gammas are gradually pushed towards 0. Channels with near-zero gamma produce an almost constant output and can be pruned away. A sketch of this logging follows the figure.
102 | ![bn](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/bn.jpg) 103 |
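Histograms like the ones above can be produced with a few lines of tensorboard logging. This is a hedged sketch using torch.utils.tensorboard; the repository's actual tags, writer setup, and layer selection may differ, and the log directory here is an arbitrary choice.

```python
import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/sparsity')  # hypothetical log directory

def log_bn_gammas(model, epoch):
    for name, m in model.named_modules():
        if isinstance(m, torch.nn.BatchNorm2d):
            # one histogram per BN layer, viewable under HISTOGRAMS in tensorboard
            writer.add_histogram('bn_gamma/' + name, m.weight.data.abs(), epoch)
```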
104 | At this point the model can be pruned. This example uses layer_channel_prune.py to prune channels and layers at the same time; the script combines the slim_prune channel-pruning strategy with the layer_prune layer-pruning strategy. global_percent (the global channel-pruning ratio) is 0.93, layer_keep (the minimum fraction of channels kept per layer) is 0.01, and 16 shortcuts are pruned, i.e. 48 layers (32 CBLs and 16 shortcuts). As the results below show, channel pruning costs about one mAP point while shrinking the model from 239 MB to 5.2 MB; after layer pruning the mAP drops to 0.53 and the size shrinks to 4.6 MB, a 98% reduction in parameters, with inference time going from 16 ms to 6 ms (measured on a Tesla P100).
105 | `python layer_channel_prune.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/last.pt --global_percent 0.93 --layer_keep 0.01 --shortcuts 16`
106 |
107 | ![prune9316](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/prune9316.png) 108 |
109 | Since accuracy dropped, we fine-tune the model. Below is the result after 50 epochs of fine-tuning: mAP recovers to 0.793 and the BN gammas return to a roughly normal distribution. This is a few points below the baseline, but the model is drastically compressed, uses far fewer resources, and runs much faster. To gain accuracy, try a lower pruning rate; for example, pruning only 10 shortcuts here and fine-tuning for the same 50 epochs brings the mAP back to 0.81. For maximum speed there is an extreme example: global channel pruning at 0.95 with 54 layers removed compresses the model to 2.8 MB and cuts inference to 5 ms; the mAP drops to 0, yet after 50 epochs of fine-tuning it still recovers to 0.75.
110 |
111 | `python train.py --cfg cfg/prune_16_shortcut_prune_0.93_keep_0.01_yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/prune_16_shortcut_prune_0.93_keep_0.01_last.weights --batch-size 52 --epochs 50`
112 | ![finetune_and_bn](https://github.com/tanluren/yolov3-channel-and-layer-pruning/blob/master/data/img/finetune_and_bn.jpg)
113 | One can speculate that the pruned cfg is a reasonably well-suited architecture for this dataset, and the retained weights let the model quickly train back close to that architecture's capacity ceiling; the process resembles a limited form of architecture search. Different training, sparsification, and pruning strategies give different results, and even this example can likely be compressed further while keeping good accuracy. There are many optimization and deployment projects around yolov3, and the cfg and weights obtained from pruning here can be taken into those projects for further optimization and application.
114 | [这里](https://pan.baidu.com/s/1APUfwO4L69u28Wt9gFNAYw)分享了这个例子的权重和cfg,包括baseline,稀疏,不同剪枝设置后的结果。 115 | 116 | ## License 117 | Apache 2.0 118 | -------------------------------------------------------------------------------- /cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | 
[convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | 
pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 
641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=32 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # Downsample 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=64 39 | size=3 40 | stride=2 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=3 56 | stride=1 57 | pad=1 
58 | activation=leaky 59 | 60 | [shortcut] 61 | from=-3 62 | activation=linear 63 | 64 | # Downsample 65 | 66 | [convolutional] 67 | batch_normalize=1 68 | filters=128 69 | size=3 70 | stride=2 71 | pad=1 72 | activation=leaky 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=64 77 | size=1 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [shortcut] 91 | from=-3 92 | activation=linear 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=64 97 | size=1 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [shortcut] 111 | from=-3 112 | activation=linear 113 | 114 | # Downsample 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=256 119 | size=3 120 | stride=2 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=128 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=256 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [shortcut] 141 | from=-3 142 | activation=linear 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=128 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=256 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [shortcut] 161 | from=-3 162 | activation=linear 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=256 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [shortcut] 181 | from=-3 182 | activation=linear 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=128 187 | size=1 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=3 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [shortcut] 201 | from=-3 202 | activation=linear 203 | 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=1 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | filters=256 216 | size=3 217 | stride=1 218 | pad=1 219 | activation=leaky 220 | 221 | [shortcut] 222 | from=-3 223 | activation=linear 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=256 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=leaky 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=256 256 | size=3 257 | stride=1 258 | pad=1 259 | activation=leaky 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=256 276 | size=3 277 | stride=1 278 | pad=1 279 | 
activation=leaky 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | # Downsample 286 | 287 | [convolutional] 288 | batch_normalize=1 289 | filters=512 290 | size=3 291 | stride=2 292 | pad=1 293 | activation=leaky 294 | 295 | [convolutional] 296 | batch_normalize=1 297 | filters=256 298 | size=1 299 | stride=1 300 | pad=1 301 | activation=leaky 302 | 303 | [convolutional] 304 | batch_normalize=1 305 | filters=512 306 | size=3 307 | stride=1 308 | pad=1 309 | activation=leaky 310 | 311 | [shortcut] 312 | from=-3 313 | activation=linear 314 | 315 | 316 | [convolutional] 317 | batch_normalize=1 318 | filters=256 319 | size=1 320 | stride=1 321 | pad=1 322 | activation=leaky 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=512 327 | size=3 328 | stride=1 329 | pad=1 330 | activation=leaky 331 | 332 | [shortcut] 333 | from=-3 334 | activation=linear 335 | 336 | 337 | [convolutional] 338 | batch_normalize=1 339 | filters=256 340 | size=1 341 | stride=1 342 | pad=1 343 | activation=leaky 344 | 345 | [convolutional] 346 | batch_normalize=1 347 | filters=512 348 | size=3 349 | stride=1 350 | pad=1 351 | activation=leaky 352 | 353 | [shortcut] 354 | from=-3 355 | activation=linear 356 | 357 | 358 | [convolutional] 359 | batch_normalize=1 360 | filters=256 361 | size=1 362 | stride=1 363 | pad=1 364 | activation=leaky 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=512 369 | size=3 370 | stride=1 371 | pad=1 372 | activation=leaky 373 | 374 | [shortcut] 375 | from=-3 376 | activation=linear 377 | 378 | [convolutional] 379 | batch_normalize=1 380 | filters=256 381 | size=1 382 | stride=1 383 | pad=1 384 | activation=leaky 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=512 389 | size=3 390 | stride=1 391 | pad=1 392 | activation=leaky 393 | 394 | [shortcut] 395 | from=-3 396 | activation=linear 397 | 398 | 399 | [convolutional] 400 | batch_normalize=1 401 | filters=256 402 | size=1 403 | stride=1 404 | pad=1 405 | activation=leaky 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=1 412 | pad=1 413 | activation=leaky 414 | 415 | [shortcut] 416 | from=-3 417 | activation=linear 418 | 419 | 420 | [convolutional] 421 | batch_normalize=1 422 | filters=256 423 | size=1 424 | stride=1 425 | pad=1 426 | activation=leaky 427 | 428 | [convolutional] 429 | batch_normalize=1 430 | filters=512 431 | size=3 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 | [shortcut] 437 | from=-3 438 | activation=linear 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=1 444 | stride=1 445 | pad=1 446 | activation=leaky 447 | 448 | [convolutional] 449 | batch_normalize=1 450 | filters=512 451 | size=3 452 | stride=1 453 | pad=1 454 | activation=leaky 455 | 456 | [shortcut] 457 | from=-3 458 | activation=linear 459 | 460 | # Downsample 461 | 462 | [convolutional] 463 | batch_normalize=1 464 | filters=1024 465 | size=3 466 | stride=2 467 | pad=1 468 | activation=leaky 469 | 470 | [convolutional] 471 | batch_normalize=1 472 | filters=512 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | [convolutional] 479 | batch_normalize=1 480 | filters=1024 481 | size=3 482 | stride=1 483 | pad=1 484 | activation=leaky 485 | 486 | [shortcut] 487 | from=-3 488 | activation=linear 489 | 490 | [convolutional] 491 | batch_normalize=1 492 | filters=512 493 | size=1 494 | stride=1 495 | pad=1 496 | activation=leaky 497 | 498 | [convolutional] 499 | batch_normalize=1 
500 | filters=1024 501 | size=3 502 | stride=1 503 | pad=1 504 | activation=leaky 505 | 506 | [shortcut] 507 | from=-3 508 | activation=linear 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=512 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=leaky 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=1024 521 | size=3 522 | stride=1 523 | pad=1 524 | activation=leaky 525 | 526 | [shortcut] 527 | from=-3 528 | activation=linear 529 | 530 | [convolutional] 531 | batch_normalize=1 532 | filters=512 533 | size=1 534 | stride=1 535 | pad=1 536 | activation=leaky 537 | 538 | [convolutional] 539 | batch_normalize=1 540 | filters=1024 541 | size=3 542 | stride=1 543 | pad=1 544 | activation=leaky 545 | 546 | [shortcut] 547 | from=-3 548 | activation=linear 549 | 550 | ###################### 551 | 552 | [convolutional] 553 | batch_normalize=1 554 | filters=512 555 | size=1 556 | stride=1 557 | pad=1 558 | activation=leaky 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | size=3 563 | stride=1 564 | pad=1 565 | filters=1024 566 | activation=leaky 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=512 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=leaky 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | size=3 579 | stride=1 580 | pad=1 581 | filters=1024 582 | activation=leaky 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=512 587 | size=1 588 | stride=1 589 | pad=1 590 | activation=leaky 591 | 592 | [convolutional] 593 | batch_normalize=1 594 | size=3 595 | stride=1 596 | pad=1 597 | filters=1024 598 | activation=leaky 599 | 600 | [convolutional] 601 | size=1 602 | stride=1 603 | pad=1 604 | filters=18 605 | activation=linear 606 | 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=1 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .7 615 | truth_thresh = 1 616 | random=1 617 | 618 | 619 | [route] 620 | layers = -4 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=256 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [upsample] 631 | stride=2 632 | 633 | [route] 634 | layers = -1, 61 635 | 636 | 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=256 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | size=3 649 | stride=1 650 | pad=1 651 | filters=512 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | size=3 665 | stride=1 666 | pad=1 667 | filters=512 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | size=1 688 | stride=1 689 | pad=1 690 | filters=18 691 | activation=linear 692 | 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .7 701 | truth_thresh = 1 702 | random=1 703 | 704 | 705 | 706 | [route] 707 | layers = -4 708 | 709 | [convolutional] 710 | batch_normalize=1 711 | filters=128 712 | size=1 713 | stride=1 714 | pad=1 715 
| activation=leaky 716 | 717 | [upsample] 718 | stride=2 719 | 720 | [route] 721 | layers = -1, 36 722 | 723 | 724 | 725 | [convolutional] 726 | batch_normalize=1 727 | filters=128 728 | size=1 729 | stride=1 730 | pad=1 731 | activation=leaky 732 | 733 | [convolutional] 734 | batch_normalize=1 735 | size=3 736 | stride=1 737 | pad=1 738 | filters=256 739 | activation=leaky 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | size=3 752 | stride=1 753 | pad=1 754 | filters=256 755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | size=1 775 | stride=1 776 | pad=1 777 | filters=18 778 | activation=linear 779 | 780 | 781 | [yolo] 782 | mask = 0,1,2 783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 784 | classes=1 785 | num=9 786 | jitter=.3 787 | ignore_thresh = .7 788 | truth_thresh = 1 789 | random=1 790 | 791 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | 
size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | 
activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 
573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | 
activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 
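A note on the *-1cls.cfg files above: the final [convolutional] layer in front of each [yolo] block uses filters=18 because every head owns 3 anchors (its mask) and each anchor predicts 4 box coordinates, 1 objectness score and one score per class, so filters = 3 x (1 + 5) = 18. The 80-class cfgs later in this listing use 255 for the same reason. The helper below is a hypothetical sketch that only makes this arithmetic explicit; it is not part of the repository.

# Hypothetical sketch: filter count of the conv layer feeding a [yolo] head.
# Per anchor the head predicts 4 box offsets + 1 objectness + num_classes scores.
def yolo_head_filters(num_classes, anchors_per_head=3):
    return anchors_per_head * (num_classes + 5)

assert yolo_head_filters(1) == 18     # the *-1cls.cfg heads above
assert yolo_head_filters(80) == 255   # the standard 80-class COCO heads below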
-------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 
500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | 
[convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 
465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | 
stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3s-3a320.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | 
activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 
318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | 
batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=85 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 2 642 | anchors = 16,30, 62,45, 156,198 643 | classes=80 644 | num=3 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=85 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 1 728 | anchors = 16,30, 62,45, 156,198 729 | classes=80 730 | num=3 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | 
batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=85 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0 815 | anchors = 16,30, 62,45, 156,198 816 | classes=80 817 | num=3 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=2 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=64 36 | size=3 37 | stride=2 38 | pad=1 39 | activation=leaky 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [route] 50 | layers=-1 51 | groups=2 52 | group_id=1 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=32 57 | size=3 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=32 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [route] 71 | layers = -1,-2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [route] 82 | layers = -6,-1 83 | 84 | [maxpool] 85 | size=2 86 | stride=2 87 | 88 | [convolutional] 89 | batch_normalize=1 90 | filters=128 91 | size=3 92 | stride=1 93 | pad=1 94 | activation=leaky 95 | 96 | [route] 97 | layers=-1 98 | groups=2 99 | group_id=1 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=64 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=64 112 | size=3 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [route] 118 | layers = -1,-2 119 | 120 | [convolutional] 121 | batch_normalize=1 122 | filters=128 123 | size=1 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [route] 129 | layers = -6,-1 130 | 131 | [maxpool] 132 | size=2 133 | stride=2 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=256 138 | size=3 139 | stride=1 140 | pad=1 141 | activation=leaky 142 | 143 | [route] 144 | layers=-1 145 | groups=2 146 | group_id=1 147 | 148 | [convolutional] 149 | batch_normalize=1 150 | filters=128 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=leaky 155 | 156 | [convolutional] 157 | batch_normalize=1 158 | filters=128 
159 | size=3 160 | stride=1 161 | pad=1 162 | activation=leaky 163 | 164 | [route] 165 | layers = -1,-2 166 | 167 | [convolutional] 168 | batch_normalize=1 169 | filters=256 170 | size=1 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [route] 176 | layers = -6,-1 177 | 178 | [maxpool] 179 | size=2 180 | stride=2 181 | 182 | [convolutional] 183 | batch_normalize=1 184 | filters=512 185 | size=3 186 | stride=1 187 | pad=1 188 | activation=leaky 189 | 190 | ################################## 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=1 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | filters=512 203 | size=3 204 | stride=1 205 | pad=1 206 | activation=leaky 207 | 208 | [convolutional] 209 | size=1 210 | stride=1 211 | pad=1 212 | filters=255 213 | activation=linear 214 | 215 | 216 | 217 | [yolo] 218 | mask = 3,4,5 219 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 220 | classes=80 221 | num=6 222 | jitter=.3 223 | scale_x_y = 1.05 224 | cls_normalizer=1.0 225 | iou_normalizer=0.07 226 | iou_loss=ciou 227 | ignore_thresh = .7 228 | truth_thresh = 1 229 | random=0 230 | resize=1.5 231 | nms_kind=greedynms 232 | beta_nms=0.6 233 | 234 | [route] 235 | layers = -4 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=128 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [upsample] 246 | stride=2 247 | 248 | [route] 249 | layers = -1, 23 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=256 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=leaky 258 | 259 | [convolutional] 260 | size=1 261 | stride=1 262 | pad=1 263 | filters=255 264 | activation=linear 265 | 266 | [yolo] 267 | mask = 1,2,3 268 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 269 | classes=80 270 | num=6 271 | jitter=.3 272 | scale_x_y = 1.05 273 | cls_normalizer=1.0 274 | iou_normalizer=0.07 275 | iou_loss=ciou 276 | ignore_thresh = .7 277 | truth_thresh = 1 278 | random=0 279 | resize=1.5 280 | nms_kind=greedynms 281 | beta_nms=0.6 -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 
| vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco_1000img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1000val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_16img.txt 3 | valid=./data/coco_16img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | -------------------------------------------------------------------------------- /data/coco_1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=./data/coco_1cls.txt 3 | valid=./data/coco_1cls.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg 2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg 3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg 4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg 5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg 6 | -------------------------------------------------------------------------------- /data/coco_1img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1img.txt 3 | valid=./data/coco_1img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1img.txt: -------------------------------------------------------------------------------- 1 | 
../coco/images/val2014/COCO_val2014_000000581886.jpg 2 | -------------------------------------------------------------------------------- /data/coco_1k5k.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_32img.txt 3 | valid=./data/coco_32img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | -------------------------------------------------------------------------------- /data/coco_500val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_500img.txt 3 | valid=./data/coco_500val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_64img.txt 3 | valid=./data/coco_64img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | 
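All of the *.data files in this listing share the same key=value layout: classes gives the number of classes, train and valid point at txt files listing image paths, names at the class-name file, and backup/eval are darknet-style bookkeeping fields. As a rough illustration of how such a file can be consumed, a minimal reader could look like the sketch below; the name parse_data_cfg is borrowed for illustration only, and the repository's actual reader lives in utils/parse_config.py and may differ.

# Illustrative sketch, not copied from this repo: read a darknet-style .data
# file into a dict of option strings.
def parse_data_cfg(path):
    options = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            key, value = line.split('=', 1)
            options[key.strip()] = value.strip()
    return options

# e.g. parse_data_cfg('data/coco_64img.data')['train'] -> './data/coco_64img.txt'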
-------------------------------------------------------------------------------- /data/coco_64img.shapes: -------------------------------------------------------------------------------- 1 | 640 480 2 | 640 426 3 | 640 428 4 | 640 425 5 | 481 640 6 | 381 500 7 | 640 488 8 | 480 640 9 | 640 426 10 | 427 640 11 | 500 375 12 | 612 612 13 | 640 425 14 | 512 640 15 | 640 480 16 | 640 427 17 | 640 427 18 | 640 416 19 | 640 480 20 | 416 640 21 | 640 481 22 | 640 573 23 | 480 640 24 | 640 480 25 | 640 428 26 | 480 640 27 | 427 640 28 | 640 536 29 | 640 480 30 | 640 428 31 | 640 424 32 | 500 333 33 | 591 640 34 | 640 480 35 | 640 426 36 | 600 600 37 | 640 427 38 | 640 427 39 | 640 480 40 | 640 481 41 | 640 427 42 | 640 480 43 | 640 480 44 | 480 640 45 | 480 640 46 | 640 480 47 | 446 640 48 | 640 480 49 | 640 611 50 | 426 640 51 | 640 480 52 | 640 389 53 | 427 640 54 | 640 480 55 | 640 480 56 | 480 640 57 | 640 480 58 | 640 427 59 | 500 495 60 | 500 313 61 | 640 480 62 | 360 640 63 | 427 640 64 | 640 480 65 | -------------------------------------------------------------------------------- /data/coco_64img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | ../coco/images/train2014/COCO_train2014_000000000263.jpg 34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg 35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg 36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg 37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg 38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg 39 | 
../coco/images/train2014/COCO_train2014_000000000321.jpg 40 | ../coco/images/train2014/COCO_train2014_000000000322.jpg 41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg 42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg 43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg 44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg 45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg 46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg 47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg 48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg 49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg 50 | ../coco/images/train2014/COCO_train2014_000000000404.jpg 51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg 52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg 53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg 54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg 55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg 56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg 57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg 58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg 59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg 60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg 61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg 62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg 63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg 64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg 65 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /data/converter.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from PIL import Image 3 | import os, glob 4 | import datetime 5 | import shutil 6 | 7 | running_from_path = os.getcwd() 8 | created_images_dir = 'images' 9 | created_labels_dir = 'labels' 10 | data_dir = 'data' # data_dir is the folder this script lives in 11 | 12 | def hms_string(sec_elapsed): # format the elapsed time for display 13 | h = int(sec_elapsed / (60 * 60)) 14 | m = int((sec_elapsed % (60 * 60)) / 60) 15 | s = sec_elapsed % 60.
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s) 17 | 18 | def generate_dir(set_name, root_path): # 往images和labels文件夹下生成相应的文件夹 19 | images_dir = os.path.join(root_path, 'images') 20 | annotation_dir = os.path.join(root_path, 'annotations') 21 | 22 | new_images_dir = os.path.join(created_images_dir, set_name) # 将图片从原来的文件夹复制到该文件夹下 23 | new_annotation_dir = os.path.join(created_labels_dir, set_name) 24 | 25 | if not os.path.exists(new_images_dir): 26 | os.makedirs(new_images_dir) 27 | 28 | if not os.path.exists(new_annotation_dir): 29 | os.makedirs(new_annotation_dir) 30 | 31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # 将图片从原来的文件夹复制到新文件夹下 32 | shutil.copy(img, new_images_dir) 33 | 34 | os.chdir(annotation_dir) # 切换到annotation的路径下 35 | matlab_annotations = glob.glob("*.mat") # 仅仅包含文件名,不包含路径 36 | os.chdir(running_from_path) # 切换回原来的路径 37 | 38 | for matfile in matlab_annotations: 39 | filename = matfile.split(".")[0] 40 | 41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg")) 42 | 43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False) 44 | 45 | boxes = content["boxes"] 46 | 47 | width, height = pil_image.size 48 | 49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs: 50 | for box_idx, box in enumerate(boxes.T): 51 | a = box[0][0][0][0] 52 | b = box[0][0][0][1] 53 | c = box[0][0][0][2] 54 | d = box[0][0][0][3] 55 | 56 | aXY = (a[0][1], a[0][0]) 57 | bXY = (b[0][1], b[0][0]) 58 | cXY = (c[0][1], c[0][0]) 59 | dXY = (d[0][1], d[0][0]) 60 | 61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0]) 62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0]) 63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1]) 64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1]) 65 | 66 | # clip,防止超出边界 67 | maxX = min(maxX, width-1) 68 | minX = max(minX, 0) 69 | maxY = min(maxY, height-1) 70 | minY = max(minY, 0) 71 | 72 | # ( / ) 73 | norm_width = (maxX - minX) / width 74 | 75 | # ( / ) 76 | norm_height = (maxY - minY) / height 77 | 78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2 79 | 80 | norm_center_x = center_x / width 81 | norm_center_y = center_y / height 82 | 83 | if box_idx != 0: 84 | hs.write("\n") 85 | 86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # 0表示类别 87 | 88 | def create_txt(dirlist, filename): 89 | with open(filename, "w") as txtfile: # 在data文件夹下生成txt文件 90 | imglist = [] 91 | 92 | for dir in dirlist: # dir='images/test' 93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg' 94 | 95 | for idx, img in enumerate(imglist): 96 | if idx != 0: 97 | txtfile.write("\n") 98 | txtfile.write(os.path.join(data_dir, img)) # 加上前缀data 99 | 100 | if __name__ == '__main__': 101 | start_time = datetime.datetime.now() 102 | 103 | generate_dir("train", "hand_dataset/training_dataset/training_data") # 第一个参数表示生成的文件夹的名称 104 | generate_dir("test", "hand_dataset/test_dataset/test_data") 105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data") 106 | 107 | create_txt((os.path.join(created_images_dir, 'train'), # 将train和validation文件夹下的图片合并成train 108 | os.path.join(created_images_dir, 'validation')), 109 | 'train.txt') 110 | create_txt((os.path.join(created_images_dir, 'test'), ), 111 | 'valid.txt') 112 | 113 | end_time = datetime.datetime.now() 114 | seconds_elapsed = (end_time - start_time).total_seconds() 115 | print("It took {} to execute this".format(hms_string(seconds_elapsed))) 
-------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image 40 | -------------------------------------------------------------------------------- /data/get_coco_dataset_gdrive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859 3 | 4 | # Zip coco folder 5 | # zip -r coco.zip coco 6 | # tar -czvf coco.tar.gz coco 7 | 8 | # Set fileid and filename 9 | filename="coco.zip" 10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip 11 | 12 | # Download from Google Drive, accepting presented query 13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 15 | rm ./cookie 16 | 17 | # Unzip 18 | unzip -q ${filename} # for coco.zip 19 | # tar -xzf ${filename} # for coco.tar.gz 20 | -------------------------------------------------------------------------------- /data/hand.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=D:/dl/YOLOv3-model-pruning/data/train_.txt 3 | valid=D:/dl/YOLOv3-model-pruning/data/valid_.txt 4 | names=D:/dl/YOLOv3-model-pruning/data/oxfordhand.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/img/1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/1.jpg -------------------------------------------------------------------------------- /data/img/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/2.jpg -------------------------------------------------------------------------------- /data/img/baseline_and_sparse.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/baseline_and_sparse.jpg -------------------------------------------------------------------------------- /data/img/bn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/bn.jpg -------------------------------------------------------------------------------- /data/img/finetune_and_bn.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/finetune_and_bn.jpg -------------------------------------------------------------------------------- /data/img/prune9316.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/prune9316.png -------------------------------------------------------------------------------- /data/oxfordhand.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=data/oxfordhand.names 5 | -------------------------------------------------------------------------------- /data/oxfordhand.names: -------------------------------------------------------------------------------- 1 | hand 2 | 3 | -------------------------------------------------------------------------------- /data/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/samples/bus.jpg -------------------------------------------------------------------------------- /data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/samples/zidane.jpg -------------------------------------------------------------------------------- /data/valid_.shapes: -------------------------------------------------------------------------------- 1 | 500 375 2 | 500 375 3 | 500 375 4 | 500 375 5 | 500 375 6 | 500 375 7 | 375 500 8 | 500 333 9 | 333 500 10 | 500 375 11 | 500 434 12 | 500 375 13 | 333 500 14 | 500 375 15 | 500 331 16 | 500 375 17 | 500 375 18 | 500 374 19 | 500 375 20 | 500 375 21 | 375 500 22 | 500 333 23 | 500 333 24 | 500 375 25 | 500 332 26 | 500 486 27 | 500 375 28 | 500 375 29 | 375 500 30 | 375 500 31 | 500 375 32 | 500 375 33 | 500 375 34 | 355 500 35 | 375 500 36 | 500 333 37 
| 500 375 38 | 500 377 39 | 375 500 40 | 500 375 41 | 500 375 42 | 500 375 43 | 500 375 44 | 333 500 45 | 500 375 46 | 500 333 47 | 500 346 48 | 500 375 49 | 476 500 50 | 500 333 51 | 500 420 52 | 500 333 53 | 500 333 54 | 500 333 55 | 333 500 56 | 333 500 57 | 375 500 58 | 500 379 59 | 500 375 60 | 500 375 61 | 500 357 62 | 375 500 63 | 500 393 64 | 333 500 65 | 500 375 66 | 500 375 67 | 500 333 68 | 333 500 69 | 327 500 70 | 500 375 71 | 500 375 72 | 500 345 73 | 333 500 74 | 375 500 75 | 500 380 76 | 500 375 77 | 487 377 78 | 500 375 79 | 500 333 80 | 500 333 81 | 333 500 82 | 500 375 83 | 375 500 84 | 500 375 85 | 500 375 86 | 500 375 87 | 375 500 88 | 500 375 89 | 500 332 90 | 333 500 91 | 480 360 92 | 500 334 93 | 500 375 94 | 500 375 95 | 333 500 96 | 500 333 97 | 375 500 98 | 500 375 99 | 500 375 100 | 500 375 101 | 500 375 102 | 500 375 103 | 332 500 104 | 500 375 105 | 500 375 106 | 375 500 107 | 500 333 108 | 500 331 109 | 500 375 110 | 333 500 111 | 333 500 112 | 486 500 113 | 500 375 114 | 375 500 115 | 356 500 116 | 500 375 117 | 375 500 118 | 500 375 119 | 500 375 120 | 268 400 121 | 389 500 122 | 333 500 123 | 500 375 124 | 500 375 125 | 500 367 126 | 500 375 127 | 500 375 128 | 500 334 129 | 495 500 130 | 319 480 131 | 500 375 132 | 500 375 133 | 333 500 134 | 500 375 135 | 500 375 136 | 500 375 137 | 326 500 138 | 500 375 139 | 500 375 140 | 500 375 141 | 500 400 142 | 332 500 143 | 500 375 144 | 500 375 145 | 360 331 146 | 333 500 147 | 500 332 148 | 500 374 149 | 500 375 150 | 375 500 151 | 500 375 152 | 500 375 153 | 500 375 154 | 500 367 155 | 500 375 156 | 500 375 157 | 500 375 158 | 375 500 159 | 500 375 160 | 500 375 161 | 500 390 162 | 500 358 163 | 500 397 164 | 500 341 165 | 375 500 166 | 500 333 167 | 500 375 168 | 332 500 169 | 500 375 170 | 500 375 171 | 500 375 172 | 375 500 173 | 240 320 174 | 450 480 175 | 417 500 176 | 500 400 177 | 500 375 178 | 500 411 179 | 338 500 180 | 500 375 181 | 500 375 182 | 379 500 183 | 500 375 184 | 333 500 185 | 500 332 186 | 500 375 187 | 500 375 188 | 500 375 189 | 500 375 190 | 332 500 191 | 469 500 192 | 500 375 193 | 333 500 194 | 500 375 195 | 500 375 196 | 500 376 197 | 500 375 198 | 500 334 199 | 500 375 200 | 500 375 201 | 500 341 202 | 500 333 203 | 500 375 204 | 500 375 205 | 500 334 206 | 500 375 207 | 500 375 208 | 500 357 209 | 375 500 210 | 500 375 211 | 500 375 212 | 375 500 213 | 500 375 214 | 500 497 215 | 375 500 216 | 375 500 217 | 500 334 218 | 500 375 219 | 500 375 220 | 500 375 221 | 500 375 222 | 500 375 223 | 333 500 224 | 500 375 225 | 375 500 226 | 500 375 227 | 500 375 228 | 375 500 229 | 500 375 230 | 334 500 231 | 500 375 232 | 364 500 233 | 375 500 234 | 494 500 235 | 484 500 236 | 500 333 237 | 500 375 238 | 500 443 239 | 375 500 240 | 500 375 241 | 500 334 242 | 500 375 243 | 375 500 244 | 500 375 245 | 500 333 246 | 500 375 247 | 313 500 248 | 500 375 249 | 400 300 250 | 375 500 251 | 375 500 252 | 500 375 253 | 333 500 254 | 500 337 255 | 375 500 256 | 500 290 257 | 500 375 258 | 500 312 259 | 500 333 260 | 500 375 261 | 375 500 262 | 500 333 263 | 500 333 264 | 500 333 265 | 500 375 266 | 500 375 267 | 500 375 268 | 500 333 269 | 500 375 270 | 500 375 271 | 500 375 272 | 500 375 273 | 500 375 274 | 375 500 275 | 375 500 276 | 500 375 277 | 500 374 278 | 333 500 279 | 375 500 280 | 500 375 281 | 500 375 282 | 500 375 283 | 500 375 284 | 333 500 285 | 500 375 286 | 500 375 287 | 500 375 288 | 500 333 289 | 294 500 290 | 500 375 291 | 500 375 292 | 500 375 293 | 500 334 294 | 375 500 295 | 
333 500 296 | 500 375 297 | 333 500 298 | 500 375 299 | 500 221 300 | 500 374 301 | 500 375 302 | 333 500 303 | 500 333 304 | 500 375 305 | 270 360 306 | 500 371 307 | 500 333 308 | 500 335 309 | 358 500 310 | 220 500 311 | 500 375 312 | 500 375 313 | 375 500 314 | 500 375 315 | 500 375 316 | 375 500 317 | 366 500 318 | 500 375 319 | 500 379 320 | 500 375 321 | 500 489 322 | 500 333 323 | 500 375 324 | 500 375 325 | 500 333 326 | 500 375 327 | 500 375 328 | 334 500 329 | 500 395 330 | 333 500 331 | 500 369 332 | 500 375 333 | 375 500 334 | 500 375 335 | 500 375 336 | 375 500 337 | 500 333 338 | 500 332 339 | 500 375 340 | 500 375 341 | 375 500 342 | 375 500 343 | 500 379 344 | 500 395 345 | 500 333 346 | 500 375 347 | 500 375 348 | 378 500 349 | 500 333 350 | 500 335 351 | 500 333 352 | 375 500 353 | 375 500 354 | 281 500 355 | 500 336 356 | 500 333 357 | 500 375 358 | 500 245 359 | 500 375 360 | 500 375 361 | 500 333 362 | 500 375 363 | 500 334 364 | 500 375 365 | 500 419 366 | 500 375 367 | 500 333 368 | 500 375 369 | 500 375 370 | 375 500 371 | 500 375 372 | 500 375 373 | 500 375 374 | 375 500 375 | 500 332 376 | 500 333 377 | 500 277 378 | 500 333 379 | 500 333 380 | 375 500 381 | 500 334 382 | 500 375 383 | 500 375 384 | 500 333 385 | 335 500 386 | 500 375 387 | 500 375 388 | 332 500 389 | 500 375 390 | 500 375 391 | 500 375 392 | 500 375 393 | 500 375 394 | 500 313 395 | 500 375 396 | 500 375 397 | 333 500 398 | 500 375 399 | 500 375 400 | 335 500 401 | 500 375 402 | 500 375 403 | 500 375 404 | 375 500 405 | 500 335 406 | 375 500 407 | 500 375 408 | 375 500 409 | 500 500 410 | 500 375 411 | 500 375 412 | 500 333 413 | 500 375 414 | 500 375 415 | 500 375 416 | 500 375 417 | 333 500 418 | 500 375 419 | 500 375 420 | 500 375 421 | 500 332 422 | 500 375 423 | 334 500 424 | 332 500 425 | 375 500 426 | 500 333 427 | 500 405 428 | 333 500 429 | 500 334 430 | 500 333 431 | 500 375 432 | 500 375 433 | 500 332 434 | 333 500 435 | 368 500 436 | 375 500 437 | 500 375 438 | 500 375 439 | 500 375 440 | 500 375 441 | 500 375 442 | 288 432 443 | 375 500 444 | 500 375 445 | 500 333 446 | 500 375 447 | 500 333 448 | 375 500 449 | 500 375 450 | 500 281 451 | 333 500 452 | 500 333 453 | 500 375 454 | 500 333 455 | 500 375 456 | 500 334 457 | 500 375 458 | 375 500 459 | 375 500 460 | 375 500 461 | 500 333 462 | 500 333 463 | 500 375 464 | 500 375 465 | 500 375 466 | 500 375 467 | 500 375 468 | 500 356 469 | 474 500 470 | 500 375 471 | 500 375 472 | 500 326 473 | 360 480 474 | 500 375 475 | 500 375 476 | 500 488 477 | 500 375 478 | 442 500 479 | 500 333 480 | 450 349 481 | 375 500 482 | 500 375 483 | 375 500 484 | 306 500 485 | 500 338 486 | 500 333 487 | 500 375 488 | 375 500 489 | 500 375 490 | 500 333 491 | 375 500 492 | 500 375 493 | 375 500 494 | 378 500 495 | 500 375 496 | 500 375 497 | 500 375 498 | 500 405 499 | 500 333 500 | 500 375 501 | 500 500 502 | 500 375 503 | 450 300 504 | 500 375 505 | 500 375 506 | 500 375 507 | 333 500 508 | 500 375 509 | 500 375 510 | 500 375 511 | 500 375 512 | 500 377 513 | 500 375 514 | 500 333 515 | 500 375 516 | 375 500 517 | 375 500 518 | 500 375 519 | 500 375 520 | 500 375 521 | 375 500 522 | 500 375 523 | 500 332 524 | 500 375 525 | 500 375 526 | 500 375 527 | 334 500 528 | 500 375 529 | 500 375 530 | 332 500 531 | 500 333 532 | 500 375 533 | 375 500 534 | 375 500 535 | 333 500 536 | 500 332 537 | 500 375 538 | 500 375 539 | 500 375 540 | 500 364 541 | 333 500 542 | 500 375 543 | 500 333 544 | 500 375 545 | 500 375 546 | 500 333 547 | 375 500 548 | 500 375 549 
| 500 333 550 | 500 336 551 | 500 375 552 | 500 359 553 | 500 333 554 | 500 375 555 | 500 375 556 | 500 375 557 | 500 332 558 | 500 375 559 | 500 333 560 | 500 375 561 | 282 500 562 | 500 375 563 | 500 375 564 | 500 375 565 | 500 375 566 | 500 375 567 | 375 500 568 | 500 375 569 | 375 500 570 | 500 375 571 | 500 375 572 | 500 375 573 | 500 390 574 | 334 500 575 | 332 500 576 | 500 375 577 | 500 375 578 | 425 319 579 | 500 333 580 | 500 334 581 | 500 375 582 | 375 500 583 | 500 333 584 | 500 375 585 | 375 500 586 | 500 332 587 | 500 375 588 | 448 336 589 | 500 375 590 | 500 375 591 | 500 375 592 | 500 375 593 | 500 375 594 | 500 375 595 | 500 335 596 | 500 333 597 | 375 500 598 | 375 500 599 | 500 333 600 | 500 375 601 | 333 500 602 | 500 375 603 | 500 375 604 | 500 375 605 | 375 500 606 | 500 375 607 | 500 375 608 | 500 375 609 | 333 500 610 | 375 500 611 | 500 439 612 | 375 500 613 | 500 375 614 | 500 375 615 | 334 500 616 | 374 500 617 | 500 375 618 | 321 500 619 | 500 400 620 | 500 375 621 | 500 375 622 | 500 375 623 | 500 375 624 | 500 410 625 | 500 333 626 | 500 375 627 | 500 375 628 | 500 334 629 | 500 375 630 | 500 375 631 | 500 326 632 | 500 375 633 | 500 357 634 | 500 374 635 | 500 375 636 | 500 374 637 | 500 333 638 | 500 375 639 | 500 375 640 | 500 334 641 | 375 500 642 | 500 375 643 | 500 334 644 | 500 375 645 | 500 333 646 | 500 375 647 | 500 375 648 | 500 375 649 | 480 361 650 | 375 500 651 | 333 500 652 | 500 333 653 | 333 500 654 | 500 333 655 | 500 372 656 | 375 500 657 | 500 375 658 | 375 500 659 | 500 319 660 | 500 333 661 | 500 375 662 | 375 500 663 | 500 377 664 | 400 498 665 | 500 393 666 | 500 334 667 | 333 500 668 | 500 333 669 | 500 375 670 | 500 375 671 | 500 375 672 | 500 375 673 | 375 500 674 | 375 500 675 | 333 500 676 | 500 333 677 | 500 333 678 | 333 500 679 | 306 500 680 | 500 375 681 | 500 334 682 | 320 448 683 | 333 500 684 | 375 500 685 | 500 334 686 | 270 360 687 | 361 500 688 | 500 393 689 | 500 375 690 | 396 500 691 | 500 333 692 | 500 333 693 | 320 480 694 | 500 375 695 | 500 375 696 | 500 375 697 | 500 375 698 | 399 500 699 | 500 375 700 | 500 375 701 | 500 333 702 | 500 332 703 | 500 375 704 | 320 480 705 | 500 375 706 | 375 500 707 | 500 334 708 | 451 500 709 | 500 375 710 | 500 375 711 | 500 368 712 | 500 375 713 | 375 500 714 | 500 375 715 | 500 375 716 | 319 500 717 | 500 375 718 | 450 500 719 | 375 500 720 | 500 375 721 | 375 500 722 | 500 374 723 | 500 375 724 | 500 375 725 | 500 375 726 | 345 500 727 | 500 375 728 | 500 375 729 | 500 325 730 | 500 375 731 | 500 379 732 | 500 333 733 | 500 375 734 | 375 500 735 | 500 375 736 | 500 375 737 | 354 500 738 | 500 375 739 | 375 500 740 | 500 375 741 | 333 500 742 | 375 500 743 | 500 333 744 | 225 417 745 | 333 500 746 | 500 333 747 | 375 500 748 | 500 332 749 | 500 334 750 | 400 500 751 | 500 333 752 | 333 500 753 | 500 375 754 | 375 500 755 | 333 500 756 | 500 333 757 | 340 500 758 | 500 375 759 | 375 500 760 | 333 500 761 | 500 375 762 | 500 375 763 | 500 375 764 | 500 375 765 | 500 402 766 | 375 500 767 | 500 333 768 | 500 333 769 | 374 500 770 | 500 333 771 | 375 500 772 | 500 333 773 | 500 375 774 | 500 375 775 | 500 294 776 | 500 375 777 | 375 500 778 | 500 375 779 | 500 332 780 | 332 500 781 | 358 500 782 | 500 333 783 | 380 472 784 | 500 375 785 | 375 500 786 | 500 375 787 | 500 375 788 | 500 375 789 | 453 500 790 | 375 500 791 | 500 333 792 | 500 500 793 | 375 500 794 | 500 375 795 | 375 500 796 | 500 375 797 | 375 500 798 | 402 500 799 | 459 288 800 | 273 500 801 | 415 500 802 | 346 336 
803 | 320 500 804 | 500 375 805 | 500 333 806 | 500 333 807 | 500 410 808 | 500 375 809 | 500 458 810 | 500 333 811 | 500 375 812 | 443 437 813 | 341 251 814 | 375 500 815 | 500 375 816 | 252 400 817 | 288 432 818 | 500 334 819 | 375 500 820 | 375 500 821 | 377 500 822 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Fuse Conv2d + BatchNorm2d layers 31 | # model.fuse() 32 | 33 | # Eval mode 34 | model.to(device).eval() 35 | 36 | # Export mode 37 | if ONNX_EXPORT: 38 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 39 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 40 | return 41 | 42 | # Half precision 43 | half = half and device.type != 'cpu' # half precision only supported on CUDA 44 | if half: 45 | model.half() 46 | 47 | # Set Dataloader 48 | vid_path, vid_writer = None, None 49 | if webcam: 50 | view_img = True 51 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 52 | dataset = LoadStreams(source, img_size=img_size, half=half) 53 | else: 54 | save_img = True 55 | dataset = LoadImages(source, img_size=img_size, half=half) 56 | 57 | # Get classes and colors 58 | classes = load_classes(parse_data_cfg(opt.data)['names']) 59 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 60 | 61 | # Run inference 62 | t0 = time.time() 63 | for path, img, im0s, vid_cap in dataset: 64 | t = time.time() 65 | 66 | # Get detections 67 | img = torch.from_numpy(img).to(device) 68 | if img.ndimension() == 3: 69 | img = img.unsqueeze(0) 70 | pred, _ = model(img) 71 | 72 | if opt.half: 73 | pred = pred.float() 74 | 75 | for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)): # detections per image 76 | if webcam: # batch_size >= 1 77 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 78 | else: 79 | p, s, im0 = path, '', im0s 80 | 81 | save_path = str(Path(out) / Path(p).name) 82 | s += '%gx%g ' % img.shape[2:] # print string 83 | if det is not None and len(det): 84 | # Rescale boxes from img_size to im0 size 85 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 86 | 87 | # Print results 88 | for c in det[:, -1].unique(): 89 | n = (det[:, -1] == c).sum() # detections per class 90 | s += '%g %ss, ' % (n, classes[int(c)]) # add to 
string 91 | 92 | # Write results 93 | for *xyxy, conf, _, cls in det: 94 | if save_txt: # Write to file 95 | with open(save_path + '.txt', 'a') as file: 96 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 97 | 98 | if save_img or view_img: # Add bbox to image 99 | label = '%s %.2f' % (classes[int(cls)], conf) 100 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 101 | 102 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 103 | 104 | # Stream results 105 | if view_img: 106 | cv2.imshow(p, im0) 107 | 108 | # Save results (image with detections) 109 | if save_img: 110 | if dataset.mode == 'images': 111 | cv2.imwrite(save_path, im0) 112 | else: 113 | if vid_path != save_path: # new video 114 | vid_path = save_path 115 | if isinstance(vid_writer, cv2.VideoWriter): 116 | vid_writer.release() # release previous video writer 117 | 118 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 119 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 120 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 121 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 122 | vid_writer.write(im0) 123 | 124 | if save_txt or save_img: 125 | print('Results saved to %s' % os.getcwd() + os.sep + out) 126 | if platform == 'darwin': # MacOS 127 | os.system('open ' + out + ' ' + save_path) 128 | 129 | print('Done. (%.3fs)' % (time.time() - t0)) 130 | 131 | 132 | if __name__ == '__main__': 133 | parser = argparse.ArgumentParser() 134 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 135 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 136 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 137 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 138 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 139 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 140 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 141 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 142 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 143 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 144 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 145 | parser.add_argument('--view-img', action='store_true', help='display results') 146 | opt = parser.parse_args() 147 | print(opt) 148 | 149 | with torch.no_grad(): 150 | detect() 151 | -------------------------------------------------------------------------------- /layer_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import torch 4 | import numpy as np 5 | from copy import deepcopy 6 | from test import test 7 | from terminaltables import AsciiTable 8 | import time 9 | from utils.utils import * 10 | from utils.prune_utils import * 11 | import argparse 12 | 13 | 14 | 15 | if __name__ == '__main__': 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-hand.cfg', help='cfg file path') 18 | parser.add_argument('--data', type=str, default='data/oxfordhand.data', help='*.data file path') 19 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 20 | parser.add_argument('--shortcuts', type=int, default=8, help='how many shortcut layers will be pruned,\ 21 | pruning one shortcut will also prune two CBL,yolov3 has 23 shortcuts') 22 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)') 23 | opt = parser.parse_args() 24 | print(opt) 25 | 26 | img_size = opt.img_size 27 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 28 | model = Darknet(opt.cfg, (img_size, img_size)).to(device) 29 | 30 | if opt.weights.endswith(".pt"): 31 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model']) 32 | else: 33 | load_darknet_weights(model, opt.weights) 34 | print('\nloaded weights from ',opt.weights) 35 | 36 | 37 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size) 38 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 39 | 40 | with torch.no_grad(): 41 | print("\nlet's test the original model first:") 42 | origin_model_metric = eval_model(model) 43 | origin_nparameters = obtain_num_parameters(model) 44 | 45 | 46 | CBL_idx, Conv_idx, shortcut_idx = parse_module_defs4(model.module_defs) 47 | print('all shortcut_idx:', [i + 1 for i in shortcut_idx]) 48 | 49 | 50 | bn_weights = gather_bn_weights(model.module_list, shortcut_idx) 51 | 52 | sorted_bn = torch.sort(bn_weights)[0] 53 | 54 | 55 | # highest_thre = torch.zeros(len(shortcut_idx)) 56 | # for i, idx in enumerate(shortcut_idx): 57 | # highest_thre[i] = model.module_list[idx][1].weight.data.abs().max().clone() 58 | # _, sorted_index_thre = torch.sort(highest_thre) 59 | 60 | #这里更改了选层策略,由最大值排序改为均值排序,均值一般表现要稍好,但不是绝对,可以自己切换尝试;前面注释的四行为原策略。 61 | bn_mean = torch.zeros(len(shortcut_idx)) 62 | for i, idx in enumerate(shortcut_idx): 63 | bn_mean[i] = model.module_list[idx][1].weight.data.abs().mean().clone() 64 | _, sorted_index_thre = torch.sort(bn_mean) 65 | 66 | 67 | prune_shortcuts = torch.tensor(shortcut_idx)[[sorted_index_thre[:opt.shortcuts]]] 68 | prune_shortcuts = [int(x) for x in prune_shortcuts] 69 | 70 | index_all = list(range(len(model.module_defs))) 71 | index_prune = [] 72 | for idx in prune_shortcuts: 73 | index_prune.extend([idx - 1, idx, idx + 1]) 74 | index_remain = [idx for idx in index_all if idx not in index_prune] 75 | 76 | print('These shortcut layers and corresponding CBL will be pruned :', index_prune) 77 | 78 | 79 | 80 | 81 | 82 | def prune_and_eval(model, 
prune_shortcuts=[]): 83 | model_copy = deepcopy(model) 84 | for idx in prune_shortcuts: 85 | for i in [idx, idx-1]: 86 | bn_module = model_copy.module_list[i][1] 87 | 88 | mask = torch.zeros(bn_module.weight.data.shape[0]).cuda() 89 | bn_module.weight.data.mul_(mask) 90 | 91 | 92 | with torch.no_grad(): 93 | mAP = eval_model(model_copy)[0][2] 94 | 95 | print(f'simply mask the BN Gama of to_be_pruned CBL as zero, now the mAP is {mAP:.4f}') 96 | 97 | 98 | prune_and_eval(model, prune_shortcuts) 99 | 100 | 101 | 102 | 103 | 104 | #%% 105 | def obtain_filters_mask(model, CBL_idx, prune_shortcuts): 106 | 107 | filters_mask = [] 108 | for idx in CBL_idx: 109 | bn_module = model.module_list[idx][1] 110 | mask = np.ones(bn_module.weight.data.shape[0], dtype='float32') 111 | filters_mask.append(mask.copy()) 112 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 113 | for idx in prune_shortcuts: 114 | for i in [idx, idx - 1]: 115 | bn_module = model.module_list[i][1] 116 | mask = np.zeros(bn_module.weight.data.shape[0], dtype='float32') 117 | CBLidx2mask[i] = mask.copy() 118 | return CBLidx2mask 119 | 120 | 121 | CBLidx2mask = obtain_filters_mask(model, CBL_idx, prune_shortcuts) 122 | 123 | 124 | 125 | pruned_model = prune_model_keep_size2(model, CBL_idx, CBL_idx, CBLidx2mask) 126 | 127 | with torch.no_grad(): 128 | mAP = eval_model(pruned_model)[0][2] 129 | print("after transfering the offset of pruned CBL's activation, map is {}".format(mAP)) 130 | 131 | 132 | compact_module_defs = deepcopy(model.module_defs) 133 | 134 | 135 | for j, module_def in enumerate(compact_module_defs): 136 | if module_def['type'] == 'route': 137 | from_layers = [int(s) for s in module_def['layers'].split(',')] 138 | if len(from_layers) == 1 and from_layers[0] > 0: 139 | count = 0 140 | for i in index_prune: 141 | if i <= from_layers[0]: 142 | count += 1 143 | from_layers[0] = from_layers[0] - count 144 | from_layers = str(from_layers[0]) 145 | module_def['layers'] = from_layers 146 | 147 | elif len(from_layers) == 2: 148 | count = 0 149 | if from_layers[1] > 0: 150 | for i in index_prune: 151 | if i <= from_layers[1]: 152 | count += 1 153 | from_layers[1] = from_layers[1] - count 154 | else: 155 | for i in index_prune: 156 | if i > j + from_layers[1] and i < j: 157 | count += 1 158 | from_layers[1] = from_layers[1] + count 159 | 160 | from_layers = ', '.join([str(s) for s in from_layers]) 161 | module_def['layers'] = from_layers 162 | 163 | compact_module_defs = [compact_module_defs[i] for i in index_remain] 164 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device) 165 | for i, index in enumerate(index_remain): 166 | compact_model.module_list[i] = pruned_model.module_list[index] 167 | 168 | compact_nparameters = obtain_num_parameters(compact_model) 169 | 170 | # init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 171 | 172 | 173 | random_input = torch.rand((1, 3, img_size, img_size)).to(device) 174 | 175 | def obtain_avg_forward_time(input, model, repeat=200): 176 | 177 | model.eval() 178 | start = time.time() 179 | with torch.no_grad(): 180 | for i in range(repeat): 181 | output = model(input) 182 | avg_infer_time = (time.time() - start) / repeat 183 | 184 | return avg_infer_time, output 185 | 186 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 187 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 188 | 189 | 190 | # 
在测试集上测试剪枝后的模型, 并统计模型的参数数量 191 | with torch.no_grad(): 192 | compact_model_metric = eval_model(compact_model) 193 | 194 | 195 | # 比较剪枝前后参数数量的变化、指标性能的变化 196 | metric_table = [ 197 | ["Metric", "Before", "After"], 198 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 199 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 200 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 201 | ] 202 | print(AsciiTable(metric_table).table) 203 | 204 | 205 | # 生成剪枝后的cfg文件并保存模型 206 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{opt.shortcuts}_shortcut_') 207 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 208 | print(f'Config file has been saved: {pruned_cfg_file}') 209 | 210 | compact_model_name = opt.weights.replace('/', f'/prune_{opt.shortcuts}_shortcut_') 211 | if compact_model_name.endswith('.pt'): 212 | compact_model_name = compact_model_name.replace('.pt', '.weights') 213 | 214 | save_weights(compact_model, path=compact_model_name) 215 | print(f'Compact model has been saved: {compact_model_name}') 216 | 217 | -------------------------------------------------------------------------------- /prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') 16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 18 | parser.add_argument('--percent', type=float, default=0.8, help='channel prune percent') 19 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)') 20 | opt = parser.parse_args() 21 | print(opt) 22 | 23 | 24 | img_size = opt.img_size 25 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 26 | model = Darknet(opt.cfg, (img_size, img_size)).to(device) 27 | if opt.weights.endswith('.pt'): 28 | model.load_state_dict(torch.load(opt.weights)['model']) 29 | else: 30 | load_darknet_weights(model, opt.weights) 31 | print('\nloaded weights from ',opt.weights) 32 | 33 | eval_model = lambda model:test(opt.cfg, opt.data, 34 | weights=opt.weights, 35 | batch_size=16, 36 | img_size=img_size, 37 | iou_thres=0.5, 38 | conf_thres=0.001, 39 | nms_thres=0.5, 40 | save_json=False, 41 | model=model) 42 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 43 | 44 | print("\nlet's test the original model first:") 45 | with torch.no_grad(): 46 | origin_model_metric = eval_model(model) 47 | 48 | origin_nparameters = obtain_num_parameters(model) 49 | 50 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs) 51 | 52 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 53 | 54 | sorted_bn = torch.sort(bn_weights)[0] 55 | 56 | # 避免剪掉所有channel的最高阈值(每个BN层的gamma的最大值的最小值即为阈值上限) 57 | highest_thre = [] 58 | for idx in prune_idx: 59 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 60 | highest_thre = min(highest_thre) 61 | 62 | # 找到highest_thre对应的下标对应的百分比 63 | 
percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 64 | 65 | print(f'Suggested Gamma threshold should be less than {highest_thre:.4f}.') 66 | print(f'The corresponding prune ratio is {percent_limit:.3f}, but you can set higher.') 67 | 68 | #%% 69 | def prune_and_eval(model, sorted_bn, percent=.0): 70 | model_copy = deepcopy(model) 71 | thre_index = int(len(sorted_bn) * percent) 72 | thre = sorted_bn[thre_index] 73 | 74 | print(f'Gamma value that less than {thre:.4f} are set to zero!') 75 | 76 | remain_num = 0 77 | for idx in prune_idx: 78 | 79 | bn_module = model_copy.module_list[idx][1] 80 | 81 | mask = obtain_bn_mask(bn_module, thre) 82 | 83 | remain_num += int(mask.sum()) 84 | bn_module.weight.data.mul_(mask) 85 | print("let's test the current model!") 86 | with torch.no_grad(): 87 | mAP = eval_model(model_copy)[0][2] 88 | 89 | 90 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 91 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 92 | print(f"mAP of the 'pruned' model is {mAP:.4f}") 93 | 94 | return thre 95 | 96 | percent = opt.percent 97 | print('the required prune percent is', percent) 98 | threshold = prune_and_eval(model, sorted_bn, percent) 99 | #%% 100 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 101 | 102 | pruned = 0 103 | total = 0 104 | num_filters = [] 105 | filters_mask = [] 106 | for idx in CBL_idx: 107 | bn_module = model.module_list[idx][1] 108 | if idx in prune_idx: 109 | 110 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 111 | remain = int(mask.sum()) 112 | pruned = pruned + mask.shape[0] - remain 113 | 114 | if remain == 0: 115 | # print("Channels would be all pruned!") 116 | # raise Exception 117 | max_value = bn_module.weight.data.abs().max() 118 | mask = obtain_bn_mask(bn_module, max_value).cpu().numpy() 119 | remain = int(mask.sum()) 120 | pruned = pruned + mask.shape[0] - remain 121 | 122 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 123 | f'remaining channel: {remain:>4d}') 124 | else: 125 | mask = np.ones(bn_module.weight.data.shape) 126 | remain = mask.shape[0] 127 | 128 | total += mask.shape[0] 129 | num_filters.append(remain) 130 | filters_mask.append(mask.copy()) 131 | 132 | prune_ratio = pruned / total 133 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 134 | 135 | return num_filters, filters_mask 136 | 137 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 138 | 139 | #%% 140 | CBLidx2mask = {idx: mask.astype('float32') for idx, mask in zip(CBL_idx, filters_mask)} 141 | 142 | pruned_model = prune_model_keep_size2(model, CBL_idx, CBL_idx, CBLidx2mask) 143 | 144 | print("\nnow prune the model but keep size,(actually add offset of BN beta to next layer), let's see how the mAP goes") 145 | with torch.no_grad(): 146 | eval_model(pruned_model) 147 | 148 | 149 | #%% 150 | compact_module_defs = deepcopy(model.module_defs) 151 | for idx, num in zip(CBL_idx, num_filters): 152 | assert compact_module_defs[idx]['type'] == 'convolutional' 153 | compact_module_defs[idx]['filters'] = str(num) 154 | 155 | #%% 156 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device) 157 | compact_nparameters = obtain_num_parameters(compact_model) 158 | 159 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 160 | 161 | #%% 162 | random_input = torch.rand((1, 3, img_size, img_size)).to(device) 163 | 164 | def 
obtain_avg_forward_time(input, model, repeat=200): 165 | 166 | model.eval() 167 | start = time.time() 168 | with torch.no_grad(): 169 | for i in range(repeat): 170 | output = model(input)[0] 171 | avg_infer_time = (time.time() - start) / repeat 172 | 173 | return avg_infer_time, output 174 | 175 | print('\ntesting avg forward time...') 176 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 177 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 178 | 179 | diff = (pruned_output-compact_output).abs().gt(0.001).sum().item() 180 | if diff > 0: 181 | print('Something wrong with the pruned model!') 182 | 183 | #%% 184 | # 在测试集上测试剪枝后的模型, 并统计模型的参数数量 185 | print('testing the mAP of final pruned model') 186 | with torch.no_grad(): 187 | compact_model_metric = eval_model(compact_model) 188 | 189 | 190 | #%% 191 | # 比较剪枝前后参数数量的变化、指标性能的变化 192 | metric_table = [ 193 | ["Metric", "Before", "After"], 194 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 195 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 196 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 197 | ] 198 | print(AsciiTable(metric_table).table) 199 | 200 | #%% 201 | # 生成剪枝后的cfg文件并保存模型 202 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_') 203 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 204 | print(f'Config file has been saved: {pruned_cfg_file}') 205 | 206 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_') 207 | if compact_model_name.endswith('.pt'): 208 | compact_model_name = compact_model_name.replace('.pt', '.weights') 209 | save_weights(compact_model, compact_model_name) 210 | print(f'Compact model has been saved: {compact_model_name}') 211 | 212 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | numpy 3 | opencv-python 4 | torch >= 1.2 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools 16 | # conda install -yc spyder-ide spyder-line-profiler 17 | # conda install -yc pytorch pytorch torchvision 18 | -------------------------------------------------------------------------------- /shortcut_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') 16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 18 | parser.add_argument('--percent', type=float, default=0.6, help='channel prune percent') 19 | 
parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)') 20 | opt = parser.parse_args() 21 | print(opt) 22 | 23 | img_size = opt.img_size 24 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 25 | model = Darknet(opt.cfg, (img_size, img_size)).to(device) 26 | 27 | if opt.weights.endswith(".pt"): 28 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model']) 29 | else: 30 | _ = load_darknet_weights(model, opt.weights) 31 | print('\nloaded weights from ',opt.weights) 32 | 33 | 34 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size) 35 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 36 | 37 | print("\nlet's test the original model first:") 38 | with torch.no_grad(): 39 | origin_model_metric = eval_model(model) 40 | origin_nparameters = obtain_num_parameters(model) 41 | 42 | CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all= parse_module_defs2(model.module_defs) 43 | 44 | 45 | sort_prune_idx=[idx for idx in prune_idx if idx not in shortcut_idx] 46 | 47 | #将所有要剪枝的BN层的α参数,拷贝到bn_weights列表 48 | bn_weights = gather_bn_weights(model.module_list, sort_prune_idx) 49 | 50 | #torch.sort返回二维列表,第一维是排序后的值列表,第二维是排序后的值列表对应的索引 51 | sorted_bn = torch.sort(bn_weights)[0] 52 | 53 | 54 | #避免剪掉所有channel的最高阈值(每个BN层的gamma的最大值的最小值即为阈值上限) 55 | highest_thre = [] 56 | for idx in sort_prune_idx: 57 | #.item()可以得到张量里的元素值 58 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 59 | highest_thre = min(highest_thre) 60 | 61 | # 找到highest_thre对应的下标对应的百分比 62 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 63 | 64 | print(f'Suggested Threshold should be less than {highest_thre:.4f}.') 65 | print(f'The corresponding prune ratio is {percent_limit:.3f},but you can set higher.') 66 | 67 | 68 | def prune_and_eval(model, sorted_bn, percent=.0): 69 | model_copy = deepcopy(model) 70 | thre_index = int(len(sorted_bn) * percent) 71 | #获得α参数的阈值,小于该值的α参数对应的通道,全部裁剪掉 72 | thre1 = sorted_bn[thre_index] 73 | 74 | print(f'Channels with Gamma value less than {thre1:.6f} are pruned!') 75 | 76 | remain_num = 0 77 | idx_new=dict() 78 | for idx in prune_idx: 79 | 80 | if idx not in shortcut_idx: 81 | 82 | bn_module = model_copy.module_list[idx][1] 83 | 84 | mask = obtain_bn_mask(bn_module, thre1) 85 | #记录剪枝后,每一层卷积层对应的mask 86 | # idx_new[idx]=mask.cpu().numpy() 87 | idx_new[idx]=mask 88 | remain_num += int(mask.sum()) 89 | bn_module.weight.data.mul_(mask) 90 | #bn_module.bias.data.mul_(mask*0.0001) 91 | else: 92 | 93 | bn_module = model_copy.module_list[idx][1] 94 | 95 | 96 | mask=idx_new[shortcut_idx[idx]] 97 | idx_new[idx]=mask 98 | 99 | 100 | remain_num += int(mask.sum()) 101 | bn_module.weight.data.mul_(mask) 102 | 103 | #print(int(mask.sum())) 104 | 105 | with torch.no_grad(): 106 | mAP = eval_model(model_copy)[0][2] 107 | 108 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 109 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 110 | print(f'mAP of the pruned model is {mAP:.4f}') 111 | 112 | return thre1 113 | 114 | percent = opt.percent 115 | threshold = prune_and_eval(model, sorted_bn, percent) 116 | 117 | 118 | 119 | #**************************************************************** 120 | #虽然上面已经能看到剪枝后的效果,但是没有生成剪枝后的模型结构,因此下面的代码是为了生成新的模型结构并拷贝旧模型参数到新模型 121 | 122 | 123 | 124 | #%% 125 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 126 | 127 | pruned = 0 128 | 
total = 0 129 | num_filters = [] 130 | filters_mask = [] 131 | idx_new=dict() 132 | #CBL_idx存储的是所有带BN的卷积层(YOLO层的前一层卷积层是不带BN的) 133 | for idx in CBL_idx: 134 | bn_module = model.module_list[idx][1] 135 | if idx in prune_idx: 136 | if idx not in shortcut_idx: 137 | 138 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 139 | idx_new[idx]=mask 140 | remain = int(mask.sum()) 141 | pruned = pruned + mask.shape[0] - remain 142 | 143 | # if remain == 0: 144 | # print("Channels would be all pruned!") 145 | # raise Exception 146 | 147 | # print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 148 | # f'remaining channel: {remain:>4d}') 149 | else: 150 | mask=idx_new[shortcut_idx[idx]] 151 | idx_new[idx]=mask 152 | remain= int(mask.sum()) 153 | pruned = pruned + mask.shape[0] - remain 154 | 155 | if remain == 0: 156 | # print("Channels would be all pruned!") 157 | # raise Exception 158 | max_value = bn_module.weight.data.abs().max() 159 | mask = obtain_bn_mask(bn_module, max_value).cpu().numpy() 160 | remain = int(mask.sum()) 161 | pruned = pruned + mask.shape[0] - remain 162 | 163 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 164 | f'remaining channel: {remain:>4d}') 165 | else: 166 | mask = np.ones(bn_module.weight.data.shape) 167 | remain = mask.shape[0] 168 | 169 | total += mask.shape[0] 170 | num_filters.append(remain) 171 | filters_mask.append(mask.copy()) 172 | 173 | #因此,这里求出的prune_ratio,需要裁剪的α参数/cbl_idx中所有的α参数 174 | prune_ratio = pruned / total 175 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 176 | 177 | return num_filters, filters_mask 178 | 179 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 180 | 181 | 182 | #CBLidx2mask存储CBL_idx中,每一层BN层对应的mask 183 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 184 | 185 | 186 | pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask) 187 | print("\nnow prune the model but keep size,(actually add offset of BN beta to next layer), let's see how the mAP goes") 188 | 189 | with torch.no_grad(): 190 | eval_model(pruned_model) 191 | 192 | 193 | 194 | #获得原始模型的module_defs,并修改该defs中的卷积核数量 195 | compact_module_defs = deepcopy(model.module_defs) 196 | for idx, num in zip(CBL_idx, num_filters): 197 | assert compact_module_defs[idx]['type'] == 'convolutional' 198 | compact_module_defs[idx]['filters'] = str(num) 199 | 200 | 201 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device) 202 | compact_nparameters = obtain_num_parameters(compact_model) 203 | 204 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 205 | 206 | 207 | random_input = torch.rand((1, 3, img_size, img_size)).to(device) 208 | 209 | def obtain_avg_forward_time(input, model, repeat=200): 210 | 211 | model.eval() 212 | start = time.time() 213 | with torch.no_grad(): 214 | for i in range(repeat): 215 | output = model(input) 216 | avg_infer_time = (time.time() - start) / repeat 217 | 218 | return avg_infer_time, output 219 | 220 | print('testing Inference time...') 221 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 222 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 223 | 224 | 225 | # 在测试集上测试剪枝后的模型, 并统计模型的参数数量 226 | print('testing final model') 227 | with torch.no_grad(): 228 | compact_model_metric = eval_model(compact_model) 229 | 230 | 231 | # 
比较剪枝前后参数数量的变化、指标性能的变化 232 | metric_table = [ 233 | ["Metric", "Before", "After"], 234 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 235 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 236 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 237 | ] 238 | print(AsciiTable(metric_table).table) 239 | 240 | 241 | # 生成剪枝后的cfg文件并保存模型 242 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_') 243 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 244 | print(f'Config file has been saved: {pruned_cfg_file}') 245 | 246 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_') 247 | if compact_model_name.endswith('.pt'): 248 | compact_model_name = compact_model_name.replace('.pt', '.weights') 249 | save_weights(compact_model, path=compact_model_name) 250 | print(f'Compact model has been saved: {compact_model_name}') 251 | 252 | -------------------------------------------------------------------------------- /slim_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') 16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 18 | parser.add_argument('--global_percent', type=float, default=0.8, help='global channel prune percent') 19 | parser.add_argument('--layer_keep', type=float, default=0.01, help='channel keep percent per layer') 20 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)') 21 | opt = parser.parse_args() 22 | print(opt) 23 | 24 | img_size = opt.img_size 25 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 26 | model = Darknet(opt.cfg, (img_size, img_size)).to(device) 27 | 28 | if opt.weights.endswith(".pt"): 29 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model']) 30 | else: 31 | _ = load_darknet_weights(model, opt.weights) 32 | print('\nloaded weights from ',opt.weights) 33 | 34 | 35 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size) 36 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 37 | 38 | print("\nlet's test the original model first:") 39 | with torch.no_grad(): 40 | origin_model_metric = eval_model(model) 41 | origin_nparameters = obtain_num_parameters(model) 42 | 43 | CBL_idx, Conv_idx, prune_idx, _, _= parse_module_defs2(model.module_defs) 44 | 45 | 46 | 47 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 48 | 49 | sorted_bn = torch.sort(bn_weights)[0] 50 | sorted_bn, sorted_index = torch.sort(bn_weights) 51 | thresh_index = int(len(bn_weights) * opt.global_percent) 52 | thresh = sorted_bn[thresh_index].cuda() 53 | 54 | print(f'Global Threshold should be less than {thresh:.4f}.') 55 | 56 | 57 | 58 | 59 | #%% 60 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 61 | 62 | pruned = 0 63 | total = 0 64 | num_filters = [] 65 | 
filters_mask = [] 66 | for idx in CBL_idx: 67 | bn_module = model.module_list[idx][1] 68 | if idx in prune_idx: 69 | 70 | weight_copy = bn_module.weight.data.abs().clone() 71 | 72 | channels = weight_copy.shape[0] # 73 | min_channel_num = int(channels * opt.layer_keep) if int(channels * opt.layer_keep) > 0 else 1 74 | mask = weight_copy.gt(thresh).float() 75 | 76 | if int(torch.sum(mask)) < min_channel_num: 77 | _, sorted_index_weights = torch.sort(weight_copy,descending=True) 78 | mask[sorted_index_weights[:min_channel_num]]=1. 79 | remain = int(mask.sum()) 80 | pruned = pruned + mask.shape[0] - remain 81 | 82 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 83 | f'remaining channel: {remain:>4d}') 84 | else: 85 | mask = torch.ones(bn_module.weight.data.shape) 86 | remain = mask.shape[0] 87 | 88 | total += mask.shape[0] 89 | num_filters.append(remain) 90 | filters_mask.append(mask.clone()) 91 | 92 | prune_ratio = pruned / total 93 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 94 | 95 | return num_filters, filters_mask 96 | 97 | num_filters, filters_mask = obtain_filters_mask(model, thresh, CBL_idx, prune_idx) 98 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 99 | CBLidx2filters = {idx: filters for idx, filters in zip(CBL_idx, num_filters)} 100 | 101 | for i in model.module_defs: 102 | if i['type'] == 'shortcut': 103 | i['is_access'] = False 104 | 105 | print('merge the mask of layers connected to shortcut!') 106 | merge_mask(model, CBLidx2mask, CBLidx2filters) 107 | 108 | 109 | 110 | def prune_and_eval(model, CBL_idx, CBLidx2mask): 111 | model_copy = deepcopy(model) 112 | 113 | for idx in CBL_idx: 114 | bn_module = model_copy.module_list[idx][1] 115 | mask = CBLidx2mask[idx].cuda() 116 | bn_module.weight.data.mul_(mask) 117 | 118 | with torch.no_grad(): 119 | mAP = eval_model(model_copy)[0][2] 120 | 121 | print(f'mask the gamma as zero, mAP of the model is {mAP:.4f}') 122 | 123 | 124 | prune_and_eval(model, CBL_idx, CBLidx2mask) 125 | 126 | 127 | for i in CBLidx2mask: 128 | CBLidx2mask[i] = CBLidx2mask[i].clone().cpu().numpy() 129 | 130 | 131 | 132 | pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask) 133 | print("\nnow prune the model but keep size,(actually add offset of BN beta to following layers), let's see how the mAP goes") 134 | 135 | with torch.no_grad(): 136 | eval_model(pruned_model) 137 | 138 | for i in model.module_defs: 139 | if i['type'] == 'shortcut': 140 | i.pop('is_access') 141 | 142 | compact_module_defs = deepcopy(model.module_defs) 143 | for idx in CBL_idx: 144 | assert compact_module_defs[idx]['type'] == 'convolutional' 145 | compact_module_defs[idx]['filters'] = str(CBLidx2filters[idx]) 146 | 147 | 148 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device) 149 | compact_nparameters = obtain_num_parameters(compact_model) 150 | 151 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 152 | 153 | 154 | random_input = torch.rand((1, 3, img_size, img_size)).to(device) 155 | 156 | def obtain_avg_forward_time(input, model, repeat=200): 157 | 158 | model.eval() 159 | start = time.time() 160 | with torch.no_grad(): 161 | for i in range(repeat): 162 | output = model(input) 163 | avg_infer_time = (time.time() - start) / repeat 164 | 165 | return avg_infer_time, output 166 | 167 | print('testing inference time...') 168 | pruned_forward_time, pruned_output = 
obtain_avg_forward_time(random_input, pruned_model) 169 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 170 | 171 | 172 | print('testing the final model...') 173 | with torch.no_grad(): 174 | compact_model_metric = eval_model(compact_model) 175 | 176 | 177 | metric_table = [ 178 | ["Metric", "Before", "After"], 179 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 180 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 181 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 182 | ] 183 | print(AsciiTable(metric_table).table) 184 | 185 | 186 | 187 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{opt.global_percent}_keep_{opt.layer_keep}_') 188 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 189 | print(f'Config file has been saved: {pruned_cfg_file}') 190 | 191 | compact_model_name = opt.weights.replace('/', f'/prune_{opt.global_percent}_keep_{opt.layer_keep}_') 192 | if compact_model_name.endswith('.pt'): 193 | compact_model_name = compact_model_name.replace('.pt', '.weights') 194 | save_weights(compact_model, path=compact_model_name) 195 | print(f'Compact model has been saved: {compact_model_name}') 196 | 197 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | def test(cfg, 12 | data, 13 | weights=None, 14 | batch_size=16, 15 | img_size=416, 16 | iou_thres=0.5, 17 | conf_thres=0.001, 18 | nms_thres=0.5, 19 | save_json=False, 20 | model=None): 21 | 22 | # Initialize/load model and set device 23 | if model is None: 24 | device = torch_utils.select_device(opt.device) 25 | verbose = True 26 | 27 | # Initialize model 28 | model = Darknet(cfg, img_size).to(device) 29 | 30 | # Load weights 31 | attempt_download(weights) 32 | if weights.endswith('.pt'): # pytorch format 33 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 34 | else: # darknet format 35 | _ = load_darknet_weights(model, weights) 36 | 37 | if torch.cuda.device_count() > 1: 38 | model = nn.DataParallel(model) 39 | else: 40 | device = next(model.parameters()).device # get model device 41 | verbose = False 42 | 43 | # Configure run 44 | data = parse_data_cfg(data) 45 | nc = int(data['classes']) # number of classes 46 | test_path = data['valid'] # path to test images 47 | names = load_classes(data['names']) # class names 48 | 49 | # Dataloader 50 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 51 | dataloader = DataLoader(dataset, 52 | batch_size=batch_size, 53 | num_workers=min([os.cpu_count(), batch_size, 16]), 54 | pin_memory=True, 55 | collate_fn=dataset.collate_fn) 56 | 57 | seen = 0 58 | model.eval() 59 | coco91class = coco80_to_coco91_class() 60 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 61 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 
62 | loss = torch.zeros(3) 63 | jdict, stats, ap, ap_class = [], [], [], [] 64 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 65 | targets = targets.to(device) 66 | imgs = imgs.to(device) 67 | _, _, height, width = imgs.shape # batch size, channels, height, width 68 | 69 | # Plot images with bounding boxes 70 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 71 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 72 | 73 | # Run model 74 | inf_out, train_out = model(imgs) # inference and training outputs 75 | 76 | # Compute loss 77 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 78 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 79 | 80 | # Run NMS 81 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 82 | 83 | # Statistics per image 84 | for si, pred in enumerate(output): 85 | labels = targets[targets[:, 0] == si, 1:] 86 | nl = len(labels) 87 | tcls = labels[:, 0].tolist() if nl else [] # target class 88 | seen += 1 89 | 90 | if pred is None: 91 | if nl: 92 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 93 | continue 94 | 95 | # Append to text file 96 | # with open('test.txt', 'a') as file: 97 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 98 | 99 | # Append to pycocotools JSON dictionary 100 | if save_json: 101 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 102 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 103 | box = pred[:, :4].clone() # xyxy 104 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 105 | box = xyxy2xywh(box) # xywh 106 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 107 | for di, d in enumerate(pred): 108 | jdict.append({'image_id': image_id, 109 | 'category_id': coco91class[int(d[6])], 110 | 'bbox': [floatn(x, 3) for x in box[di]], 111 | 'score': floatn(d[4], 5)}) 112 | 113 | # Clip boxes to image bounds 114 | clip_coords(pred, (height, width)) 115 | 116 | # Assign all predictions as incorrect 117 | correct = [0] * len(pred) 118 | if nl: 119 | detected = [] 120 | tcls_tensor = labels[:, 0] 121 | 122 | # target boxes 123 | tbox = xywh2xyxy(labels[:, 1:5]) 124 | tbox[:, [0, 2]] *= width 125 | tbox[:, [1, 3]] *= height 126 | 127 | # Search for correct predictions 128 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 129 | 130 | # Break if all targets already located in image 131 | if len(detected) == nl: 132 | break 133 | 134 | # Continue if predicted class not among image classes 135 | if pcls.item() not in tcls: 136 | continue 137 | 138 | # Best iou, index between pred and targets 139 | m = (pcls == tcls_tensor).nonzero().view(-1) 140 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 141 | 142 | # If iou > threshold and class is correct mark as correct 143 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 144 | correct[i] = 1 145 | detected.append(m[bi]) 146 | 147 | # Append statistics (correct, conf, pcls, tcls) 148 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 149 | 150 | # Compute statistics 151 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 152 | if len(stats): 153 | p, r, ap, f1, ap_class = ap_per_class(*stats) 154 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 155 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 156 | else: 157 | nt = torch.zeros(1) 158 | 159 
| # Print results 160 | pf = '%20s' + '%10.3g' * 6 # print format 161 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 162 | 163 | # Print results per class 164 | if verbose and nc > 1 and len(stats): 165 | for i, c in enumerate(ap_class): 166 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 167 | 168 | # Save JSON 169 | if save_json and map and len(jdict): 170 | try: 171 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 172 | with open('results.json', 'w') as file: 173 | json.dump(jdict, file) 174 | 175 | from pycocotools.coco import COCO 176 | from pycocotools.cocoeval import COCOeval 177 | 178 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 179 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 180 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 181 | 182 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 183 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 184 | cocoEval.evaluate() 185 | cocoEval.accumulate() 186 | cocoEval.summarize() 187 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 188 | except: 189 | print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.') 190 | 191 | # Return results 192 | maps = np.zeros(nc) + map 193 | for i, c in enumerate(ap_class): 194 | maps[c] = ap[i] 195 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps 196 | 197 | 198 | if __name__ == '__main__': 199 | parser = argparse.ArgumentParser(prog='test.py') 200 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 201 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 202 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 203 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 204 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 205 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 206 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 207 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 208 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 209 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 210 | opt = parser.parse_args() 211 | print(opt) 212 | 213 | with torch.no_grad(): 214 | test(opt.cfg, 215 | opt.data, 216 | opt.weights, 217 | opt.batch_size, 218 | opt.img_size, 219 | opt.iou_thres, 220 | opt.conf_thres, 221 | opt.nms_thres, 222 | opt.save_json) 223 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | # -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 
10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" 13 | sudo shutdown 14 | 15 | # Re-clone 16 | rm -rf yolov3 # Warning: remove existing 17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1 20 | 21 | # Train 22 | python3 train.py 23 | 24 | # Resume 25 | python3 train.py --resume 26 | 27 | # Detect 28 | python3 detect.py 29 | 30 | # Test 31 | python3 test.py --save-json 32 | 33 | # Evolve 34 | for i in {0..500} 35 | do 36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 37 | done 38 | 39 | # Git pull 40 | git pull https://github.com/ultralytics/yolov3 # master 41 | git pull https://github.com/ultralytics/yolov3 test # branch 42 | 43 | # Test Darknet training 44 | python3 test.py --weights ../darknet/backup/yolov3.backup 45 | 46 | # Copy last.pt TO bucket 47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 48 | 49 | # Copy last.pt FROM bucket 50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 53 | 54 | # Reproduce tutorials 55 | rm results*.txt # WARNING: removes existing results 56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 60 | python3 -c "from utils import utils; utils.plot_results()" 61 | # gsutil cp results*.txt gs://ultralytics 62 | gsutil cp results.png gs://ultralytics 63 | sudo shutdown 64 | 65 | # Reproduce mAP 66 | python3 test.py --save-json --img-size 608 67 | python3 test.py --save-json --img-size 416 68 | python3 test.py --save-json --img-size 320 69 | sudo shutdown 70 | 71 | # Benchmark script 72 | git clone https://github.com/ultralytics/yolov3 # clone our repo 73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 76 | 77 | # Unit tests 78 | python3 detect.py # detect 2 persons, 1 tie 79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 83 | 84 | # AlexyAB Darknet 85 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 90 | 91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 92 | gsutil cp -r backup/*5000.weights gs://sm6/weights 93 | sudo shutdown 94 | 95 | 96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 101 | 102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 107 | 108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 109 | 110 | # Debug/Development 111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 113 | 114 | gsutil cp evolve.txt gs://ultralytics 115 | sudo shutdown 116 | 117 | #Docker 118 | sudo docker kill $(sudo docker ps -q) 119 | sudo docker pull ultralytics/yolov3:v1 120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1 121 | 122 | clear 123 | while true 124 | do 125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1 126 | done 127 | 128 | python3 train.py --data 
data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias 129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done 130 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... 
', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Downloads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | 44 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable,
invalid device %s requested' % device # check availability 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 30 | for i in range(0, ng): 31 | if i == 1: 32 | cuda_str = ' ' * len(cuda_str) 33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 34 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 35 | else: 36 | print('Using CPU') 37 | 38 | print('') # skip a line 39 | return torch.device('cuda:0' if cuda else 'cpu') 40 | 41 | 42 | def fuse_conv_and_bn(conv, bn): 43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 44 | with torch.no_grad(): 45 | # init 46 | fusedconv = torch.nn.Conv2d(conv.in_channels, 47 | conv.out_channels, 48 | kernel_size=conv.kernel_size, 49 | stride=conv.stride, 50 | padding=conv.padding, 51 | bias=True) 52 | 53 | # prepare filters 54 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 57 | 58 | # prepare spatial bias 59 | if conv.bias is not None: 60 | b_conv = conv.bias 61 | else: 62 | b_conv = torch.zeros(conv.weight.size(0)) 63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 64 | fusedconv.bias.copy_(b_conv + b_bn) 65 | 66 | return fusedconv 67 | 68 | 69 | def model_info(model, report='summary'): 70 | # Prints a line-by-line description of a PyTorch model 71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 73 | if report == 'full': 74 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 75 | for i, (name, p) in enumerate(model.named_parameters()): 76 | name = name.replace('module_list.', '') 77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 80 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # download darknet weight files; '-c' resumes a partial download 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | --------------------------------------------------------------------------------
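
For reference, the channel-selection rule used in slim_prune.py above can be reproduced with a minimal, self-contained sketch. The layer sizes and gamma values below are dummy data rather than anything from this repository; in the script itself the magnitudes come from gather_bn_weights and the prunable indices from parse_module_defs2.

import torch

torch.manual_seed(0)
# Dummy |gamma| vectors for three BatchNorm layers (stand-ins for gather_bn_weights output).
layer_gammas = [torch.rand(64), torch.rand(128), torch.rand(256)]
global_percent, layer_keep = 0.8, 0.01  # same roles as --global_percent and --layer_keep

# Pool all gammas, sort them, and take the value at the global_percent quantile as the cut-off.
all_gammas = torch.cat(layer_gammas)
sorted_gammas, _ = torch.sort(all_gammas)
thresh = sorted_gammas[int(len(sorted_gammas) * global_percent)]

for i, g in enumerate(layer_gammas):
    mask = g.gt(thresh).float()                  # keep channels whose gamma exceeds the global threshold
    min_keep = max(int(len(g) * layer_keep), 1)  # per-layer floor so no layer loses all of its channels
    if int(mask.sum()) < min_keep:
        _, order = torch.sort(g, descending=True)
        mask[order[:min_keep]] = 1.              # force-keep the largest gammas in this layer
    print(f'layer {i}: {len(g)} channels -> {int(mask.sum())} kept')

The full script additionally merges the masks of layers joined by shortcuts (merge_mask) and rebuilds a compact Darknet from the updated module definitions.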
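
The docstrings in utils/adabound.py above document the optimizer's arguments; the following is a hedged usage sketch. The linear model is a placeholder, not part of this repository, and the hyperparameter values are simply the documented defaults.

import torch.nn as nn
from utils.adabound import AdaBound

model = nn.Linear(10, 2)  # placeholder module; any nn.Module's parameters can be passed
optimizer = AdaBound(model.parameters(),
                     lr=1e-3,       # Adam-style initial learning rate
                     final_lr=0.1,  # SGD-style learning rate the bounds converge towards
                     gamma=1e-3,    # convergence speed of the bound functions
                     weight_decay=0,
                     amsbound=False)
# optimizer.zero_grad(), loss.backward() and optimizer.step() then work as with any
# torch.optim optimizer, assuming the PyTorch version this repository targets (the file
# uses legacy add_/addcmul_ call signatures that newer PyTorch releases have deprecated).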
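
Similarly, fuse_conv_and_bn in utils/torch_utils.py above folds a frozen BatchNorm layer into the preceding convolution. Below is a small sanity-check sketch, assuming it is run from the repository root; the layer shapes are arbitrary.

import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)
bn(conv(torch.randn(8, 3, 32, 32)))  # one forward pass in train mode to populate running stats
bn.eval()                            # fusion assumes inference-mode (frozen) statistics

x = torch.randn(1, 3, 32, 32)
fused = fuse_conv_and_bn(conv, bn)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # expected: True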