├── Dockerfile
├── LICENSE
├── README.md
├── cfg
├── yolov3-1cls.cfg
├── yolov3-hand.cfg
├── yolov3-spp-1cls.cfg
├── yolov3-spp-hand.cfg
├── yolov3-spp-pan-scale.cfg
├── yolov3-spp.cfg
├── yolov3-tiny-1cls.cfg
├── yolov3-tiny.cfg
├── yolov3.cfg
├── yolov3s-18a320.cfg
├── yolov3s-30a320.cfg
├── yolov3s-3a320.cfg
├── yolov3s-9a320.cfg
├── yolov4-tiny.cfg
└── yolov4.cfg
├── data
├── 5k.shapes
├── 5k.txt
├── coco.data
├── coco.names
├── coco_1000img.data
├── coco_1000img.txt
├── coco_1000val.data
├── coco_1000val.txt
├── coco_16img.data
├── coco_16img.txt
├── coco_1cls.data
├── coco_1cls.txt
├── coco_1img.data
├── coco_1img.txt
├── coco_1k5k.data
├── coco_32img.data
├── coco_32img.txt
├── coco_500img.txt
├── coco_500val.data
├── coco_500val.txt
├── coco_64img.data
├── coco_64img.shapes
├── coco_64img.txt
├── coco_paper.names
├── converter.py
├── get_coco_dataset.sh
├── get_coco_dataset_gdrive.sh
├── hand.data
├── img
│ ├── 1.jpg
│ ├── 2.jpg
│ ├── baseline_and_sparse.jpg
│ ├── bn.jpg
│ ├── finetune_and_bn.jpg
│ └── prune9316.png
├── oxfordhand.data
├── oxfordhand.names
├── samples
│ ├── bus.jpg
│ └── zidane.jpg
├── trainvalno5k.shapes
└── valid_.shapes
├── detect.py
├── layer_channel_prune.py
├── layer_prune.py
├── models.py
├── prune.py
├── requirements.txt
├── shortcut_prune.py
├── slim_prune.py
├── test.py
├── train.py
├── utils
├── __init__.py
├── adabound.py
├── datasets.py
├── gcp.sh
├── google_utils.py
├── parse_config.py
├── prune_utils.py
├── torch_utils.py
└── utils.py
└── weights
└── download_yolov3_weights.sh
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Start from Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
2 | FROM nvcr.io/nvidia/pytorch:19.08-py3
3 |
4 | # Install dependencies (pip or conda)
5 | RUN pip install -U gsutil
6 | # RUN pip install -U -r requirements.txt
7 | # RUN conda update -n base -c defaults conda
8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow
9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools
10 |
11 | ## Install OpenCV with Gstreamer support
12 | #WORKDIR /usr/src
13 | #RUN pip uninstall -y opencv-python
14 | #RUN apt-get update
15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev
16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build
17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1
18 | #RUN cd opencv/build && cmake ../ \
19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
20 | # -D BUILD_OPENCV_PYTHON3=ON \
21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \
22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \
23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \
24 | # -D WITH_GSTREAMER=ON \
25 | # -D WITH_FFMPEG=OFF \
26 | # && make && make install && ldconfig
27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so
28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so
29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())"
30 |
31 | # Create working directory
32 | RUN mkdir -p /usr/src/app
33 | WORKDIR /usr/src/app
34 |
35 | # Copy contents
36 | COPY . /usr/src/app
37 |
38 | # Copy weights
39 | #RUN python3 -c "from utils.google_utils import *; \
40 | # gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name='weights/darknet53.conv.74'); \
41 | # gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name='weights/yolov3-spp.weights'); \
42 | # gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name='weights/yolov3-spp.pt)"
43 |
44 |
45 | # --------------------------------------------------- Extras Below ---------------------------------------------------
46 |
47 | # Build
48 | # rm -rf yolov3 # Warning: remove existing
49 | # git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 detect.py
50 | # sudo docker image prune -af && sudo docker build -t ultralytics/yolov3:v0 .
51 |
52 | # Run
53 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 python3 detect.py
54 |
55 | # Run with local directory access
56 | # sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v0 python3 train.py
57 |
58 | # Build and Push
59 | # export tag=ultralytics/yolov3:v0 && sudo docker build -t $tag . && docker push $tag
60 |
61 | # Kill all
62 | # sudo docker kill $(sudo docker ps -q)
63 |
64 | # Run bash for loop
65 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 while true; do python3 train.py --evolve; done
66 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # yolov3-channel-and-layer-pruning
2 | 本项目以[ultralytics/yolov3](https://github.com/ultralytics/yolov3)为基础实现,根据论文[Learning Efficient Convolutional Networks Through Network Slimming (ICCV 2017)](http://openaccess.thecvf.com/content_iccv_2017/html/Liu_Learning_Efficient_Convolutional_ICCV_2017_paper.html)原理基于bn层Gmma系数进行通道剪枝,下面引用了几种不同的通道剪枝策略,并对原策略进行了改进,提高了剪枝率和精度;在这些工作基础上,又衍生出了层剪枝,本身通道剪枝已经大大减小了模型参数和计算量,降低了模型对资源的占用,而层剪枝可以进一步减小了计算量,并大大提高了模型推理速度;通过层剪枝和通道剪枝结合,可以压缩模型的深度和宽度,某种意义上实现了针对不同数据集的小模型搜索。
3 |
4 | 项目的基本工作流程是,使用yolov3训练自己数据集,达到理想精度后进行稀疏训练,稀疏训练是重中之重,对需要剪枝的层对应的bn gamma系数进行大幅压缩,理想的压缩情况如下图,然后就可以对不重要的通道或者层进行剪枝,剪枝后可以对模型进行微调恢复精度,后续会写篇博客记录一些实验过程及调参经验,在此感谢[行云大佬](https://github.com/zbyuan)的讨论和合作!
5 |
6 | 
7 |
8 |
9 |
10 | #### 更新
11 | 1.增加了对**yolov3-spp**结构的支持,基础训练可以直接使用yolov3-spp.weights初始化权重,各个层剪枝及通道剪枝脚本的使用也和yolov3一致。
12 | 2.增加了多尺度推理支持,train.py和各剪枝脚本都可以指定命令行参数, 如 --img_size 608 .
13 | 3.2019/12/06更改了层剪枝的选层策略,由最大值排序改为均值排序。
14 | 4.2019/12/08**重要**更新,增加了**知识蒸馏**策略。蒸馏是用高精度的大模型指导低精度的小模型,在结构相似的情况下效果尤为明显。而剪枝得到的小模型和原模型在结构上高度相似,非常符合蒸馏的应用条件。这里更新了一个参考Hinton大神Distilling the Knowledge in a Neural Network的蒸馏策略,原策略是针对分类模型的,但在这里也有不错的表现。调用只需要在微调的时候指定老师模型的cfg和权重即可:--t_cfg --t_weights。最近会更新第二种针对yolo检测的知识蒸馏策略。
15 | 5.2019/12/10交流的小伙伴比较多,回答不过来,可以加群734912150
16 | 6.2019/12/14增加了针对蒸馏的混合精度训练支持,项目中各项训练都可以使用[apex](https://github.com/NVIDIA/apex)加速,但需要先安装。使用混合精度可以加速训练,同时减轻显存占用,但训练效果可能会差一丢丢。代码默认开启了混合精度,如需关闭,可以把train.py中的mixed_precision改为False.
17 | 7.2019/12/23更新了**知识蒸馏策略二**,并默认使用二。策略二参考了论文"Learning Efficient Object Detection Models with Knowledge Distillation",相比策略一,对分类和回归分别作了处理,分类的蒸馏和策略一差不多,回归部分会分别计算学生和老师相对target的L2距离,如果学生更远,学生会再向target学习,而不是向老师学习。调用同样是指定老师的cfg和权重即可。需要强调的是,蒸馏在这里只是辅助微调,如果注重精度优先,剪枝时尽量剪不掉点的比例,这时蒸馏的作用也不大;如果注重速度,剪枝比例较大,导致模型精度下降较多,可以结合蒸馏提升精度。
18 | 8.2019/12/27更新了两种**稀疏策略**,详看下面稀疏训练环节。
19 | 9.2020/01/02修正各剪枝版本多分辨率推理test问题,主要是把命令行参数img_size传递给test函数。
20 | 10.2020/01/04补了个[博客](https://blog.csdn.net/weixin_41397123/article/details/103828931)分享**无人机数据集visdrone**案例,演示如何压缩一个12M的无人机视角目标检测模型(标题党)。
21 | 11.2020/04/10增加了**yolov3-tiny**的剪枝支持,稀疏照旧,剪通道用slim_prune.py,不可剪层。
22 | 12.2020/4/24增加支持**yolov4**剪枝.
23 | 13.2020/4/30在datasets.py 592行添加了支持负样本训练,默认注释掉.
24 | 14.2020/7/8更新支持**yolov4-tiny**剪通道.
25 |
26 |
27 |
28 | #### 基础训练
29 | 环境配置查看requirements.txt,数据准备参考[这里](https://github.com/ultralytics/yolov3/wiki/Train-Custom-Data),预训练权重可以从darknet官网下载。
30 | 用yolov3训练自己的数据集,修改cfg,配置好data,用yolov3.weights初始化权重。
31 |
32 | `python train.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/yolov3.weights --epochs 100 --batch-size 32`
33 |
34 | #### 稀疏训练
35 | scale参数默认0.001,根据数据集,mAP,BN分布调整,数据分布广类别多的,或者稀疏时掉点厉害的适当调小s;-sr用于开启稀疏训练;--prune 0适用于prune.py,--prune 1 适用于其他剪枝策略。稀疏训练就是精度和稀疏度的博弈过程,如何寻找好的策略让稀疏后的模型保持高精度同时实现高稀疏度是值得研究的问题,大的s一般稀疏较快但精度掉的快,小的s一般稀疏较慢但精度掉的慢;配合大学习率会稀疏加快,后期小学习率有助于精度回升。
36 | 注意:训练保存的pt权重包含epoch信息,可通过`python -c "from models import *; convert('cfg/yolov3.cfg', 'weights/last.pt')"`转换为darknet weights去除掉epoch信息,使用darknet weights从epoch 0开始稀疏训练。
37 |
38 | `python train.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.weights --epochs 300 --batch-size 32 -sr --s 0.001 --prune 1`
39 | * ##### 稀疏策略一:恒定s
40 | 这是一开始的策略,也是默认的策略。在整个稀疏过程中,始终以恒定的s给模型添加额外的梯度,因为力度比较均匀,往往压缩度较高。但稀疏过程是个博弈过程,我们不仅想要较高的压缩度,也想要在学习率下降后恢复足够的精度,不同的s最后稀疏结果也不同,想要找到合适的s往往需要较高的时间成本。
41 |
42 | `bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data))`
43 | * ##### 稀疏策略二:全局s衰减
44 | 关键代码是下面这句,在epochs的0.5阶段s衰减100倍。前提是0.5之前权重已经完成大幅压缩,这时对s衰减有助于精度快速回升,但是相应的bn会出现一定膨胀,降低压缩度,有利有弊,可以说是牺牲较大的压缩度换取较高的精度,同时减少寻找s的时间成本。当然这个0.5和100可以自己调整。注意也不能为了在前半部分加快压缩bn而大大提高s,过大的s会导致模型精度下降厉害,且s衰减后也无法恢复。如果想使用这个策略,可以在prune_utils.py中的BNOptimizer把下面这句取消注释。
45 |
46 | `# s = s if epoch <= opt.epochs * 0.5 else s * 0.01`
47 | * ##### 稀疏策略三:局部s衰减
48 | 关键代码是下面两句,在epochs的0.5阶段开始对85%的通道保持原力度压缩,15%的通道进行s衰减100倍。这个85%是个先验知识,是由策略一稀疏后尝试剪通道几乎不掉点的最大比例,几乎不掉点指的是相对稀疏后精度;如果微调后还是不及baseline,或者说达不到精度要求,就可以使用策略三进行局部s衰减,从中间开始重新稀疏,这可以在牺牲较小压缩度情况下提高较大精度。如果想使用这个策略可以在train.py中把下面这两句取消注释,并根据自己策略一情况把0.85改为自己的比例,还有0.5和100也是可调的。策略二和三不建议一起用,除非你想做组合策略。
49 |
50 | `#if opt.sr and opt.prune==1 and epoch > opt.epochs * 0.5:`
51 | `# idx2mask = get_mask2(model, prune_idx, 0.85)`
52 |
53 | #### 通道剪枝策略一
54 | 策略源自[Lam1360/YOLOv3-model-pruning](https://github.com/Lam1360/YOLOv3-model-pruning),这是一种保守的策略,因为yolov3中有五组共23处shortcut连接,对应的是add操作,通道剪枝后如何保证shortcut的两个输入维度一致,这是必须考虑的问题。而Lam1360/YOLOv3-model-pruning对shortcut直连的层不进行剪枝,避免了维度处理问题,但它同样实现了较高剪枝率,对模型参数的减小有很大帮助。虽然它剪枝率最低,但是它对剪枝各细节的处理非常优雅,后面的代码也较多参考了原始项目。在本项目中还更改了它的阈值规则,可以设置更高的剪枝阈值。
55 |
56 | `python prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --percent 0.85`
57 |
58 | #### 通道剪枝策略二
59 | 策略源自[coldlarry/YOLOv3-complete-pruning](https://github.com/coldlarry/YOLOv3-complete-pruning),这个策略对涉及shortcut的卷积层也进行了剪枝,剪枝采用每组shortcut中第一个卷积层的mask,一共使用五种mask实现了五组shortcut相关卷积层的剪枝,进一步提高了剪枝率。本项目中对涉及shortcut的剪枝后激活偏移值处理进行了完善,并修改了阈值规则,可以设置更高剪枝率,当然剪枝率的设置和剪枝后的精度变化跟稀疏训练有很大关系,这里再次强调稀疏训练的重要性。
60 |
61 | `python shortcut_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --percent 0.6`
62 |
63 | #### 通道剪枝策略三
64 | 策略参考自[PengyiZhang/SlimYOLOv3](https://github.com/PengyiZhang/SlimYOLOv3),这个策略的通道剪枝率最高,先以全局阈值找出各卷积层的mask,然后对于每组shortcut,它将相连的各卷积层的剪枝mask取并集,用merge后的mask进行剪枝,这样对每一个相关层都做了考虑,同时它还对每一个层的保留通道做了限制,实验中它的剪枝效果最好。在本项目中还对激活偏移值添加了处理,降低剪枝时的精度损失。
65 |
66 | `python slim_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --global_percent 0.8 --layer_keep 0.01`
67 |
68 | #### 层剪枝
69 | 这个策略是在之前的通道剪枝策略基础上衍生出来的,针对每一个shortcut层前一个CBL进行评价,对各层的Gmma均值进行排序,取最小的进行层剪枝。为保证yolov3结构完整,这里每剪一个shortcut结构,会同时剪掉一个shortcut层和它前面的两个卷积层。是的,这里只考虑剪主干中的shortcut模块。但是yolov3中有23处shortcut,剪掉8个shortcut就是剪掉了24个层,剪掉16个shortcut就是剪掉了48个层,总共有69个层的剪层空间;实验中对简单的数据集剪掉了较多shortcut而精度降低很少。
70 |
71 | `python layer_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --shortcuts 12`
72 |
73 | #### 同时剪层和通道
74 | 前面的通道剪枝和层剪枝已经分别压缩了模型的宽度和深度,可以自由搭配使用,甚至迭代式剪枝,调配出针对自己数据集的一副良药。这里整合了一个同时剪层和通道的脚本,方便对比剪枝效果,有需要的可以使用这个脚本进行剪枝。
75 |
76 | `python layer_channel_prune.py --cfg cfg/my_cfg.cfg --data data/my_data.data --weights weights/last.pt --shortcuts 12 --global_percent 0.8 --layer_keep 0.1`
77 |
78 | #### 微调finetune
79 | 剪枝的效果好不好首先还是要看稀疏情况,而不同的剪枝策略和阈值设置在剪枝后的效果表现也不一样,有时剪枝后模型精度甚至可能上升,而一般而言剪枝会损害模型精度,这时候需要对剪枝后的模型进行微调,让精度回升。训练代码中默认了前6个epoch进行warmup,这对微调有好处,有需要的可以自行调整超参学习率。
80 |
81 | `python train.py --cfg cfg/prune_0.85_my_cfg.cfg --data data/my_data.data --weights weights/prune_0.85_last.weights --epochs 100 --batch-size 32`
82 |
83 | #### tensorboard实时查看训练过程
84 | `tensorboard --logdir runs`
85 |
86 | 
87 |
88 | 欢迎使用和测试,有问题或者交流实验过程可以发issue或者加群734912150
89 |
90 |
91 | #### 案例
92 | 使用yolov3-spp训练oxfordhand数据集并剪枝。下载[数据集](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz),解压到data文件夹,运行converter.py,把得到的train.txt和valid.txt路径更新在oxfordhand.data中。通过以下代码分别进行基础训练和稀疏训练:
93 | `python train.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/yolov3-spp.weights --batch-size 20 --epochs 100`
94 |
95 | `python -c "from models import *; convert('cfg/yolov3.cfg', 'weights/last.pt')"`
96 | `python train.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/converted.weights --batch-size 20 --epochs 300 -sr --s 0.001 --prune 1`
97 |
98 | 训练的情况如下图,蓝色线是基础训练,红色线是稀疏训练。其中基础训练跑了100个epoch,后半段已经出现了过拟合,最终得到的baseline模型mAP为0.84;稀疏训练以s0.001跑了300个epoch,选择的稀疏类型为prune 1全局稀疏,为包括shortcut的剪枝做准备,并且在总epochs的0.7和0.9阶段进行了Gmma为0.1的学习率衰减,稀疏过程中模型精度起伏较大,在学习率降低后精度出现了回升,最终稀疏模型mAP 0.797。
99 | 
100 |
101 | 再来看看bn的稀疏情况,代码使用tensorboard记录了参与稀疏的bn层的Gmma权重变化,下图左边看到正常训练时Gmma总体上分布在1附近类似正态分布,右边可以看到稀疏过程Gmma大部分逐渐被压到接近0,接近0的通道其输出值近似于常量,可以将其剪掉。
102 | 
103 |
104 | 这时候便可以进行剪枝,这里例子使用layer_channel_prune.py同时进行剪通道和剪层,这个脚本融合了slim_prune剪通道策略和layer_prune剪层策略。Global perent剪通道的全局比例为0.93,layer keep每层最低保持通道数比例为0.01,shortcuts剪了16个,相当于剪了48个层(32个CBL,16个shortcut);下图结果可以看到剪通道后模型掉了一个点,而大小从239M压缩到5.2M,剪层后mAP掉到0.53,大小压缩到4.6M,模型参数减少了98%,推理速度也从16毫秒减到6毫秒(tesla p100测试结果)。
105 | `python layer_channel_prune.py --cfg cfg/yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/last.pt --global_percent 0.93 --layer_keep 0.01 --shortcuts 16`
106 |
107 | 
108 |
109 | 鉴于模型精度出现了下跌,我们来进行微调,下面是微调50个epoch的结果,精度恢复到了0.793,bn也开始呈正态分布,这个结果相对于baseline掉了几个点,但是模型大幅压缩减少了资源占用,提高了运行速度。如果想提高精度,可以尝试降低剪枝率,比如这里只剪10个shortcut的话,同样微调50epoch精度可以回到0.81;而想追求速度的话,这里有个极端例子,全局剪0.95,层剪掉54个,模型压缩到了2.8M,推理时间降到5毫秒,而mAP降到了0,但是微调50epoch后依然回到了0.75。
110 |
111 | `python train.py --cfg cfg/prune_16_shortcut_prune_0.93_keep_0.01_yolov3-spp-hand.cfg --data data/oxfordhand.data --weights weights/prune_16_shortcut_prune_0.93_keep_0.01_last.weights --batch-size 52 --epochs 50`
112 | 
113 | 可以猜测,剪枝得到的cfg是针对该数据集相对合理的结构,而保留的权重可以让模型快速训练接近这个结构的能力上限,这个过程类似于一种有限范围的结构搜索。而不同的训练策略,稀疏策略,剪枝策略会得到不同的结果,相信即使是这个例子也可以进一步压缩并保持良好精度。yolov3有众多优化项目和工程项目,可以利用这个剪枝得到的cfg和weights放到其他项目中做进一步优化和应用。
114 | [这里](https://pan.baidu.com/s/1APUfwO4L69u28Wt9gFNAYw)分享了这个例子的权重和cfg,包括baseline,稀疏,不同剪枝设置后的结果。
115 |
116 | ## License
117 | Apache 2.0
118 |
--------------------------------------------------------------------------------
/cfg/yolov3-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 |
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 |
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 |
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=18
604 | activation=linear
605 |
606 |
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610 | classes=1
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 |
617 |
618 | [route]
619 | layers = -4
620 |
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 |
629 | [upsample]
630 | stride=2
631 |
632 | [route]
633 | layers = -1, 61
634 |
635 |
636 |
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 |
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 |
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 |
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=18
690 | activation=linear
691 |
692 |
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696 | classes=1
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 |
703 |
704 |
705 | [route]
706 | layers = -4
707 |
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 |
716 | [upsample]
717 | stride=2
718 |
719 | [route]
720 | layers = -1, 36
721 |
722 |
723 |
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 |
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 |
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 |
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 |
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 |
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 |
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=18
777 | activation=linear
778 |
779 |
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783 | classes=1
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 |
--------------------------------------------------------------------------------
/cfg/yolov3-hand.cfg:
--------------------------------------------------------------------------------
1 |
2 | [net]
3 | # Testing
4 | #batch=1
5 | #subdivisions=1
6 | # Training
7 | batch=16
8 | subdivisions=1
9 | width=416
10 | height=416
11 | channels=3
12 | momentum=0.9
13 | decay=0.0005
14 | angle=0
15 | saturation = 1.5
16 | exposure = 1.5
17 | hue=.1
18 |
19 | learning_rate=0.001
20 | burn_in=1000
21 | max_batches = 500200
22 | policy=steps
23 | steps=400000,450000
24 | scales=.1,.1
25 |
26 | [convolutional]
27 | batch_normalize=1
28 | filters=32
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | # Downsample
35 |
36 | [convolutional]
37 | batch_normalize=1
38 | filters=64
39 | size=3
40 | stride=2
41 | pad=1
42 | activation=leaky
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=32
47 | size=1
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [convolutional]
53 | batch_normalize=1
54 | filters=64
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [shortcut]
61 | from=-3
62 | activation=linear
63 |
64 | # Downsample
65 |
66 | [convolutional]
67 | batch_normalize=1
68 | filters=128
69 | size=3
70 | stride=2
71 | pad=1
72 | activation=leaky
73 |
74 | [convolutional]
75 | batch_normalize=1
76 | filters=64
77 | size=1
78 | stride=1
79 | pad=1
80 | activation=leaky
81 |
82 | [convolutional]
83 | batch_normalize=1
84 | filters=128
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [shortcut]
91 | from=-3
92 | activation=linear
93 |
94 | [convolutional]
95 | batch_normalize=1
96 | filters=64
97 | size=1
98 | stride=1
99 | pad=1
100 | activation=leaky
101 |
102 | [convolutional]
103 | batch_normalize=1
104 | filters=128
105 | size=3
106 | stride=1
107 | pad=1
108 | activation=leaky
109 |
110 | [shortcut]
111 | from=-3
112 | activation=linear
113 |
114 | # Downsample
115 |
116 | [convolutional]
117 | batch_normalize=1
118 | filters=256
119 | size=3
120 | stride=2
121 | pad=1
122 | activation=leaky
123 |
124 | [convolutional]
125 | batch_normalize=1
126 | filters=128
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 |
132 | [convolutional]
133 | batch_normalize=1
134 | filters=256
135 | size=3
136 | stride=1
137 | pad=1
138 | activation=leaky
139 |
140 | [shortcut]
141 | from=-3
142 | activation=linear
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=128
147 | size=1
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [convolutional]
153 | batch_normalize=1
154 | filters=256
155 | size=3
156 | stride=1
157 | pad=1
158 | activation=leaky
159 |
160 | [shortcut]
161 | from=-3
162 | activation=linear
163 |
164 | [convolutional]
165 | batch_normalize=1
166 | filters=128
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 |
172 | [convolutional]
173 | batch_normalize=1
174 | filters=256
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | [shortcut]
181 | from=-3
182 | activation=linear
183 |
184 | [convolutional]
185 | batch_normalize=1
186 | filters=128
187 | size=1
188 | stride=1
189 | pad=1
190 | activation=leaky
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | filters=256
195 | size=3
196 | stride=1
197 | pad=1
198 | activation=leaky
199 |
200 | [shortcut]
201 | from=-3
202 | activation=linear
203 |
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=1
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [convolutional]
214 | batch_normalize=1
215 | filters=256
216 | size=3
217 | stride=1
218 | pad=1
219 | activation=leaky
220 |
221 | [shortcut]
222 | from=-3
223 | activation=linear
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=1
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [convolutional]
234 | batch_normalize=1
235 | filters=256
236 | size=3
237 | stride=1
238 | pad=1
239 | activation=leaky
240 |
241 | [shortcut]
242 | from=-3
243 | activation=linear
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=1
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [convolutional]
254 | batch_normalize=1
255 | filters=256
256 | size=3
257 | stride=1
258 | pad=1
259 | activation=leaky
260 |
261 | [shortcut]
262 | from=-3
263 | activation=linear
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=1
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [convolutional]
274 | batch_normalize=1
275 | filters=256
276 | size=3
277 | stride=1
278 | pad=1
279 | activation=leaky
280 |
281 | [shortcut]
282 | from=-3
283 | activation=linear
284 |
285 | # Downsample
286 |
287 | [convolutional]
288 | batch_normalize=1
289 | filters=512
290 | size=3
291 | stride=2
292 | pad=1
293 | activation=leaky
294 |
295 | [convolutional]
296 | batch_normalize=1
297 | filters=256
298 | size=1
299 | stride=1
300 | pad=1
301 | activation=leaky
302 |
303 | [convolutional]
304 | batch_normalize=1
305 | filters=512
306 | size=3
307 | stride=1
308 | pad=1
309 | activation=leaky
310 |
311 | [shortcut]
312 | from=-3
313 | activation=linear
314 |
315 |
316 | [convolutional]
317 | batch_normalize=1
318 | filters=256
319 | size=1
320 | stride=1
321 | pad=1
322 | activation=leaky
323 |
324 | [convolutional]
325 | batch_normalize=1
326 | filters=512
327 | size=3
328 | stride=1
329 | pad=1
330 | activation=leaky
331 |
332 | [shortcut]
333 | from=-3
334 | activation=linear
335 |
336 |
337 | [convolutional]
338 | batch_normalize=1
339 | filters=256
340 | size=1
341 | stride=1
342 | pad=1
343 | activation=leaky
344 |
345 | [convolutional]
346 | batch_normalize=1
347 | filters=512
348 | size=3
349 | stride=1
350 | pad=1
351 | activation=leaky
352 |
353 | [shortcut]
354 | from=-3
355 | activation=linear
356 |
357 |
358 | [convolutional]
359 | batch_normalize=1
360 | filters=256
361 | size=1
362 | stride=1
363 | pad=1
364 | activation=leaky
365 |
366 | [convolutional]
367 | batch_normalize=1
368 | filters=512
369 | size=3
370 | stride=1
371 | pad=1
372 | activation=leaky
373 |
374 | [shortcut]
375 | from=-3
376 | activation=linear
377 |
378 | [convolutional]
379 | batch_normalize=1
380 | filters=256
381 | size=1
382 | stride=1
383 | pad=1
384 | activation=leaky
385 |
386 | [convolutional]
387 | batch_normalize=1
388 | filters=512
389 | size=3
390 | stride=1
391 | pad=1
392 | activation=leaky
393 |
394 | [shortcut]
395 | from=-3
396 | activation=linear
397 |
398 |
399 | [convolutional]
400 | batch_normalize=1
401 | filters=256
402 | size=1
403 | stride=1
404 | pad=1
405 | activation=leaky
406 |
407 | [convolutional]
408 | batch_normalize=1
409 | filters=512
410 | size=3
411 | stride=1
412 | pad=1
413 | activation=leaky
414 |
415 | [shortcut]
416 | from=-3
417 | activation=linear
418 |
419 |
420 | [convolutional]
421 | batch_normalize=1
422 | filters=256
423 | size=1
424 | stride=1
425 | pad=1
426 | activation=leaky
427 |
428 | [convolutional]
429 | batch_normalize=1
430 | filters=512
431 | size=3
432 | stride=1
433 | pad=1
434 | activation=leaky
435 |
436 | [shortcut]
437 | from=-3
438 | activation=linear
439 |
440 | [convolutional]
441 | batch_normalize=1
442 | filters=256
443 | size=1
444 | stride=1
445 | pad=1
446 | activation=leaky
447 |
448 | [convolutional]
449 | batch_normalize=1
450 | filters=512
451 | size=3
452 | stride=1
453 | pad=1
454 | activation=leaky
455 |
456 | [shortcut]
457 | from=-3
458 | activation=linear
459 |
460 | # Downsample
461 |
462 | [convolutional]
463 | batch_normalize=1
464 | filters=1024
465 | size=3
466 | stride=2
467 | pad=1
468 | activation=leaky
469 |
470 | [convolutional]
471 | batch_normalize=1
472 | filters=512
473 | size=1
474 | stride=1
475 | pad=1
476 | activation=leaky
477 |
478 | [convolutional]
479 | batch_normalize=1
480 | filters=1024
481 | size=3
482 | stride=1
483 | pad=1
484 | activation=leaky
485 |
486 | [shortcut]
487 | from=-3
488 | activation=linear
489 |
490 | [convolutional]
491 | batch_normalize=1
492 | filters=512
493 | size=1
494 | stride=1
495 | pad=1
496 | activation=leaky
497 |
498 | [convolutional]
499 | batch_normalize=1
500 | filters=1024
501 | size=3
502 | stride=1
503 | pad=1
504 | activation=leaky
505 |
506 | [shortcut]
507 | from=-3
508 | activation=linear
509 |
510 | [convolutional]
511 | batch_normalize=1
512 | filters=512
513 | size=1
514 | stride=1
515 | pad=1
516 | activation=leaky
517 |
518 | [convolutional]
519 | batch_normalize=1
520 | filters=1024
521 | size=3
522 | stride=1
523 | pad=1
524 | activation=leaky
525 |
526 | [shortcut]
527 | from=-3
528 | activation=linear
529 |
530 | [convolutional]
531 | batch_normalize=1
532 | filters=512
533 | size=1
534 | stride=1
535 | pad=1
536 | activation=leaky
537 |
538 | [convolutional]
539 | batch_normalize=1
540 | filters=1024
541 | size=3
542 | stride=1
543 | pad=1
544 | activation=leaky
545 |
546 | [shortcut]
547 | from=-3
548 | activation=linear
549 |
550 | ######################
551 |
552 | [convolutional]
553 | batch_normalize=1
554 | filters=512
555 | size=1
556 | stride=1
557 | pad=1
558 | activation=leaky
559 |
560 | [convolutional]
561 | batch_normalize=1
562 | size=3
563 | stride=1
564 | pad=1
565 | filters=1024
566 | activation=leaky
567 |
568 | [convolutional]
569 | batch_normalize=1
570 | filters=512
571 | size=1
572 | stride=1
573 | pad=1
574 | activation=leaky
575 |
576 | [convolutional]
577 | batch_normalize=1
578 | size=3
579 | stride=1
580 | pad=1
581 | filters=1024
582 | activation=leaky
583 |
584 | [convolutional]
585 | batch_normalize=1
586 | filters=512
587 | size=1
588 | stride=1
589 | pad=1
590 | activation=leaky
591 |
592 | [convolutional]
593 | batch_normalize=1
594 | size=3
595 | stride=1
596 | pad=1
597 | filters=1024
598 | activation=leaky
599 |
600 | [convolutional]
601 | size=1
602 | stride=1
603 | pad=1
604 | filters=18
605 | activation=linear
606 |
607 |
608 | [yolo]
609 | mask = 6,7,8
610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
611 | classes=1
612 | num=9
613 | jitter=.3
614 | ignore_thresh = .7
615 | truth_thresh = 1
616 | random=1
617 |
618 |
619 | [route]
620 | layers = -4
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=256
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [upsample]
631 | stride=2
632 |
633 | [route]
634 | layers = -1, 61
635 |
636 |
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=256
641 | size=1
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | size=3
649 | stride=1
650 | pad=1
651 | filters=512
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | size=3
665 | stride=1
666 | pad=1
667 | filters=512
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | size=1
688 | stride=1
689 | pad=1
690 | filters=18
691 | activation=linear
692 |
693 |
694 | [yolo]
695 | mask = 3,4,5
696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
697 | classes=1
698 | num=9
699 | jitter=.3
700 | ignore_thresh = .7
701 | truth_thresh = 1
702 | random=1
703 |
704 |
705 |
706 | [route]
707 | layers = -4
708 |
709 | [convolutional]
710 | batch_normalize=1
711 | filters=128
712 | size=1
713 | stride=1
714 | pad=1
715 | activation=leaky
716 |
717 | [upsample]
718 | stride=2
719 |
720 | [route]
721 | layers = -1, 36
722 |
723 |
724 |
725 | [convolutional]
726 | batch_normalize=1
727 | filters=128
728 | size=1
729 | stride=1
730 | pad=1
731 | activation=leaky
732 |
733 | [convolutional]
734 | batch_normalize=1
735 | size=3
736 | stride=1
737 | pad=1
738 | filters=256
739 | activation=leaky
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [convolutional]
750 | batch_normalize=1
751 | size=3
752 | stride=1
753 | pad=1
754 | filters=256
755 | activation=leaky
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | size=1
775 | stride=1
776 | pad=1
777 | filters=18
778 | activation=linear
779 |
780 |
781 | [yolo]
782 | mask = 0,1,2
783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
784 | classes=1
785 | num=9
786 | jitter=.3
787 | ignore_thresh = .7
788 | truth_thresh = 1
789 | random=1
790 |
791 |
--------------------------------------------------------------------------------
/cfg/yolov3-spp-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=100
20 | max_batches = 5000
21 | policy=steps
22 | steps=4000,4500
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=18
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 6,7,8
642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
643 | classes=1
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=18
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 3,4,5
728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
729 | classes=1
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=18
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0,1,2
815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
816 | classes=1
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
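Note on the single-class SPP config that closes above: the convolutions feeding each of its three [yolo] layers use filters=18. That follows the usual Darknet rule filters = (classes + 5) * anchors_per_head, i.e. 4 box offsets + 1 objectness score + the class scores, for every anchor index listed in the head's mask. A quick check (the helper name below is mine, not part of this repo):

    def yolo_head_filters(num_classes: int, anchors_per_head: int = 3) -> int:
        """Darknet convention: each anchor predicts 4 box coords + 1 objectness + num_classes scores."""
        return (num_classes + 5) * anchors_per_head

    assert yolo_head_filters(1) == 18    # the filters=18 heads in the 1-class config above
    assert yolo_head_filters(80) == 255  # the filters=255 heads in the 80-class configs below
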
/cfg/yolov3-tiny-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=2
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=16
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | batch_normalize=1
39 | filters=32
40 | size=3
41 | stride=1
42 | pad=1
43 | activation=leaky
44 |
45 | [maxpool]
46 | size=2
47 | stride=2
48 |
49 | [convolutional]
50 | batch_normalize=1
51 | filters=64
52 | size=3
53 | stride=1
54 | pad=1
55 | activation=leaky
56 |
57 | [maxpool]
58 | size=2
59 | stride=2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=128
64 | size=3
65 | stride=1
66 | pad=1
67 | activation=leaky
68 |
69 | [maxpool]
70 | size=2
71 | stride=2
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=256
76 | size=3
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [maxpool]
82 | size=2
83 | stride=2
84 |
85 | [convolutional]
86 | batch_normalize=1
87 | filters=512
88 | size=3
89 | stride=1
90 | pad=1
91 | activation=leaky
92 |
93 | [maxpool]
94 | size=2
95 | stride=1
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | ###########
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=18
128 | activation=linear
129 |
130 |
131 |
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135 | classes=1
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 |
142 | [route]
143 | layers = -4
144 |
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 |
153 | [upsample]
154 | stride=2
155 |
156 | [route]
157 | layers = -1, 8
158 |
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 |
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=18
172 | activation=linear
173 |
174 | [yolo]
175 | mask = 0,1,2
176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177 | classes=1
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 |
--------------------------------------------------------------------------------
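All of the cfg files dumped here share the same INI-like layout: bracketed block headers ([net], [convolutional], [yolo], ...) followed by key=value pairs, with # starting a comment. The repo ships utils/parse_config.py for reading them; the sketch below is only an illustration of the format, not that module's API:

    def parse_cfg(path):
        """Parse a Darknet .cfg file into a list of {'type': ..., key: value} dicts."""
        blocks = []
        with open(path) as f:
            for line in f:
                line = line.split('#')[0].strip()          # drop comments and whitespace
                if not line:
                    continue
                if line.startswith('[') and line.endswith(']'):
                    blocks.append({'type': line[1:-1].strip()})
                else:
                    key, value = line.split('=', 1)
                    blocks[-1][key.strip()] = value.strip()
        return blocks

    # e.g. parse_cfg('cfg/yolov3-tiny-1cls.cfg')[0]['width'] -> '416'
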
/cfg/yolov3-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=2
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=16
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | batch_normalize=1
39 | filters=32
40 | size=3
41 | stride=1
42 | pad=1
43 | activation=leaky
44 |
45 | [maxpool]
46 | size=2
47 | stride=2
48 |
49 | [convolutional]
50 | batch_normalize=1
51 | filters=64
52 | size=3
53 | stride=1
54 | pad=1
55 | activation=leaky
56 |
57 | [maxpool]
58 | size=2
59 | stride=2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=128
64 | size=3
65 | stride=1
66 | pad=1
67 | activation=leaky
68 |
69 | [maxpool]
70 | size=2
71 | stride=2
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=256
76 | size=3
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [maxpool]
82 | size=2
83 | stride=2
84 |
85 | [convolutional]
86 | batch_normalize=1
87 | filters=512
88 | size=3
89 | stride=1
90 | pad=1
91 | activation=leaky
92 |
93 | [maxpool]
94 | size=2
95 | stride=1
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | ###########
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=255
128 | activation=linear
129 |
130 |
131 |
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135 | classes=80
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 |
142 | [route]
143 | layers = -4
144 |
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 |
153 | [upsample]
154 | stride=2
155 |
156 | [route]
157 | layers = -1, 8
158 |
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 |
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=255
172 | activation=linear
173 |
174 | [yolo]
175 | mask = 1,2,3
176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177 | classes=80
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 |
--------------------------------------------------------------------------------
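A note on yolov3-tiny.cfg above: the backbone reaches the first [yolo] head after five stride-2 [maxpool] layers (the sixth maxpool keeps stride=1), for an overall stride of 32, and the second head sits after a stride-2 [upsample], i.e. stride 16. With the 416x416 input from the [net] block, the two heads therefore predict on 13x13 and 26x26 grids:

    width = 416                       # from the [net] block above
    coarse_stride = 2 ** 5            # five stride-2 maxpools before the first [yolo] head
    fine_stride = coarse_stride // 2  # one stride-2 upsample before the second head

    print(width // coarse_stride, width // fine_stride)  # 13 26
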
/cfg/yolov3.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 |
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 |
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 |
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=255
604 | activation=linear
605 |
606 |
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610 | classes=80
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 |
617 |
618 | [route]
619 | layers = -4
620 |
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 |
629 | [upsample]
630 | stride=2
631 |
632 | [route]
633 | layers = -1, 61
634 |
635 |
636 |
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 |
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 |
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 |
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=255
690 | activation=linear
691 |
692 |
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696 | classes=80
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 |
703 |
704 |
705 | [route]
706 | layers = -4
707 |
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 |
716 | [upsample]
717 | stride=2
718 |
719 | [route]
720 | layers = -1, 36
721 |
722 |
723 |
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 |
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 |
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 |
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 |
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 |
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 |
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=255
777 | activation=linear
778 |
779 |
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783 | classes=80
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 |
--------------------------------------------------------------------------------
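A note on the [route] blocks in yolov3.cfg above: they mix relative indices (layers = -4) with absolute ones (layers = -1, 61). Negative values count back from the route layer itself; non-negative values index layers from the top of the file (0-based, ignoring [net]). The repo's models.py resolves these for real; the sketch below, with an index taken from the standard yolov3 layer numbering where the "layers = -1, 61" route is layer 86, is purely illustrative:

    def resolve_route(layer_idx, layers):
        """Turn a [route] 'layers=' list into absolute layer indices."""
        return [l if l >= 0 else layer_idx + l for l in layers]

    # the route before the medium-scale head: previous layer plus backbone layer 61
    print(resolve_route(86, [-1, 61]))  # [85, 61]
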
/cfg/yolov3s-3a320.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=85
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 2
642 | anchors = 16,30, 62,45, 156,198
643 | classes=80
644 | num=3
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=85
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 1
728 | anchors = 16,30, 62,45, 156,198
729 | classes=80
730 | num=3
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=85
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0
815 | anchors = 16,30, 62,45, 156,198
816 | classes=80
817 | num=3
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
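yolov3s-3a320.cfg above keeps the 80 COCO classes but trims the anchor set to three boxes (num=3) and gives each head exactly one of them via its mask, so the head convolutions shrink from 255 to (80 + 5) * 1 = 85 filters. The mask values are simply indices into the anchors list; a small illustration with the values copied from the cfg:

    anchors = [(16, 30), (62, 45), (156, 198)]   # from yolov3s-3a320.cfg
    masks = [[2], [1], [0]]                      # heads in the order they appear in the cfg

    for mask in masks:
        print([anchors[i] for i in mask])        # (156, 198), then (62, 45), then (16, 30)
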
/cfg/yolov4-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.00261
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=2
30 | pad=1
31 | activation=leaky
32 |
33 | [convolutional]
34 | batch_normalize=1
35 | filters=64
36 | size=3
37 | stride=2
38 | pad=1
39 | activation=leaky
40 |
41 | [convolutional]
42 | batch_normalize=1
43 | filters=64
44 | size=3
45 | stride=1
46 | pad=1
47 | activation=leaky
48 |
49 | [route]
50 | layers=-1
51 | groups=2
52 | group_id=1
53 |
54 | [convolutional]
55 | batch_normalize=1
56 | filters=32
57 | size=3
58 | stride=1
59 | pad=1
60 | activation=leaky
61 |
62 | [convolutional]
63 | batch_normalize=1
64 | filters=32
65 | size=3
66 | stride=1
67 | pad=1
68 | activation=leaky
69 |
70 | [route]
71 | layers = -1,-2
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [route]
82 | layers = -6,-1
83 |
84 | [maxpool]
85 | size=2
86 | stride=2
87 |
88 | [convolutional]
89 | batch_normalize=1
90 | filters=128
91 | size=3
92 | stride=1
93 | pad=1
94 | activation=leaky
95 |
96 | [route]
97 | layers=-1
98 | groups=2
99 | group_id=1
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=64
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [convolutional]
110 | batch_normalize=1
111 | filters=64
112 | size=3
113 | stride=1
114 | pad=1
115 | activation=leaky
116 |
117 | [route]
118 | layers = -1,-2
119 |
120 | [convolutional]
121 | batch_normalize=1
122 | filters=128
123 | size=1
124 | stride=1
125 | pad=1
126 | activation=leaky
127 |
128 | [route]
129 | layers = -6,-1
130 |
131 | [maxpool]
132 | size=2
133 | stride=2
134 |
135 | [convolutional]
136 | batch_normalize=1
137 | filters=256
138 | size=3
139 | stride=1
140 | pad=1
141 | activation=leaky
142 |
143 | [route]
144 | layers=-1
145 | groups=2
146 | group_id=1
147 |
148 | [convolutional]
149 | batch_normalize=1
150 | filters=128
151 | size=3
152 | stride=1
153 | pad=1
154 | activation=leaky
155 |
156 | [convolutional]
157 | batch_normalize=1
158 | filters=128
159 | size=3
160 | stride=1
161 | pad=1
162 | activation=leaky
163 |
164 | [route]
165 | layers = -1,-2
166 |
167 | [convolutional]
168 | batch_normalize=1
169 | filters=256
170 | size=1
171 | stride=1
172 | pad=1
173 | activation=leaky
174 |
175 | [route]
176 | layers = -6,-1
177 |
178 | [maxpool]
179 | size=2
180 | stride=2
181 |
182 | [convolutional]
183 | batch_normalize=1
184 | filters=512
185 | size=3
186 | stride=1
187 | pad=1
188 | activation=leaky
189 |
190 | ##################################
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | filters=256
195 | size=1
196 | stride=1
197 | pad=1
198 | activation=leaky
199 |
200 | [convolutional]
201 | batch_normalize=1
202 | filters=512
203 | size=3
204 | stride=1
205 | pad=1
206 | activation=leaky
207 |
208 | [convolutional]
209 | size=1
210 | stride=1
211 | pad=1
212 | filters=255
213 | activation=linear
214 |
215 |
216 |
217 | [yolo]
218 | mask = 3,4,5
219 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
220 | classes=80
221 | num=6
222 | jitter=.3
223 | scale_x_y = 1.05
224 | cls_normalizer=1.0
225 | iou_normalizer=0.07
226 | iou_loss=ciou
227 | ignore_thresh = .7
228 | truth_thresh = 1
229 | random=0
230 | resize=1.5
231 | nms_kind=greedynms
232 | beta_nms=0.6
233 |
234 | [route]
235 | layers = -4
236 |
237 | [convolutional]
238 | batch_normalize=1
239 | filters=128
240 | size=1
241 | stride=1
242 | pad=1
243 | activation=leaky
244 |
245 | [upsample]
246 | stride=2
247 |
248 | [route]
249 | layers = -1, 23
250 |
251 | [convolutional]
252 | batch_normalize=1
253 | filters=256
254 | size=3
255 | stride=1
256 | pad=1
257 | activation=leaky
258 |
259 | [convolutional]
260 | size=1
261 | stride=1
262 | pad=1
263 | filters=255
264 | activation=linear
265 |
266 | [yolo]
267 | mask = 1,2,3
268 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
269 | classes=80
270 | num=6
271 | jitter=.3
272 | scale_x_y = 1.05
273 | cls_normalizer=1.0
274 | iou_normalizer=0.07
275 | iou_loss=ciou
276 | ignore_thresh = .7
277 | truth_thresh = 1
278 | random=0
279 | resize=1.5
280 | nms_kind=greedynms
281 | beta_nms=0.6
--------------------------------------------------------------------------------
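A note on yolov4-tiny.cfg above: its [route] blocks with groups=2 and group_id=1 do not concatenate layers; they keep only the second half of the previous layer's channels (the CSP split used throughout the tiny backbone). A rough PyTorch-style sketch of that behaviour, as an illustration rather than this repo's implementation:

    import torch

    def grouped_route(x: torch.Tensor, groups: int = 2, group_id: int = 1) -> torch.Tensor:
        """Keep one channel group of the previous layer's output, as yolov4-tiny's grouped [route] does."""
        return torch.chunk(x, groups, dim=1)[group_id]

    x = torch.randn(1, 64, 104, 104)   # e.g. the 64-filter conv at stride 4 with a 416x416 input
    print(grouped_route(x).shape)      # torch.Size([1, 32, 104, 104])
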
/data/coco.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=../coco/trainvalno5k.txt
3 | valid=../coco/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
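coco.data above (and the near-identical *.data manifests that follow) are flat key=value files pointing at the class count, the train/valid image lists, and the .names file. The repo's utils/parse_config.py handles these in practice; a minimal illustrative reader:

    def parse_data_cfg(path):
        """Read a Darknet .data manifest into a dict of stripped key/value strings."""
        options = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                key, value = line.split('=', 1)
                options[key.strip()] = value.strip()
        return options

    # parse_data_cfg('data/coco.data')['classes'] -> '80'
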
/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
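coco.names above holds one class label per line in the index order the 80-class cfgs use: class id 0 is person, id 79 is toothbrush. Loading it is a one-liner, sketched here for completeness (helper name is mine):

    def load_names(path):
        """Return the class names from a .names file, one label per line."""
        with open(path) as f:
            return [line.strip() for line in f if line.strip()]

    names = load_names('data/coco.names')
    print(len(names), names[0], names[-1])  # 80 person toothbrush
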
/data/coco_1000img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/coco_1000img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1000val.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/coco_1000val.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_16img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_16img.txt
3 | valid=./data/coco_16img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_16img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 |
--------------------------------------------------------------------------------
/data/coco_1cls.data:
--------------------------------------------------------------------------------
1 | classes=1
2 | train=./data/coco_1cls.txt
3 | valid=./data/coco_1cls.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1cls.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg
2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg
3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg
4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg
5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg
6 |
--------------------------------------------------------------------------------
/data/coco_1img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1img.txt
3 | valid=./data/coco_1img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/val2014/COCO_val2014_000000581886.jpg
2 |
--------------------------------------------------------------------------------
/data/coco_1k5k.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_32img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_32img.txt
3 | valid=./data/coco_32img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_32img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg
18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg
19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg
20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg
21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg
22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg
23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg
24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg
25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg
26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg
27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg
28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg
29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg
30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg
31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg
32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg
33 |
--------------------------------------------------------------------------------
/data/coco_500val.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_500img.txt
3 | valid=./data/coco_500val.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_64img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_64img.txt
3 | valid=./data/coco_64img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_64img.shapes:
--------------------------------------------------------------------------------
1 | 640 480
2 | 640 426
3 | 640 428
4 | 640 425
5 | 481 640
6 | 381 500
7 | 640 488
8 | 480 640
9 | 640 426
10 | 427 640
11 | 500 375
12 | 612 612
13 | 640 425
14 | 512 640
15 | 640 480
16 | 640 427
17 | 640 427
18 | 640 416
19 | 640 480
20 | 416 640
21 | 640 481
22 | 640 573
23 | 480 640
24 | 640 480
25 | 640 428
26 | 480 640
27 | 427 640
28 | 640 536
29 | 640 480
30 | 640 428
31 | 640 424
32 | 500 333
33 | 591 640
34 | 640 480
35 | 640 426
36 | 600 600
37 | 640 427
38 | 640 427
39 | 640 480
40 | 640 481
41 | 640 427
42 | 640 480
43 | 640 480
44 | 480 640
45 | 480 640
46 | 640 480
47 | 446 640
48 | 640 480
49 | 640 611
50 | 426 640
51 | 640 480
52 | 640 389
53 | 427 640
54 | 640 480
55 | 640 480
56 | 480 640
57 | 640 480
58 | 640 427
59 | 500 495
60 | 500 313
61 | 640 480
62 | 360 640
63 | 427 640
64 | 640 480
65 |
--------------------------------------------------------------------------------
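The .shapes file above stores what appear to be cached width/height pairs, one line per image and line-aligned with the matching image list (coco_64img.txt below has the same 64 entries), so image sizes can be reused without decoding every file. A sketch of reading it under that assumption:

    import numpy as np

    def load_shapes(path):
        """Load cached per-image size pairs (assumed 'width height') from a .shapes file."""
        return np.loadtxt(path, dtype=np.int64).reshape(-1, 2)

    shapes = load_shapes('data/coco_64img.shapes')
    aspect_ratios = shapes[:, 1] / shapes[:, 0]   # e.g. to group images of similar shape
    print(shapes.shape, aspect_ratios[:3])
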
/data/coco_64img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg
18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg
19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg
20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg
21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg
22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg
23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg
24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg
25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg
26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg
27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg
28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg
29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg
30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg
31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg
32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg
33 | ../coco/images/train2014/COCO_train2014_000000000263.jpg
34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg
35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg
36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg
37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg
38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg
39 | ../coco/images/train2014/COCO_train2014_000000000321.jpg
40 | ../coco/images/train2014/COCO_train2014_000000000322.jpg
41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg
42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg
43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg
44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg
45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg
46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg
47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg
48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg
49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg
50 | ../coco/images/train2014/COCO_train2014_000000000404.jpg
51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg
52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg
53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg
54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg
55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg
56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg
57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg
58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg
59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg
60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg
61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg
62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg
63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg
64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg
65 |
--------------------------------------------------------------------------------
/data/coco_paper.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | street sign
13 | stop sign
14 | parking meter
15 | bench
16 | bird
17 | cat
18 | dog
19 | horse
20 | sheep
21 | cow
22 | elephant
23 | bear
24 | zebra
25 | giraffe
26 | hat
27 | backpack
28 | umbrella
29 | shoe
30 | eye glasses
31 | handbag
32 | tie
33 | suitcase
34 | frisbee
35 | skis
36 | snowboard
37 | sports ball
38 | kite
39 | baseball bat
40 | baseball glove
41 | skateboard
42 | surfboard
43 | tennis racket
44 | bottle
45 | plate
46 | wine glass
47 | cup
48 | fork
49 | knife
50 | spoon
51 | bowl
52 | banana
53 | apple
54 | sandwich
55 | orange
56 | broccoli
57 | carrot
58 | hot dog
59 | pizza
60 | donut
61 | cake
62 | chair
63 | couch
64 | potted plant
65 | bed
66 | mirror
67 | dining table
68 | window
69 | desk
70 | toilet
71 | door
72 | tv
73 | laptop
74 | mouse
75 | remote
76 | keyboard
77 | cell phone
78 | microwave
79 | oven
80 | toaster
81 | sink
82 | refrigerator
83 | blender
84 | book
85 | clock
86 | vase
87 | scissors
88 | teddy bear
89 | hair drier
90 | toothbrush
91 | hair brush
--------------------------------------------------------------------------------
/data/converter.py:
--------------------------------------------------------------------------------
1 | import scipy.io as sio
2 | from PIL import Image
3 | import os, glob
4 | import datetime
5 | import shutil
6 |
7 | running_from_path = os.getcwd()
8 | created_images_dir = 'images'
9 | created_labels_dir = 'labels'
10 | data_dir = 'data' # data_dir is the folder where this script lives
11 |
12 | def hms_string(sec_elapsed): # format the elapsed time as H:MM:SS.ss
13 | h = int(sec_elapsed / (60 * 60))
14 | m = int((sec_elapsed % (60 * 60)) / 60)
15 | s = sec_elapsed % 60.
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s)
17 |
18 | def generate_dir(set_name, root_path): # create the matching sub-folders under images/ and labels/
19 | images_dir = os.path.join(root_path, 'images')
20 | annotation_dir = os.path.join(root_path, 'annotations')
21 |
22 |     new_images_dir = os.path.join(created_images_dir, set_name) # images are copied from their original folder into this one
23 | new_annotation_dir = os.path.join(created_labels_dir, set_name)
24 |
25 | if not os.path.exists(new_images_dir):
26 | os.makedirs(new_images_dir)
27 |
28 | if not os.path.exists(new_annotation_dir):
29 | os.makedirs(new_annotation_dir)
30 |
31 |     for img in glob.glob(os.path.join(images_dir, "*.jpg")): # copy the images from the original folder into the new folder
32 | shutil.copy(img, new_images_dir)
33 |
34 |     os.chdir(annotation_dir) # switch into the annotation directory
35 |     matlab_annotations = glob.glob("*.mat") # file names only, without paths
36 |     os.chdir(running_from_path) # switch back to the original directory
37 |
38 | for matfile in matlab_annotations:
39 | filename = matfile.split(".")[0]
40 |
41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg"))
42 |
43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False)
44 |
45 | boxes = content["boxes"]
46 |
47 | width, height = pil_image.size
48 |
49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs:
50 | for box_idx, box in enumerate(boxes.T):
51 | a = box[0][0][0][0]
52 | b = box[0][0][0][1]
53 | c = box[0][0][0][2]
54 | d = box[0][0][0][3]
55 |
56 | aXY = (a[0][1], a[0][0])
57 | bXY = (b[0][1], b[0][0])
58 | cXY = (c[0][1], c[0][0])
59 | dXY = (d[0][1], d[0][0])
60 |
61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0])
62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0])
63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1])
64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1])
65 |
66 |                 # clip, so the box never falls outside the image
67 | maxX = min(maxX, width-1)
68 | minX = max(minX, 0)
69 | maxY = min(maxY, height-1)
70 | minY = max(minY, 0)
71 |
72 |                 # normalized width = (box width / image width)
73 | norm_width = (maxX - minX) / width
74 |
75 |                 # normalized height = (box height / image height)
76 | norm_height = (maxY - minY) / height
77 |
78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2
79 |
80 | norm_center_x = center_x / width
81 | norm_center_y = center_y / height
82 |
83 | if box_idx != 0:
84 | hs.write("\n")
85 |
86 |                 hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # the leading 0 is the class id
87 |
88 | def create_txt(dirlist, filename):
89 |     with open(filename, "w") as txtfile: # write the txt file into the data folder
90 | imglist = []
91 |
92 | for dir in dirlist: # dir='images/test'
93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg'
94 |
95 | for idx, img in enumerate(imglist):
96 | if idx != 0:
97 | txtfile.write("\n")
98 |             txtfile.write(os.path.join(data_dir, img)) # prepend the data/ prefix
99 |
100 | if __name__ == '__main__':
101 | start_time = datetime.datetime.now()
102 |
103 |     generate_dir("train", "hand_dataset/training_dataset/training_data") # the first argument is the name of the folder to create
104 | generate_dir("test", "hand_dataset/test_dataset/test_data")
105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data")
106 |
107 |     create_txt((os.path.join(created_images_dir, 'train'), # merge the images under train and validation into train.txt
108 | os.path.join(created_images_dir, 'validation')),
109 | 'train.txt')
110 | create_txt((os.path.join(created_images_dir, 'test'), ),
111 | 'valid.txt')
112 |
113 | end_time = datetime.datetime.now()
114 | seconds_elapsed = (end_time - start_time).total_seconds()
115 | print("It took {} to execute this".format(hms_string(seconds_elapsed)))
--------------------------------------------------------------------------------
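
The core of data/converter.py is the conversion from the four corner points stored in the Oxford Hand .mat annotations to the normalized YOLO label format "class cx cy w h". Below is a minimal standalone sketch of that conversion; the function name corners_to_yolo, the corner coordinates and the image size are made up purely for illustration.

# Sketch of the corner-box -> YOLO-label conversion performed in data/converter.py.
# The corners, image size and class id below are made-up example values.
def corners_to_yolo(xs, ys, img_w, img_h, cls=0):
    # clip to the image, mirroring the clamping in converter.py
    max_x, min_x = min(max(xs), img_w - 1), max(min(xs), 0)
    max_y, min_y = min(max(ys), img_h - 1), max(min(ys), 0)
    w, h = (max_x - min_x) / img_w, (max_y - min_y) / img_h
    cx, cy = (max_x + min_x) / 2 / img_w, (max_y + min_y) / 2 / img_h
    return "%d %f %f %f %f" % (cls, cx, cy, w, h)

print(corners_to_yolo([120, 180, 175, 125], [60, 65, 110, 105], 640, 480))
# -> "0 0.234375 0.177083 0.093750 0.104167"

--------------------------------------------------------------------------------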
/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh
3 |
4 | # Clone COCO API
5 | git clone https://github.com/pdollar/coco && cd coco
6 |
7 | # Download Images
8 | mkdir images && cd images
9 | wget -c https://pjreddie.com/media/files/train2014.zip
10 | wget -c https://pjreddie.com/media/files/val2014.zip
11 |
12 | # Unzip
13 | unzip -q train2014.zip
14 | unzip -q val2014.zip
15 |
16 | # (optional) Delete zip files
17 | rm -rf *.zip
18 |
19 | cd ..
20 |
21 | # Download COCO Metadata
22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
23 | wget -c https://pjreddie.com/media/files/coco/5k.part
24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz
26 | tar xzf labels.tgz
27 | unzip -q instances_train-val2014.zip
28 |
29 | # Set Up Image Lists
30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
32 |
33 | # get xview training data
34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ'
35 | # tar -xvzf train_images.tgz
36 | # sudo rm -rf train_images/._*
37 | # lastly convert each .tif to a .bmp for faster loading in cv2
38 |
39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image
40 |
--------------------------------------------------------------------------------
/data/get_coco_dataset_gdrive.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
3 |
4 | # Zip coco folder
5 | # zip -r coco.zip coco
6 | # tar -czvf coco.tar.gz coco
7 |
8 | # Set fileid and filename
9 | filename="coco.zip"
10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip
11 |
12 | # Download from Google Drive, accepting presented query
13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
15 | rm ./cookie
16 |
17 | # Unzip
18 | unzip -q ${filename} # for coco.zip
19 | # tar -xzf ${filename} # for coco.tar.gz
20 |
--------------------------------------------------------------------------------
/data/hand.data:
--------------------------------------------------------------------------------
1 | classes=1
2 | train=D:/dl/YOLOv3-model-pruning/data/train_.txt
3 | valid=D:/dl/YOLOv3-model-pruning/data/valid_.txt
4 | names=D:/dl/YOLOv3-model-pruning/data/oxfordhand.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/img/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/1.jpg
--------------------------------------------------------------------------------
/data/img/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/2.jpg
--------------------------------------------------------------------------------
/data/img/baseline_and_sparse.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/baseline_and_sparse.jpg
--------------------------------------------------------------------------------
/data/img/bn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/bn.jpg
--------------------------------------------------------------------------------
/data/img/finetune_and_bn.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/finetune_and_bn.jpg
--------------------------------------------------------------------------------
/data/img/prune9316.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/img/prune9316.png
--------------------------------------------------------------------------------
/data/oxfordhand.data:
--------------------------------------------------------------------------------
1 | classes= 1
2 | train=data/train.txt
3 | valid=data/valid.txt
4 | names=data/oxfordhand.names
5 |
--------------------------------------------------------------------------------
/data/oxfordhand.names:
--------------------------------------------------------------------------------
1 | hand
2 |
3 |
--------------------------------------------------------------------------------
/data/samples/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/samples/bus.jpg
--------------------------------------------------------------------------------
/data/samples/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tanluren/yolov3-channel-and-layer-pruning/9220f301ed2fea90b0ce3e179f825dba46e7aace/data/samples/zidane.jpg
--------------------------------------------------------------------------------
/data/valid_.shapes:
--------------------------------------------------------------------------------
1 | 500 375
2 | 500 375
3 | 500 375
4 | 500 375
5 | 500 375
6 | 500 375
7 | 375 500
8 | 500 333
9 | 333 500
10 | 500 375
11 | 500 434
12 | 500 375
13 | 333 500
14 | 500 375
15 | 500 331
16 | 500 375
17 | 500 375
18 | 500 374
19 | 500 375
20 | 500 375
21 | 375 500
22 | 500 333
23 | 500 333
24 | 500 375
25 | 500 332
26 | 500 486
27 | 500 375
28 | 500 375
29 | 375 500
30 | 375 500
31 | 500 375
32 | 500 375
33 | 500 375
34 | 355 500
35 | 375 500
36 | 500 333
37 | 500 375
38 | 500 377
39 | 375 500
40 | 500 375
41 | 500 375
42 | 500 375
43 | 500 375
44 | 333 500
45 | 500 375
46 | 500 333
47 | 500 346
48 | 500 375
49 | 476 500
50 | 500 333
51 | 500 420
52 | 500 333
53 | 500 333
54 | 500 333
55 | 333 500
56 | 333 500
57 | 375 500
58 | 500 379
59 | 500 375
60 | 500 375
61 | 500 357
62 | 375 500
63 | 500 393
64 | 333 500
65 | 500 375
66 | 500 375
67 | 500 333
68 | 333 500
69 | 327 500
70 | 500 375
71 | 500 375
72 | 500 345
73 | 333 500
74 | 375 500
75 | 500 380
76 | 500 375
77 | 487 377
78 | 500 375
79 | 500 333
80 | 500 333
81 | 333 500
82 | 500 375
83 | 375 500
84 | 500 375
85 | 500 375
86 | 500 375
87 | 375 500
88 | 500 375
89 | 500 332
90 | 333 500
91 | 480 360
92 | 500 334
93 | 500 375
94 | 500 375
95 | 333 500
96 | 500 333
97 | 375 500
98 | 500 375
99 | 500 375
100 | 500 375
101 | 500 375
102 | 500 375
103 | 332 500
104 | 500 375
105 | 500 375
106 | 375 500
107 | 500 333
108 | 500 331
109 | 500 375
110 | 333 500
111 | 333 500
112 | 486 500
113 | 500 375
114 | 375 500
115 | 356 500
116 | 500 375
117 | 375 500
118 | 500 375
119 | 500 375
120 | 268 400
121 | 389 500
122 | 333 500
123 | 500 375
124 | 500 375
125 | 500 367
126 | 500 375
127 | 500 375
128 | 500 334
129 | 495 500
130 | 319 480
131 | 500 375
132 | 500 375
133 | 333 500
134 | 500 375
135 | 500 375
136 | 500 375
137 | 326 500
138 | 500 375
139 | 500 375
140 | 500 375
141 | 500 400
142 | 332 500
143 | 500 375
144 | 500 375
145 | 360 331
146 | 333 500
147 | 500 332
148 | 500 374
149 | 500 375
150 | 375 500
151 | 500 375
152 | 500 375
153 | 500 375
154 | 500 367
155 | 500 375
156 | 500 375
157 | 500 375
158 | 375 500
159 | 500 375
160 | 500 375
161 | 500 390
162 | 500 358
163 | 500 397
164 | 500 341
165 | 375 500
166 | 500 333
167 | 500 375
168 | 332 500
169 | 500 375
170 | 500 375
171 | 500 375
172 | 375 500
173 | 240 320
174 | 450 480
175 | 417 500
176 | 500 400
177 | 500 375
178 | 500 411
179 | 338 500
180 | 500 375
181 | 500 375
182 | 379 500
183 | 500 375
184 | 333 500
185 | 500 332
186 | 500 375
187 | 500 375
188 | 500 375
189 | 500 375
190 | 332 500
191 | 469 500
192 | 500 375
193 | 333 500
194 | 500 375
195 | 500 375
196 | 500 376
197 | 500 375
198 | 500 334
199 | 500 375
200 | 500 375
201 | 500 341
202 | 500 333
203 | 500 375
204 | 500 375
205 | 500 334
206 | 500 375
207 | 500 375
208 | 500 357
209 | 375 500
210 | 500 375
211 | 500 375
212 | 375 500
213 | 500 375
214 | 500 497
215 | 375 500
216 | 375 500
217 | 500 334
218 | 500 375
219 | 500 375
220 | 500 375
221 | 500 375
222 | 500 375
223 | 333 500
224 | 500 375
225 | 375 500
226 | 500 375
227 | 500 375
228 | 375 500
229 | 500 375
230 | 334 500
231 | 500 375
232 | 364 500
233 | 375 500
234 | 494 500
235 | 484 500
236 | 500 333
237 | 500 375
238 | 500 443
239 | 375 500
240 | 500 375
241 | 500 334
242 | 500 375
243 | 375 500
244 | 500 375
245 | 500 333
246 | 500 375
247 | 313 500
248 | 500 375
249 | 400 300
250 | 375 500
251 | 375 500
252 | 500 375
253 | 333 500
254 | 500 337
255 | 375 500
256 | 500 290
257 | 500 375
258 | 500 312
259 | 500 333
260 | 500 375
261 | 375 500
262 | 500 333
263 | 500 333
264 | 500 333
265 | 500 375
266 | 500 375
267 | 500 375
268 | 500 333
269 | 500 375
270 | 500 375
271 | 500 375
272 | 500 375
273 | 500 375
274 | 375 500
275 | 375 500
276 | 500 375
277 | 500 374
278 | 333 500
279 | 375 500
280 | 500 375
281 | 500 375
282 | 500 375
283 | 500 375
284 | 333 500
285 | 500 375
286 | 500 375
287 | 500 375
288 | 500 333
289 | 294 500
290 | 500 375
291 | 500 375
292 | 500 375
293 | 500 334
294 | 375 500
295 | 333 500
296 | 500 375
297 | 333 500
298 | 500 375
299 | 500 221
300 | 500 374
301 | 500 375
302 | 333 500
303 | 500 333
304 | 500 375
305 | 270 360
306 | 500 371
307 | 500 333
308 | 500 335
309 | 358 500
310 | 220 500
311 | 500 375
312 | 500 375
313 | 375 500
314 | 500 375
315 | 500 375
316 | 375 500
317 | 366 500
318 | 500 375
319 | 500 379
320 | 500 375
321 | 500 489
322 | 500 333
323 | 500 375
324 | 500 375
325 | 500 333
326 | 500 375
327 | 500 375
328 | 334 500
329 | 500 395
330 | 333 500
331 | 500 369
332 | 500 375
333 | 375 500
334 | 500 375
335 | 500 375
336 | 375 500
337 | 500 333
338 | 500 332
339 | 500 375
340 | 500 375
341 | 375 500
342 | 375 500
343 | 500 379
344 | 500 395
345 | 500 333
346 | 500 375
347 | 500 375
348 | 378 500
349 | 500 333
350 | 500 335
351 | 500 333
352 | 375 500
353 | 375 500
354 | 281 500
355 | 500 336
356 | 500 333
357 | 500 375
358 | 500 245
359 | 500 375
360 | 500 375
361 | 500 333
362 | 500 375
363 | 500 334
364 | 500 375
365 | 500 419
366 | 500 375
367 | 500 333
368 | 500 375
369 | 500 375
370 | 375 500
371 | 500 375
372 | 500 375
373 | 500 375
374 | 375 500
375 | 500 332
376 | 500 333
377 | 500 277
378 | 500 333
379 | 500 333
380 | 375 500
381 | 500 334
382 | 500 375
383 | 500 375
384 | 500 333
385 | 335 500
386 | 500 375
387 | 500 375
388 | 332 500
389 | 500 375
390 | 500 375
391 | 500 375
392 | 500 375
393 | 500 375
394 | 500 313
395 | 500 375
396 | 500 375
397 | 333 500
398 | 500 375
399 | 500 375
400 | 335 500
401 | 500 375
402 | 500 375
403 | 500 375
404 | 375 500
405 | 500 335
406 | 375 500
407 | 500 375
408 | 375 500
409 | 500 500
410 | 500 375
411 | 500 375
412 | 500 333
413 | 500 375
414 | 500 375
415 | 500 375
416 | 500 375
417 | 333 500
418 | 500 375
419 | 500 375
420 | 500 375
421 | 500 332
422 | 500 375
423 | 334 500
424 | 332 500
425 | 375 500
426 | 500 333
427 | 500 405
428 | 333 500
429 | 500 334
430 | 500 333
431 | 500 375
432 | 500 375
433 | 500 332
434 | 333 500
435 | 368 500
436 | 375 500
437 | 500 375
438 | 500 375
439 | 500 375
440 | 500 375
441 | 500 375
442 | 288 432
443 | 375 500
444 | 500 375
445 | 500 333
446 | 500 375
447 | 500 333
448 | 375 500
449 | 500 375
450 | 500 281
451 | 333 500
452 | 500 333
453 | 500 375
454 | 500 333
455 | 500 375
456 | 500 334
457 | 500 375
458 | 375 500
459 | 375 500
460 | 375 500
461 | 500 333
462 | 500 333
463 | 500 375
464 | 500 375
465 | 500 375
466 | 500 375
467 | 500 375
468 | 500 356
469 | 474 500
470 | 500 375
471 | 500 375
472 | 500 326
473 | 360 480
474 | 500 375
475 | 500 375
476 | 500 488
477 | 500 375
478 | 442 500
479 | 500 333
480 | 450 349
481 | 375 500
482 | 500 375
483 | 375 500
484 | 306 500
485 | 500 338
486 | 500 333
487 | 500 375
488 | 375 500
489 | 500 375
490 | 500 333
491 | 375 500
492 | 500 375
493 | 375 500
494 | 378 500
495 | 500 375
496 | 500 375
497 | 500 375
498 | 500 405
499 | 500 333
500 | 500 375
501 | 500 500
502 | 500 375
503 | 450 300
504 | 500 375
505 | 500 375
506 | 500 375
507 | 333 500
508 | 500 375
509 | 500 375
510 | 500 375
511 | 500 375
512 | 500 377
513 | 500 375
514 | 500 333
515 | 500 375
516 | 375 500
517 | 375 500
518 | 500 375
519 | 500 375
520 | 500 375
521 | 375 500
522 | 500 375
523 | 500 332
524 | 500 375
525 | 500 375
526 | 500 375
527 | 334 500
528 | 500 375
529 | 500 375
530 | 332 500
531 | 500 333
532 | 500 375
533 | 375 500
534 | 375 500
535 | 333 500
536 | 500 332
537 | 500 375
538 | 500 375
539 | 500 375
540 | 500 364
541 | 333 500
542 | 500 375
543 | 500 333
544 | 500 375
545 | 500 375
546 | 500 333
547 | 375 500
548 | 500 375
549 | 500 333
550 | 500 336
551 | 500 375
552 | 500 359
553 | 500 333
554 | 500 375
555 | 500 375
556 | 500 375
557 | 500 332
558 | 500 375
559 | 500 333
560 | 500 375
561 | 282 500
562 | 500 375
563 | 500 375
564 | 500 375
565 | 500 375
566 | 500 375
567 | 375 500
568 | 500 375
569 | 375 500
570 | 500 375
571 | 500 375
572 | 500 375
573 | 500 390
574 | 334 500
575 | 332 500
576 | 500 375
577 | 500 375
578 | 425 319
579 | 500 333
580 | 500 334
581 | 500 375
582 | 375 500
583 | 500 333
584 | 500 375
585 | 375 500
586 | 500 332
587 | 500 375
588 | 448 336
589 | 500 375
590 | 500 375
591 | 500 375
592 | 500 375
593 | 500 375
594 | 500 375
595 | 500 335
596 | 500 333
597 | 375 500
598 | 375 500
599 | 500 333
600 | 500 375
601 | 333 500
602 | 500 375
603 | 500 375
604 | 500 375
605 | 375 500
606 | 500 375
607 | 500 375
608 | 500 375
609 | 333 500
610 | 375 500
611 | 500 439
612 | 375 500
613 | 500 375
614 | 500 375
615 | 334 500
616 | 374 500
617 | 500 375
618 | 321 500
619 | 500 400
620 | 500 375
621 | 500 375
622 | 500 375
623 | 500 375
624 | 500 410
625 | 500 333
626 | 500 375
627 | 500 375
628 | 500 334
629 | 500 375
630 | 500 375
631 | 500 326
632 | 500 375
633 | 500 357
634 | 500 374
635 | 500 375
636 | 500 374
637 | 500 333
638 | 500 375
639 | 500 375
640 | 500 334
641 | 375 500
642 | 500 375
643 | 500 334
644 | 500 375
645 | 500 333
646 | 500 375
647 | 500 375
648 | 500 375
649 | 480 361
650 | 375 500
651 | 333 500
652 | 500 333
653 | 333 500
654 | 500 333
655 | 500 372
656 | 375 500
657 | 500 375
658 | 375 500
659 | 500 319
660 | 500 333
661 | 500 375
662 | 375 500
663 | 500 377
664 | 400 498
665 | 500 393
666 | 500 334
667 | 333 500
668 | 500 333
669 | 500 375
670 | 500 375
671 | 500 375
672 | 500 375
673 | 375 500
674 | 375 500
675 | 333 500
676 | 500 333
677 | 500 333
678 | 333 500
679 | 306 500
680 | 500 375
681 | 500 334
682 | 320 448
683 | 333 500
684 | 375 500
685 | 500 334
686 | 270 360
687 | 361 500
688 | 500 393
689 | 500 375
690 | 396 500
691 | 500 333
692 | 500 333
693 | 320 480
694 | 500 375
695 | 500 375
696 | 500 375
697 | 500 375
698 | 399 500
699 | 500 375
700 | 500 375
701 | 500 333
702 | 500 332
703 | 500 375
704 | 320 480
705 | 500 375
706 | 375 500
707 | 500 334
708 | 451 500
709 | 500 375
710 | 500 375
711 | 500 368
712 | 500 375
713 | 375 500
714 | 500 375
715 | 500 375
716 | 319 500
717 | 500 375
718 | 450 500
719 | 375 500
720 | 500 375
721 | 375 500
722 | 500 374
723 | 500 375
724 | 500 375
725 | 500 375
726 | 345 500
727 | 500 375
728 | 500 375
729 | 500 325
730 | 500 375
731 | 500 379
732 | 500 333
733 | 500 375
734 | 375 500
735 | 500 375
736 | 500 375
737 | 354 500
738 | 500 375
739 | 375 500
740 | 500 375
741 | 333 500
742 | 375 500
743 | 500 333
744 | 225 417
745 | 333 500
746 | 500 333
747 | 375 500
748 | 500 332
749 | 500 334
750 | 400 500
751 | 500 333
752 | 333 500
753 | 500 375
754 | 375 500
755 | 333 500
756 | 500 333
757 | 340 500
758 | 500 375
759 | 375 500
760 | 333 500
761 | 500 375
762 | 500 375
763 | 500 375
764 | 500 375
765 | 500 402
766 | 375 500
767 | 500 333
768 | 500 333
769 | 374 500
770 | 500 333
771 | 375 500
772 | 500 333
773 | 500 375
774 | 500 375
775 | 500 294
776 | 500 375
777 | 375 500
778 | 500 375
779 | 500 332
780 | 332 500
781 | 358 500
782 | 500 333
783 | 380 472
784 | 500 375
785 | 375 500
786 | 500 375
787 | 500 375
788 | 500 375
789 | 453 500
790 | 375 500
791 | 500 333
792 | 500 500
793 | 375 500
794 | 500 375
795 | 375 500
796 | 500 375
797 | 375 500
798 | 402 500
799 | 459 288
800 | 273 500
801 | 415 500
802 | 346 336
803 | 320 500
804 | 500 375
805 | 500 333
806 | 500 333
807 | 500 410
808 | 500 375
809 | 500 458
810 | 500 333
811 | 500 375
812 | 443 437
813 | 341 251
814 | 375 500
815 | 500 375
816 | 252 400
817 | 288 432
818 | 500 334
819 | 375 500
820 | 375 500
821 | 377 500
822 |
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from sys import platform
3 |
4 | from models import * # set ONNX_EXPORT in models.py
5 | from utils.datasets import *
6 | from utils.utils import *
7 |
8 |
9 | def detect(save_txt=False, save_img=False):
10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width)
11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
13 |
14 | # Initialize
15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
16 | if os.path.exists(out):
17 | shutil.rmtree(out) # delete output folder
18 | os.makedirs(out) # make new output folder
19 |
20 | # Initialize model
21 | model = Darknet(opt.cfg, img_size)
22 |
23 | # Load weights
24 | attempt_download(weights)
25 | if weights.endswith('.pt'): # pytorch format
26 | model.load_state_dict(torch.load(weights, map_location=device)['model'])
27 | else: # darknet format
28 | _ = load_darknet_weights(model, weights)
29 |
30 | # Fuse Conv2d + BatchNorm2d layers
31 | # model.fuse()
32 |
33 | # Eval mode
34 | model.to(device).eval()
35 |
36 | # Export mode
37 | if ONNX_EXPORT:
38 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192)
39 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
40 | return
41 |
42 | # Half precision
43 | half = half and device.type != 'cpu' # half precision only supported on CUDA
44 | if half:
45 | model.half()
46 |
47 | # Set Dataloader
48 | vid_path, vid_writer = None, None
49 | if webcam:
50 | view_img = True
51 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference
52 | dataset = LoadStreams(source, img_size=img_size, half=half)
53 | else:
54 | save_img = True
55 | dataset = LoadImages(source, img_size=img_size, half=half)
56 |
57 | # Get classes and colors
58 | classes = load_classes(parse_data_cfg(opt.data)['names'])
59 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
60 |
61 | # Run inference
62 | t0 = time.time()
63 | for path, img, im0s, vid_cap in dataset:
64 | t = time.time()
65 |
66 | # Get detections
67 | img = torch.from_numpy(img).to(device)
68 | if img.ndimension() == 3:
69 | img = img.unsqueeze(0)
70 | pred, _ = model(img)
71 |
72 | if opt.half:
73 | pred = pred.float()
74 |
75 | for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)): # detections per image
76 | if webcam: # batch_size >= 1
77 | p, s, im0 = path[i], '%g: ' % i, im0s[i]
78 | else:
79 | p, s, im0 = path, '', im0s
80 |
81 | save_path = str(Path(out) / Path(p).name)
82 | s += '%gx%g ' % img.shape[2:] # print string
83 | if det is not None and len(det):
84 | # Rescale boxes from img_size to im0 size
85 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
86 |
87 | # Print results
88 | for c in det[:, -1].unique():
89 | n = (det[:, -1] == c).sum() # detections per class
90 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string
91 |
92 | # Write results
93 | for *xyxy, conf, _, cls in det:
94 | if save_txt: # Write to file
95 | with open(save_path + '.txt', 'a') as file:
96 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
97 |
98 | if save_img or view_img: # Add bbox to image
99 | label = '%s %.2f' % (classes[int(cls)], conf)
100 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
101 |
102 | print('%sDone. (%.3fs)' % (s, time.time() - t))
103 |
104 | # Stream results
105 | if view_img:
106 | cv2.imshow(p, im0)
107 |
108 | # Save results (image with detections)
109 | if save_img:
110 | if dataset.mode == 'images':
111 | cv2.imwrite(save_path, im0)
112 | else:
113 | if vid_path != save_path: # new video
114 | vid_path = save_path
115 | if isinstance(vid_writer, cv2.VideoWriter):
116 | vid_writer.release() # release previous video writer
117 |
118 | fps = vid_cap.get(cv2.CAP_PROP_FPS)
119 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
120 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
121 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
122 | vid_writer.write(im0)
123 |
124 | if save_txt or save_img:
125 | print('Results saved to %s' % os.getcwd() + os.sep + out)
126 | if platform == 'darwin': # MacOS
127 | os.system('open ' + out + ' ' + save_path)
128 |
129 | print('Done. (%.3fs)' % (time.time() - t0))
130 |
131 |
132 | if __name__ == '__main__':
133 | parser = argparse.ArgumentParser()
134 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
135 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
136 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
137 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam
138 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder
139 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
140 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
141 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
142 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
143 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference')
144 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')
145 | parser.add_argument('--view-img', action='store_true', help='display results')
146 | opt = parser.parse_args()
147 | print(opt)
148 |
149 | with torch.no_grad():
150 | detect()
151 |
--------------------------------------------------------------------------------
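
The per-image loop in detect.py unpacks each detection row as x1, y1, x2, y2, conf, cls_conf, cls. The sketch below turns rows with that layout into plain Python records; the helper name detections_to_records, the fake tensor and the two-entry class list are assumptions for illustration, not part of the repository.

# Hypothetical helper: convert one image's detections, unpacked the same way as in
# detect.py (x1, y1, x2, y2, conf, cls_conf, cls), into a list of dicts.
import torch

def detections_to_records(det, classes):
    records = []
    for *xyxy, conf, _, cls in det:
        records.append({
            'box': [float(v) for v in xyxy],  # pixel corners (after scale_coords in detect.py)
            'score': float(conf),
            'label': classes[int(cls)],
        })
    return records

# made-up tensor with two fake detections, purely for demonstration
fake = torch.tensor([[10., 20., 110., 220., 0.90, 0.80, 0.],
                     [30., 40., 90., 200., 0.70, 0.60, 1.]])
print(detections_to_records(fake, ['person', 'bicycle']))

--------------------------------------------------------------------------------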
/layer_prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import torch
4 | import numpy as np
5 | from copy import deepcopy
6 | from test import test
7 | from terminaltables import AsciiTable
8 | import time
9 | from utils.utils import *
10 | from utils.prune_utils import *
11 | import argparse
12 |
13 |
14 |
15 | if __name__ == '__main__':
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-hand.cfg', help='cfg file path')
18 | parser.add_argument('--data', type=str, default='data/oxfordhand.data', help='*.data file path')
19 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights')
20 |     parser.add_argument('--shortcuts', type=int, default=8, help='how many shortcut layers will be pruned; \
21 |         pruning one shortcut also prunes two CBLs (yolov3 has 23 shortcuts)')
22 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
23 | opt = parser.parse_args()
24 | print(opt)
25 |
26 | img_size = opt.img_size
27 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
28 | model = Darknet(opt.cfg, (img_size, img_size)).to(device)
29 |
30 | if opt.weights.endswith(".pt"):
31 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model'])
32 | else:
33 | load_darknet_weights(model, opt.weights)
34 | print('\nloaded weights from ',opt.weights)
35 |
36 |
37 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size)
38 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
39 |
40 | with torch.no_grad():
41 | print("\nlet's test the original model first:")
42 | origin_model_metric = eval_model(model)
43 | origin_nparameters = obtain_num_parameters(model)
44 |
45 |
46 | CBL_idx, Conv_idx, shortcut_idx = parse_module_defs4(model.module_defs)
47 | print('all shortcut_idx:', [i + 1 for i in shortcut_idx])
48 |
49 |
50 | bn_weights = gather_bn_weights(model.module_list, shortcut_idx)
51 |
52 | sorted_bn = torch.sort(bn_weights)[0]
53 |
54 |
55 | # highest_thre = torch.zeros(len(shortcut_idx))
56 | # for i, idx in enumerate(shortcut_idx):
57 | # highest_thre[i] = model.module_list[idx][1].weight.data.abs().max().clone()
58 | # _, sorted_index_thre = torch.sort(highest_thre)
59 |
60 |     # The layer-selection strategy was changed here: shortcuts are ranked by the mean of the BN weights instead of the max. The mean usually performs slightly better, but not always, so feel free to switch back; the four commented lines above are the original max-based strategy.
61 | bn_mean = torch.zeros(len(shortcut_idx))
62 | for i, idx in enumerate(shortcut_idx):
63 | bn_mean[i] = model.module_list[idx][1].weight.data.abs().mean().clone()
64 | _, sorted_index_thre = torch.sort(bn_mean)
65 |
66 |
67 |     prune_shortcuts = torch.tensor(shortcut_idx)[sorted_index_thre[:opt.shortcuts]]
68 | prune_shortcuts = [int(x) for x in prune_shortcuts]
69 |
70 | index_all = list(range(len(model.module_defs)))
71 | index_prune = []
72 | for idx in prune_shortcuts:
73 | index_prune.extend([idx - 1, idx, idx + 1])
74 | index_remain = [idx for idx in index_all if idx not in index_prune]
75 |
76 |     print('These shortcut layers and their corresponding CBLs will be pruned:', index_prune)
77 |
78 |
79 |
80 |
81 |
82 | def prune_and_eval(model, prune_shortcuts=[]):
83 | model_copy = deepcopy(model)
84 | for idx in prune_shortcuts:
85 | for i in [idx, idx-1]:
86 | bn_module = model_copy.module_list[i][1]
87 |
88 | mask = torch.zeros(bn_module.weight.data.shape[0]).cuda()
89 | bn_module.weight.data.mul_(mask)
90 |
91 |
92 | with torch.no_grad():
93 | mAP = eval_model(model_copy)[0][2]
94 |
95 |         print(f'Simply masking the BN Gamma of the to-be-pruned CBLs to zero, the mAP is now {mAP:.4f}')
96 |
97 |
98 | prune_and_eval(model, prune_shortcuts)
99 |
100 |
101 |
102 |
103 |
104 | #%%
105 | def obtain_filters_mask(model, CBL_idx, prune_shortcuts):
106 |
107 | filters_mask = []
108 | for idx in CBL_idx:
109 | bn_module = model.module_list[idx][1]
110 | mask = np.ones(bn_module.weight.data.shape[0], dtype='float32')
111 | filters_mask.append(mask.copy())
112 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
113 | for idx in prune_shortcuts:
114 | for i in [idx, idx - 1]:
115 | bn_module = model.module_list[i][1]
116 | mask = np.zeros(bn_module.weight.data.shape[0], dtype='float32')
117 | CBLidx2mask[i] = mask.copy()
118 | return CBLidx2mask
119 |
120 |
121 | CBLidx2mask = obtain_filters_mask(model, CBL_idx, prune_shortcuts)
122 |
123 |
124 |
125 | pruned_model = prune_model_keep_size2(model, CBL_idx, CBL_idx, CBLidx2mask)
126 |
127 | with torch.no_grad():
128 | mAP = eval_model(pruned_model)[0][2]
129 |     print("After transferring the offset of the pruned CBLs' activations, the mAP is {}".format(mAP))
130 |
131 |
132 | compact_module_defs = deepcopy(model.module_defs)
133 |
134 |
135 | for j, module_def in enumerate(compact_module_defs):
136 | if module_def['type'] == 'route':
137 | from_layers = [int(s) for s in module_def['layers'].split(',')]
138 | if len(from_layers) == 1 and from_layers[0] > 0:
139 | count = 0
140 | for i in index_prune:
141 | if i <= from_layers[0]:
142 | count += 1
143 | from_layers[0] = from_layers[0] - count
144 | from_layers = str(from_layers[0])
145 | module_def['layers'] = from_layers
146 |
147 | elif len(from_layers) == 2:
148 | count = 0
149 | if from_layers[1] > 0:
150 | for i in index_prune:
151 | if i <= from_layers[1]:
152 | count += 1
153 | from_layers[1] = from_layers[1] - count
154 | else:
155 | for i in index_prune:
156 | if i > j + from_layers[1] and i < j:
157 | count += 1
158 | from_layers[1] = from_layers[1] + count
159 |
160 | from_layers = ', '.join([str(s) for s in from_layers])
161 | module_def['layers'] = from_layers
162 |
163 | compact_module_defs = [compact_module_defs[i] for i in index_remain]
164 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device)
165 | for i, index in enumerate(index_remain):
166 | compact_model.module_list[i] = pruned_model.module_list[index]
167 |
168 | compact_nparameters = obtain_num_parameters(compact_model)
169 |
170 | # init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
171 |
172 |
173 | random_input = torch.rand((1, 3, img_size, img_size)).to(device)
174 |
175 | def obtain_avg_forward_time(input, model, repeat=200):
176 |
177 | model.eval()
178 | start = time.time()
179 | with torch.no_grad():
180 | for i in range(repeat):
181 | output = model(input)
182 | avg_infer_time = (time.time() - start) / repeat
183 |
184 | return avg_infer_time, output
185 |
186 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
187 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
188 |
189 |
190 |     # evaluate the pruned model on the test set and count its parameters
191 | with torch.no_grad():
192 | compact_model_metric = eval_model(compact_model)
193 |
194 |
195 |     # compare the parameter count and metrics before and after pruning
196 | metric_table = [
197 | ["Metric", "Before", "After"],
198 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'],
199 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
200 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
201 | ]
202 | print(AsciiTable(metric_table).table)
203 |
204 |
205 |     # generate the pruned cfg file and save the model
206 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{opt.shortcuts}_shortcut_')
207 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
208 | print(f'Config file has been saved: {pruned_cfg_file}')
209 |
210 | compact_model_name = opt.weights.replace('/', f'/prune_{opt.shortcuts}_shortcut_')
211 | if compact_model_name.endswith('.pt'):
212 | compact_model_name = compact_model_name.replace('.pt', '.weights')
213 |
214 | save_weights(compact_model, path=compact_model_name)
215 | print(f'Compact model has been saved: {compact_model_name}')
216 |
217 |
--------------------------------------------------------------------------------
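
The index bookkeeping in layer_prune.py is the subtle part: pruning the shortcut at index idx also removes the CBL blocks at idx-1 and idx+1, and any positive route target must then be shifted down by the number of removed layers at or before it. Below is a toy sketch of that arithmetic; the helper name remaining_indices, the layer count and the pruned indices are made up for illustration.

# Toy illustration of the index bookkeeping in layer_prune.py: pruning a shortcut
# at index idx also drops the two CBL blocks at idx-1 and idx+1, and every later
# layer index shifts down by the number of removed layers before it.
def remaining_indices(n_layers, prune_shortcuts):
    index_prune = []
    for idx in prune_shortcuts:
        index_prune.extend([idx - 1, idx, idx + 1])
    return [i for i in range(n_layers) if i not in index_prune], index_prune

index_remain, index_prune = remaining_indices(12, prune_shortcuts=[4, 9])
print("pruned:", index_prune)      # [3, 4, 5, 8, 9, 10]
print("remaining:", index_remain)  # [0, 1, 2, 6, 7, 11]

# a positive route target such as layers=7 is decreased by the number of pruned
# layers at or before it (here 3), mirroring the loop in layer_prune.py
target = 7
print("adjusted route:", target - sum(1 for i in index_prune if i <= target))  # 4

--------------------------------------------------------------------------------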
/prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import numpy as np
4 | from copy import deepcopy
5 | from test import test
6 | from terminaltables import AsciiTable
7 | import time
8 | from utils.prune_utils import *
9 | import argparse
10 |
11 |
12 |
13 | if __name__ == '__main__':
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path')
17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights')
18 | parser.add_argument('--percent', type=float, default=0.8, help='channel prune percent')
19 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
20 | opt = parser.parse_args()
21 | print(opt)
22 |
23 |
24 | img_size = opt.img_size
25 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26 | model = Darknet(opt.cfg, (img_size, img_size)).to(device)
27 | if opt.weights.endswith('.pt'):
28 | model.load_state_dict(torch.load(opt.weights)['model'])
29 | else:
30 | load_darknet_weights(model, opt.weights)
31 | print('\nloaded weights from ',opt.weights)
32 |
33 | eval_model = lambda model:test(opt.cfg, opt.data,
34 | weights=opt.weights,
35 | batch_size=16,
36 | img_size=img_size,
37 | iou_thres=0.5,
38 | conf_thres=0.001,
39 | nms_thres=0.5,
40 | save_json=False,
41 | model=model)
42 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
43 |
44 | print("\nlet's test the original model first:")
45 | with torch.no_grad():
46 | origin_model_metric = eval_model(model)
47 |
48 | origin_nparameters = obtain_num_parameters(model)
49 |
50 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs)
51 |
52 | bn_weights = gather_bn_weights(model.module_list, prune_idx)
53 |
54 | sorted_bn = torch.sort(bn_weights)[0]
55 |
56 |     # upper bound for the threshold so that no layer loses all of its channels (the minimum over layers of each BN layer's largest gamma)
57 | highest_thre = []
58 | for idx in prune_idx:
59 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item())
60 | highest_thre = min(highest_thre)
61 |
62 |     # find the percentile that highest_thre corresponds to
63 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)
64 |
65 | print(f'Suggested Gamma threshold should be less than {highest_thre:.4f}.')
66 |     print(f'The corresponding prune ratio is {percent_limit:.3f}, but you can set it higher.')
67 |
68 | #%%
69 | def prune_and_eval(model, sorted_bn, percent=.0):
70 | model_copy = deepcopy(model)
71 | thre_index = int(len(sorted_bn) * percent)
72 | thre = sorted_bn[thre_index]
73 |
74 |         print(f'Gamma values less than {thre:.4f} are set to zero!')
75 |
76 | remain_num = 0
77 | for idx in prune_idx:
78 |
79 | bn_module = model_copy.module_list[idx][1]
80 |
81 | mask = obtain_bn_mask(bn_module, thre)
82 |
83 | remain_num += int(mask.sum())
84 | bn_module.weight.data.mul_(mask)
85 | print("let's test the current model!")
86 | with torch.no_grad():
87 | mAP = eval_model(model_copy)[0][2]
88 |
89 |
90 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
91 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}')
92 | print(f"mAP of the 'pruned' model is {mAP:.4f}")
93 |
94 | return thre
95 |
96 | percent = opt.percent
97 | print('the required prune percent is', percent)
98 | threshold = prune_and_eval(model, sorted_bn, percent)
99 | #%%
100 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
101 |
102 | pruned = 0
103 | total = 0
104 | num_filters = []
105 | filters_mask = []
106 | for idx in CBL_idx:
107 | bn_module = model.module_list[idx][1]
108 | if idx in prune_idx:
109 |
110 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy()
111 | remain = int(mask.sum())
112 | pruned = pruned + mask.shape[0] - remain
113 |
114 | if remain == 0:
115 | # print("Channels would be all pruned!")
116 | # raise Exception
117 | max_value = bn_module.weight.data.abs().max()
118 | mask = obtain_bn_mask(bn_module, max_value).cpu().numpy()
119 | remain = int(mask.sum())
120 | pruned = pruned + mask.shape[0] - remain
121 |
122 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
123 | f'remaining channel: {remain:>4d}')
124 | else:
125 | mask = np.ones(bn_module.weight.data.shape)
126 | remain = mask.shape[0]
127 |
128 | total += mask.shape[0]
129 | num_filters.append(remain)
130 | filters_mask.append(mask.copy())
131 |
132 | prune_ratio = pruned / total
133 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
134 |
135 | return num_filters, filters_mask
136 |
137 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx)
138 |
139 | #%%
140 | CBLidx2mask = {idx: mask.astype('float32') for idx, mask in zip(CBL_idx, filters_mask)}
141 |
142 | pruned_model = prune_model_keep_size2(model, CBL_idx, CBL_idx, CBLidx2mask)
143 |
144 |     print("\nNow prune the model but keep its size (the BN beta offsets are folded into the next layer); let's see how the mAP goes")
145 | with torch.no_grad():
146 | eval_model(pruned_model)
147 |
148 |
149 | #%%
150 | compact_module_defs = deepcopy(model.module_defs)
151 | for idx, num in zip(CBL_idx, num_filters):
152 | assert compact_module_defs[idx]['type'] == 'convolutional'
153 | compact_module_defs[idx]['filters'] = str(num)
154 |
155 | #%%
156 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device)
157 | compact_nparameters = obtain_num_parameters(compact_model)
158 |
159 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
160 |
161 | #%%
162 | random_input = torch.rand((1, 3, img_size, img_size)).to(device)
163 |
164 | def obtain_avg_forward_time(input, model, repeat=200):
165 |
166 | model.eval()
167 | start = time.time()
168 | with torch.no_grad():
169 | for i in range(repeat):
170 | output = model(input)[0]
171 | avg_infer_time = (time.time() - start) / repeat
172 |
173 | return avg_infer_time, output
174 |
175 | print('\ntesting avg forward time...')
176 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
177 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
178 |
179 | diff = (pruned_output-compact_output).abs().gt(0.001).sum().item()
180 | if diff > 0:
181 | print('Something wrong with the pruned model!')
182 |
183 | #%%
184 |     # evaluate the pruned model on the test set and count its parameters
185 | print('testing the mAP of final pruned model')
186 | with torch.no_grad():
187 | compact_model_metric = eval_model(compact_model)
188 |
189 |
190 | #%%
191 |     # compare the parameter count and metrics before and after pruning
192 | metric_table = [
193 | ["Metric", "Before", "After"],
194 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'],
195 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
196 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
197 | ]
198 | print(AsciiTable(metric_table).table)
199 |
200 | #%%
201 |     # generate the pruned cfg file and save the model
202 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_')
203 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
204 | print(f'Config file has been saved: {pruned_cfg_file}')
205 |
206 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_')
207 | if compact_model_name.endswith('.pt'):
208 | compact_model_name = compact_model_name.replace('.pt', '.weights')
209 | save_weights(compact_model, compact_model_name)
210 | print(f'Compact model has been saved: {compact_model_name}')
211 |
212 |
--------------------------------------------------------------------------------
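
The threshold selection in prune.py works on the concatenated, sorted |gamma| values of every prunable BN layer: the value at the requested percentile becomes the channel-pruning threshold, and highest_thre (the smallest per-layer maximum) marks the ratio beyond which some layer would lose all of its channels. A toy sketch of that selection; the gamma values and the two imaginary layers below are made up.

# Toy sketch of the threshold selection in prune.py, with made-up gamma values
# for two imaginary prunable BN layers.
import torch

layer1 = torch.tensor([0.02, 0.90, 0.05, 0.60])
layer2 = torch.tensor([0.01, 0.40, 0.03, 0.70])
bn_weights = torch.cat([layer1, layer2])
sorted_bn = torch.sort(bn_weights)[0]

percent = 0.5                                   # requested global prune ratio
thre = sorted_bn[int(len(sorted_bn) * percent)]
print(f'channels with gamma < {thre:.2f} would be pruned')   # 0.40

# smallest per-layer maximum: pruning past this value would empty some layer
highest_thre = min(layer1.abs().max().item(), layer2.abs().max().item())
percent_limit = (sorted_bn == highest_thre).nonzero().item() / len(bn_weights)
print(f'suggested threshold < {highest_thre:.2f}, i.e. a prune ratio of about {percent_limit:.3f}')  # 0.750

--------------------------------------------------------------------------------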
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip3 install -U -r requirements.txt
2 | numpy
3 | opencv-python
4 | torch >= 1.2
5 | matplotlib
6 | pycocotools
7 | tqdm
8 | tb-nightly
9 | future
10 | Pillow
11 |
12 | # Equivalent conda commands ----------------------------------------------------
13 | # conda update -n base -c defaults conda
14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow
15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools
16 | # conda install -yc spyder-ide spyder-line-profiler
17 | # conda install -yc pytorch pytorch torchvision
18 |
--------------------------------------------------------------------------------
/shortcut_prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import numpy as np
4 | from copy import deepcopy
5 | from test import test
6 | from terminaltables import AsciiTable
7 | import time
8 | from utils.prune_utils import *
9 | import argparse
10 |
11 |
12 |
13 | if __name__ == '__main__':
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path')
17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights')
18 | parser.add_argument('--percent', type=float, default=0.6, help='channel prune percent')
19 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
20 | opt = parser.parse_args()
21 | print(opt)
22 |
23 | img_size = opt.img_size
24 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25 | model = Darknet(opt.cfg, (img_size, img_size)).to(device)
26 |
27 | if opt.weights.endswith(".pt"):
28 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model'])
29 | else:
30 | _ = load_darknet_weights(model, opt.weights)
31 | print('\nloaded weights from ',opt.weights)
32 |
33 |
34 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size)
35 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
36 |
37 | print("\nlet's test the original model first:")
38 | with torch.no_grad():
39 | origin_model_metric = eval_model(model)
40 | origin_nparameters = obtain_num_parameters(model)
41 |
42 | CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all= parse_module_defs2(model.module_defs)
43 |
44 |
45 | sort_prune_idx=[idx for idx in prune_idx if idx not in shortcut_idx]
46 |
47 |     # copy the gamma parameters of every BN layer to be pruned into the bn_weights list
48 | bn_weights = gather_bn_weights(model.module_list, sort_prune_idx)
49 |
50 |     # torch.sort returns a pair: the sorted values and the indices they came from
51 | sorted_bn = torch.sort(bn_weights)[0]
52 |
53 |
54 |     # upper bound for the threshold so that no layer loses all of its channels (the minimum over layers of each BN layer's largest gamma)
55 | highest_thre = []
56 | for idx in sort_prune_idx:
57 |         # .item() returns the scalar value stored in the tensor
58 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item())
59 | highest_thre = min(highest_thre)
60 |
61 |     # find the percentile that highest_thre corresponds to
62 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)
63 |
64 |     print(f'Suggested threshold should be less than {highest_thre:.4f}.')
65 |     print(f'The corresponding prune ratio is {percent_limit:.3f}, but you can set it higher.')
66 |
67 |
68 | def prune_and_eval(model, sorted_bn, percent=.0):
69 | model_copy = deepcopy(model)
70 | thre_index = int(len(sorted_bn) * percent)
71 |         # threshold on gamma: channels whose gamma falls below this value are all pruned
72 | thre1 = sorted_bn[thre_index]
73 |
74 | print(f'Channels with Gamma value less than {thre1:.6f} are pruned!')
75 |
76 | remain_num = 0
77 | idx_new=dict()
78 | for idx in prune_idx:
79 |
80 | if idx not in shortcut_idx:
81 |
82 | bn_module = model_copy.module_list[idx][1]
83 |
84 | mask = obtain_bn_mask(bn_module, thre1)
85 |                 # record the post-pruning mask of each convolutional layer
86 | # idx_new[idx]=mask.cpu().numpy()
87 | idx_new[idx]=mask
88 | remain_num += int(mask.sum())
89 | bn_module.weight.data.mul_(mask)
90 | #bn_module.bias.data.mul_(mask*0.0001)
91 | else:
92 |
93 | bn_module = model_copy.module_list[idx][1]
94 |
95 |
96 | mask=idx_new[shortcut_idx[idx]]
97 | idx_new[idx]=mask
98 |
99 |
100 | remain_num += int(mask.sum())
101 | bn_module.weight.data.mul_(mask)
102 |
103 | #print(int(mask.sum()))
104 |
105 | with torch.no_grad():
106 | mAP = eval_model(model_copy)[0][2]
107 |
108 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
109 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}')
110 | print(f'mAP of the pruned model is {mAP:.4f}')
111 |
112 | return thre1
113 |
114 | percent = opt.percent
115 | threshold = prune_and_eval(model, sorted_bn, percent)
116 |
117 |
118 |
119 | #****************************************************************
120 |     # The pruning effect is already visible above, but the pruned network structure has not been built yet; the code below builds the new structure and copies the old model's parameters into it
121 |
122 |
123 |
124 | #%%
125 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
126 |
127 | pruned = 0
128 | total = 0
129 | num_filters = []
130 | filters_mask = []
131 | idx_new=dict()
132 |         # CBL_idx holds every convolutional layer that has a BN (the conv right before each YOLO layer has no BN)
133 | for idx in CBL_idx:
134 | bn_module = model.module_list[idx][1]
135 | if idx in prune_idx:
136 | if idx not in shortcut_idx:
137 |
138 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy()
139 | idx_new[idx]=mask
140 | remain = int(mask.sum())
141 | pruned = pruned + mask.shape[0] - remain
142 |
143 | # if remain == 0:
144 | # print("Channels would be all pruned!")
145 | # raise Exception
146 |
147 | # print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
148 | # f'remaining channel: {remain:>4d}')
149 | else:
150 | mask=idx_new[shortcut_idx[idx]]
151 | idx_new[idx]=mask
152 | remain= int(mask.sum())
153 | pruned = pruned + mask.shape[0] - remain
154 |
155 | if remain == 0:
156 | # print("Channels would be all pruned!")
157 | # raise Exception
158 | max_value = bn_module.weight.data.abs().max()
159 | mask = obtain_bn_mask(bn_module, max_value).cpu().numpy()
160 | remain = int(mask.sum())
161 | pruned = pruned + mask.shape[0] - remain
162 |
163 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
164 | f'remaining channel: {remain:>4d}')
165 | else:
166 | mask = np.ones(bn_module.weight.data.shape)
167 | remain = mask.shape[0]
168 |
169 | total += mask.shape[0]
170 | num_filters.append(remain)
171 | filters_mask.append(mask.copy())
172 |
173 |         # so the prune_ratio computed here is: pruned gamma parameters / all gamma parameters over CBL_idx
174 | prune_ratio = pruned / total
175 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
176 |
177 | return num_filters, filters_mask
178 |
179 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx)
180 |
181 |
182 |     # CBLidx2mask maps each index in CBL_idx to the mask of its BN layer
183 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
184 |
185 |
186 | pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask)
187 |     print("\nNow prune the model but keep its size (the BN beta offsets are folded into the next layer); let's see how the mAP goes")
188 |
189 | with torch.no_grad():
190 | eval_model(pruned_model)
191 |
192 |
193 |
194 |     # take the original model's module_defs and update the filter counts in them
195 | compact_module_defs = deepcopy(model.module_defs)
196 | for idx, num in zip(CBL_idx, num_filters):
197 | assert compact_module_defs[idx]['type'] == 'convolutional'
198 | compact_module_defs[idx]['filters'] = str(num)
199 |
200 |
201 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device)
202 | compact_nparameters = obtain_num_parameters(compact_model)
203 |
204 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
205 |
206 |
207 | random_input = torch.rand((1, 3, img_size, img_size)).to(device)
208 |
209 | def obtain_avg_forward_time(input, model, repeat=200):
210 |
211 | model.eval()
212 | start = time.time()
213 | with torch.no_grad():
214 | for i in range(repeat):
215 | output = model(input)
216 | avg_infer_time = (time.time() - start) / repeat
217 |
218 | return avg_infer_time, output
219 |
220 | print('testing Inference time...')
221 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
222 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
223 |
224 |
225 |     # evaluate the pruned model on the test set and count its parameters
226 | print('testing final model')
227 | with torch.no_grad():
228 | compact_model_metric = eval_model(compact_model)
229 |
230 |
231 |     # compare the parameter count and metrics before and after pruning
232 | metric_table = [
233 | ["Metric", "Before", "After"],
234 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'],
235 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
236 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
237 | ]
238 | print(AsciiTable(metric_table).table)
239 |
240 |
241 |     # generate the pruned cfg file and save the model
242 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_')
243 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
244 | print(f'Config file has been saved: {pruned_cfg_file}')
245 |
246 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_')
247 | if compact_model_name.endswith('.pt'):
248 | compact_model_name = compact_model_name.replace('.pt', '.weights')
249 | save_weights(compact_model, path=compact_model_name)
250 | print(f'Compact model has been saved: {compact_model_name}')
251 |
252 |
--------------------------------------------------------------------------------
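Note: the `remain == 0` branch in `obtain_filters_mask` above acts as a per-layer safeguard. If the global threshold would prune every channel of a layer, it falls back to that layer's largest |gamma| so at least one channel survives. A minimal standalone sketch of that idea, using hypothetical tensors rather than repository code:

import torch

gamma = torch.tensor([0.01, 0.02, 0.005])  # hypothetical BN gammas of one layer
threshold = 0.05                           # global threshold larger than every gamma here

mask = (gamma.abs() > threshold).float()
if int(mask.sum()) == 0:                               # every channel would be pruned
    mask = (gamma.abs() >= gamma.abs().max()).float()  # keep the strongest channel(s)

print(mask)  # tensor([0., 1., 0.])
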
/slim_prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import numpy as np
4 | from copy import deepcopy
5 | from test import test
6 | from terminaltables import AsciiTable
7 | import time
8 | from utils.prune_utils import *
9 | import argparse
10 |
11 |
12 |
13 | if __name__ == '__main__':
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path')
17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights')
18 | parser.add_argument('--global_percent', type=float, default=0.8, help='global channel prune percent')
19 | parser.add_argument('--layer_keep', type=float, default=0.01, help='channel keep percent per layer')
20 | parser.add_argument('--img_size', type=int, default=416, help='inference size (pixels)')
21 | opt = parser.parse_args()
22 | print(opt)
23 |
24 | img_size = opt.img_size
25 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
26 | model = Darknet(opt.cfg, (img_size, img_size)).to(device)
27 |
28 | if opt.weights.endswith(".pt"):
29 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model'])
30 | else:
31 | _ = load_darknet_weights(model, opt.weights)
32 |         print('\nloaded weights from', opt.weights)
33 |
34 |
35 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data, batch_size=16, img_size=img_size)
36 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
37 |
38 | print("\nlet's test the original model first:")
39 | with torch.no_grad():
40 | origin_model_metric = eval_model(model)
41 | origin_nparameters = obtain_num_parameters(model)
42 |
43 | CBL_idx, Conv_idx, prune_idx, _, _= parse_module_defs2(model.module_defs)
44 |
45 |
46 |
47 | bn_weights = gather_bn_weights(model.module_list, prune_idx)
48 |
49 |     # sort the gathered BN gammas to locate the global pruning threshold
50 |     sorted_bn, sorted_index = torch.sort(bn_weights)
51 | thresh_index = int(len(bn_weights) * opt.global_percent)
52 | thresh = sorted_bn[thresh_index].cuda()
53 |
54 |     print(f'Global pruning threshold: {thresh:.4f} (channels with gamma below this value are prune candidates).')
55 |
56 |
57 |
58 |
59 | #%%
60 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
61 |
62 | pruned = 0
63 | total = 0
64 | num_filters = []
65 | filters_mask = []
66 | for idx in CBL_idx:
67 | bn_module = model.module_list[idx][1]
68 | if idx in prune_idx:
69 |
70 | weight_copy = bn_module.weight.data.abs().clone()
71 |
72 |                 channels = weight_copy.shape[0]  # number of output channels in this layer
73 |                 min_channel_num = int(channels * opt.layer_keep) if int(channels * opt.layer_keep) > 0 else 1
74 |                 mask = weight_copy.gt(thre).float()  # prune candidates: gamma <= global threshold
75 |
76 | if int(torch.sum(mask)) < min_channel_num:
77 | _, sorted_index_weights = torch.sort(weight_copy,descending=True)
78 | mask[sorted_index_weights[:min_channel_num]]=1.
79 | remain = int(mask.sum())
80 | pruned = pruned + mask.shape[0] - remain
81 |
82 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
83 | f'remaining channel: {remain:>4d}')
84 | else:
85 | mask = torch.ones(bn_module.weight.data.shape)
86 | remain = mask.shape[0]
87 |
88 | total += mask.shape[0]
89 | num_filters.append(remain)
90 | filters_mask.append(mask.clone())
91 |
92 | prune_ratio = pruned / total
93 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
94 |
95 | return num_filters, filters_mask
96 |
97 | num_filters, filters_mask = obtain_filters_mask(model, thresh, CBL_idx, prune_idx)
98 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
99 | CBLidx2filters = {idx: filters for idx, filters in zip(CBL_idx, num_filters)}
100 |
101 | for i in model.module_defs:
102 | if i['type'] == 'shortcut':
103 | i['is_access'] = False
104 |
105 |     print('merge the masks of layers connected by shortcuts!')
106 | merge_mask(model, CBLidx2mask, CBLidx2filters)
107 |
108 |
109 |
110 | def prune_and_eval(model, CBL_idx, CBLidx2mask):
111 | model_copy = deepcopy(model)
112 |
113 | for idx in CBL_idx:
114 | bn_module = model_copy.module_list[idx][1]
115 | mask = CBLidx2mask[idx].cuda()
116 | bn_module.weight.data.mul_(mask)
117 |
118 | with torch.no_grad():
119 | mAP = eval_model(model_copy)[0][2]
120 |
121 |     print(f'after masking the pruned gammas to zero, the mAP of the model is {mAP:.4f}')
122 |
123 |
124 | prune_and_eval(model, CBL_idx, CBLidx2mask)
125 |
126 |
127 | for i in CBLidx2mask:
128 | CBLidx2mask[i] = CBLidx2mask[i].clone().cpu().numpy()
129 |
130 |
131 |
132 | pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask)
133 |     print("\nnow prune the model but keep its size (actually, the BN beta of each pruned channel is added as an offset to the following layers); let's see how the mAP goes")
134 |
135 | with torch.no_grad():
136 | eval_model(pruned_model)
137 |
138 | for i in model.module_defs:
139 | if i['type'] == 'shortcut':
140 | i.pop('is_access')
141 |
142 | compact_module_defs = deepcopy(model.module_defs)
143 | for idx in CBL_idx:
144 | assert compact_module_defs[idx]['type'] == 'convolutional'
145 | compact_module_defs[idx]['filters'] = str(CBLidx2filters[idx])
146 |
147 |
148 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs, (img_size, img_size)).to(device)
149 | compact_nparameters = obtain_num_parameters(compact_model)
150 |
151 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
152 |
153 |
154 | random_input = torch.rand((1, 3, img_size, img_size)).to(device)
155 |
156 | def obtain_avg_forward_time(input, model, repeat=200):
157 |
158 | model.eval()
159 | start = time.time()
160 | with torch.no_grad():
161 | for i in range(repeat):
162 | output = model(input)
163 | avg_infer_time = (time.time() - start) / repeat
164 |
165 | return avg_infer_time, output
166 |
167 | print('testing inference time...')
168 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
169 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
170 |
171 |
172 | print('testing the final model...')
173 | with torch.no_grad():
174 | compact_model_metric = eval_model(compact_model)
175 |
176 |
177 | metric_table = [
178 | ["Metric", "Before", "After"],
179 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'],
180 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
181 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
182 | ]
183 | print(AsciiTable(metric_table).table)
184 |
185 |
186 |
187 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{opt.global_percent}_keep_{opt.layer_keep}_')
188 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
189 | print(f'Config file has been saved: {pruned_cfg_file}')
190 |
191 | compact_model_name = opt.weights.replace('/', f'/prune_{opt.global_percent}_keep_{opt.layer_keep}_')
192 | if compact_model_name.endswith('.pt'):
193 | compact_model_name = compact_model_name.replace('.pt', '.weights')
194 | save_weights(compact_model, path=compact_model_name)
195 | print(f'Compact model has been saved: {compact_model_name}')
196 |
197 |
--------------------------------------------------------------------------------
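The global threshold selection in slim_prune.py (gather all prunable BN gammas, sort them, and index at global_percent) can be illustrated in isolation. A minimal sketch with hypothetical values:

import torch

bn_weights = torch.tensor([0.9, 0.01, 0.4, 0.02, 0.3, 0.05])  # hypothetical gathered gammas
global_percent = 0.5

sorted_bn, _ = torch.sort(bn_weights)
thresh_index = int(len(bn_weights) * global_percent)
thresh = sorted_bn[thresh_index]
print(thresh)  # tensor(0.3000); channels with gamma <= 0.3 become prune candidates,
               # subject to the per-layer --layer_keep minimum
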
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 |
4 | from torch.utils.data import DataLoader
5 |
6 | from models import *
7 | from utils.datasets import *
8 | from utils.utils import *
9 |
10 |
11 | def test(cfg,
12 | data,
13 | weights=None,
14 | batch_size=16,
15 | img_size=416,
16 | iou_thres=0.5,
17 | conf_thres=0.001,
18 | nms_thres=0.5,
19 | save_json=False,
20 | model=None):
21 |
22 | # Initialize/load model and set device
23 | if model is None:
24 | device = torch_utils.select_device(opt.device)
25 | verbose = True
26 |
27 | # Initialize model
28 | model = Darknet(cfg, img_size).to(device)
29 |
30 | # Load weights
31 | attempt_download(weights)
32 | if weights.endswith('.pt'): # pytorch format
33 | model.load_state_dict(torch.load(weights, map_location=device)['model'])
34 | else: # darknet format
35 | _ = load_darknet_weights(model, weights)
36 |
37 | if torch.cuda.device_count() > 1:
38 | model = nn.DataParallel(model)
39 | else:
40 | device = next(model.parameters()).device # get model device
41 | verbose = False
42 |
43 | # Configure run
44 | data = parse_data_cfg(data)
45 | nc = int(data['classes']) # number of classes
46 | test_path = data['valid'] # path to test images
47 | names = load_classes(data['names']) # class names
48 |
49 | # Dataloader
50 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
51 | dataloader = DataLoader(dataset,
52 | batch_size=batch_size,
53 | num_workers=min([os.cpu_count(), batch_size, 16]),
54 | pin_memory=True,
55 | collate_fn=dataset.collate_fn)
56 |
57 | seen = 0
58 | model.eval()
59 | coco91class = coco80_to_coco91_class()
60 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1')
61 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
62 | loss = torch.zeros(3)
63 | jdict, stats, ap, ap_class = [], [], [], []
64 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
65 | targets = targets.to(device)
66 | imgs = imgs.to(device)
67 | _, _, height, width = imgs.shape # batch size, channels, height, width
68 |
69 | # Plot images with bounding boxes
70 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
71 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg')
72 |
73 | # Run model
74 | inf_out, train_out = model(imgs) # inference and training outputs
75 |
76 | # Compute loss
77 | if hasattr(model, 'hyp'): # if model has loss hyperparameters
78 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls
79 |
80 | # Run NMS
81 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
82 |
83 | # Statistics per image
84 | for si, pred in enumerate(output):
85 | labels = targets[targets[:, 0] == si, 1:]
86 | nl = len(labels)
87 | tcls = labels[:, 0].tolist() if nl else [] # target class
88 | seen += 1
89 |
90 | if pred is None:
91 | if nl:
92 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
93 | continue
94 |
95 | # Append to text file
96 | # with open('test.txt', 'a') as file:
97 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
98 |
99 | # Append to pycocotools JSON dictionary
100 | if save_json:
101 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
102 | image_id = int(Path(paths[si]).stem.split('_')[-1])
103 | box = pred[:, :4].clone() # xyxy
104 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape
105 | box = xyxy2xywh(box) # xywh
106 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
107 | for di, d in enumerate(pred):
108 | jdict.append({'image_id': image_id,
109 | 'category_id': coco91class[int(d[6])],
110 | 'bbox': [floatn(x, 3) for x in box[di]],
111 | 'score': floatn(d[4], 5)})
112 |
113 | # Clip boxes to image bounds
114 | clip_coords(pred, (height, width))
115 |
116 | # Assign all predictions as incorrect
117 | correct = [0] * len(pred)
118 | if nl:
119 | detected = []
120 | tcls_tensor = labels[:, 0]
121 |
122 | # target boxes
123 | tbox = xywh2xyxy(labels[:, 1:5])
124 | tbox[:, [0, 2]] *= width
125 | tbox[:, [1, 3]] *= height
126 |
127 | # Search for correct predictions
128 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
129 |
130 | # Break if all targets already located in image
131 | if len(detected) == nl:
132 | break
133 |
134 | # Continue if predicted class not among image classes
135 | if pcls.item() not in tcls:
136 | continue
137 |
138 | # Best iou, index between pred and targets
139 | m = (pcls == tcls_tensor).nonzero().view(-1)
140 | iou, bi = bbox_iou(pbox, tbox[m]).max(0)
141 |
142 | # If iou > threshold and class is correct mark as correct
143 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]:
144 | correct[i] = 1
145 | detected.append(m[bi])
146 |
147 | # Append statistics (correct, conf, pcls, tcls)
148 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
149 |
150 | # Compute statistics
151 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy
152 | if len(stats):
153 | p, r, ap, f1, ap_class = ap_per_class(*stats)
154 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
155 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
156 | else:
157 | nt = torch.zeros(1)
158 |
159 | # Print results
160 | pf = '%20s' + '%10.3g' * 6 # print format
161 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))
162 |
163 | # Print results per class
164 | if verbose and nc > 1 and len(stats):
165 | for i, c in enumerate(ap_class):
166 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
167 |
168 | # Save JSON
169 | if save_json and map and len(jdict):
170 | try:
171 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
172 | with open('results.json', 'w') as file:
173 | json.dump(jdict, file)
174 |
175 | from pycocotools.coco import COCO
176 | from pycocotools.cocoeval import COCOeval
177 |
178 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
179 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api
180 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api
181 |
182 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
183 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images
184 | cocoEval.evaluate()
185 | cocoEval.accumulate()
186 | cocoEval.summarize()
187 | map = cocoEval.stats[1] # update mAP to pycocotools mAP
188 |         except Exception:
189 |             print('WARNING: missing pycocotools dependency from requirements.txt. Cannot compute official COCO mAP.')
190 |
191 | # Return results
192 | maps = np.zeros(nc) + map
193 | for i, c in enumerate(ap_class):
194 | maps[c] = ap[i]
195 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
196 |
197 |
198 | if __name__ == '__main__':
199 | parser = argparse.ArgumentParser(prog='test.py')
200 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
201 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
202 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
203 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
204 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
205 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
206 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
207 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
208 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
209 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')
210 | opt = parser.parse_args()
211 | print(opt)
212 |
213 | with torch.no_grad():
214 | test(opt.cfg,
215 | opt.data,
216 | opt.weights,
217 | opt.batch_size,
218 | opt.img_size,
219 | opt.iou_thres,
220 | opt.conf_thres,
221 | opt.nms_thres,
222 | opt.save_json)
223 |
--------------------------------------------------------------------------------
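test() above can also be invoked programmatically on an in-memory model, which is how the prune scripts build their eval_model helper. A minimal sketch, assuming the repo's default cfg/data paths and that the dataset referenced by data/coco.data is available locally:

import torch
from models import Darknet
from test import test

cfg, data = 'cfg/yolov3.cfg', 'data/coco.data'  # repo default paths
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Darknet(cfg, (416, 416)).to(device)
# (in practice, weights would be loaded into the model first, as the prune scripts do)

with torch.no_grad():
    (mp, mr, mAP, mf1, *losses), maps = test(cfg, data, batch_size=16, img_size=416, model=model)
print(f'mAP: {mAP:.4f}')
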
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | #
--------------------------------------------------------------------------------
/utils/adabound.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | from torch.optim import Optimizer
5 |
6 |
7 | class AdaBound(Optimizer):
8 | """Implements AdaBound algorithm.
9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
10 | Arguments:
11 | params (iterable): iterable of parameters to optimize or dicts defining
12 | parameter groups
13 | lr (float, optional): Adam learning rate (default: 1e-3)
14 | betas (Tuple[float, float], optional): coefficients used for computing
15 | running averages of gradient and its square (default: (0.9, 0.999))
16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
18 | eps (float, optional): term added to the denominator to improve
19 | numerical stability (default: 1e-8)
20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
23 | https://openreview.net/forum?id=Bkg3g2R9FX
24 | """
25 |
26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
27 | eps=1e-8, weight_decay=0, amsbound=False):
28 | if not 0.0 <= lr:
29 | raise ValueError("Invalid learning rate: {}".format(lr))
30 | if not 0.0 <= eps:
31 | raise ValueError("Invalid epsilon value: {}".format(eps))
32 | if not 0.0 <= betas[0] < 1.0:
33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
34 | if not 0.0 <= betas[1] < 1.0:
35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
36 | if not 0.0 <= final_lr:
37 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
38 | if not 0.0 <= gamma < 1.0:
39 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
41 | weight_decay=weight_decay, amsbound=amsbound)
42 | super(AdaBound, self).__init__(params, defaults)
43 |
44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
45 |
46 | def __setstate__(self, state):
47 | super(AdaBound, self).__setstate__(state)
48 | for group in self.param_groups:
49 | group.setdefault('amsbound', False)
50 |
51 | def step(self, closure=None):
52 | """Performs a single optimization step.
53 | Arguments:
54 | closure (callable, optional): A closure that reevaluates the model
55 | and returns the loss.
56 | """
57 | loss = None
58 | if closure is not None:
59 | loss = closure()
60 |
61 | for group, base_lr in zip(self.param_groups, self.base_lrs):
62 | for p in group['params']:
63 | if p.grad is None:
64 | continue
65 | grad = p.grad.data
66 | if grad.is_sparse:
67 | raise RuntimeError(
68 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
69 | amsbound = group['amsbound']
70 |
71 | state = self.state[p]
72 |
73 | # State initialization
74 | if len(state) == 0:
75 | state['step'] = 0
76 | # Exponential moving average of gradient values
77 | state['exp_avg'] = torch.zeros_like(p.data)
78 | # Exponential moving average of squared gradient values
79 | state['exp_avg_sq'] = torch.zeros_like(p.data)
80 | if amsbound:
81 | # Maintains max of all exp. moving avg. of sq. grad. values
82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
83 |
84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
85 | if amsbound:
86 | max_exp_avg_sq = state['max_exp_avg_sq']
87 | beta1, beta2 = group['betas']
88 |
89 | state['step'] += 1
90 |
91 | if group['weight_decay'] != 0:
92 | grad = grad.add(group['weight_decay'], p.data)
93 |
94 | # Decay the first and second moment running average coefficient
95 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
97 | if amsbound:
98 | # Maintains the maximum of all 2nd moment running avg. till now
99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
100 | # Use the max. for normalizing running avg. of gradient
101 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
102 | else:
103 | denom = exp_avg_sq.sqrt().add_(group['eps'])
104 |
105 | bias_correction1 = 1 - beta1 ** state['step']
106 | bias_correction2 = 1 - beta2 ** state['step']
107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
108 |
109 | # Applies bounds on actual learning rate
110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
111 | final_lr = group['final_lr'] * group['lr'] / base_lr
112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
114 | step_size = torch.full_like(denom, step_size)
115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
116 |
117 | p.data.add_(-step_size)
118 |
119 | return loss
120 |
121 |
122 | class AdaBoundW(Optimizer):
123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
125 | Arguments:
126 | params (iterable): iterable of parameters to optimize or dicts defining
127 | parameter groups
128 | lr (float, optional): Adam learning rate (default: 1e-3)
129 | betas (Tuple[float, float], optional): coefficients used for computing
130 | running averages of gradient and its square (default: (0.9, 0.999))
131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
133 | eps (float, optional): term added to the denominator to improve
134 | numerical stability (default: 1e-8)
135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
138 | https://openreview.net/forum?id=Bkg3g2R9FX
139 | """
140 |
141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
142 | eps=1e-8, weight_decay=0, amsbound=False):
143 | if not 0.0 <= lr:
144 | raise ValueError("Invalid learning rate: {}".format(lr))
145 | if not 0.0 <= eps:
146 | raise ValueError("Invalid epsilon value: {}".format(eps))
147 | if not 0.0 <= betas[0] < 1.0:
148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
149 | if not 0.0 <= betas[1] < 1.0:
150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
151 | if not 0.0 <= final_lr:
152 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
153 | if not 0.0 <= gamma < 1.0:
154 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
156 | weight_decay=weight_decay, amsbound=amsbound)
157 | super(AdaBoundW, self).__init__(params, defaults)
158 |
159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
160 |
161 | def __setstate__(self, state):
162 | super(AdaBoundW, self).__setstate__(state)
163 | for group in self.param_groups:
164 | group.setdefault('amsbound', False)
165 |
166 | def step(self, closure=None):
167 | """Performs a single optimization step.
168 | Arguments:
169 | closure (callable, optional): A closure that reevaluates the model
170 | and returns the loss.
171 | """
172 | loss = None
173 | if closure is not None:
174 | loss = closure()
175 |
176 | for group, base_lr in zip(self.param_groups, self.base_lrs):
177 | for p in group['params']:
178 | if p.grad is None:
179 | continue
180 | grad = p.grad.data
181 | if grad.is_sparse:
182 | raise RuntimeError(
183 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
184 | amsbound = group['amsbound']
185 |
186 | state = self.state[p]
187 |
188 | # State initialization
189 | if len(state) == 0:
190 | state['step'] = 0
191 | # Exponential moving average of gradient values
192 | state['exp_avg'] = torch.zeros_like(p.data)
193 | # Exponential moving average of squared gradient values
194 | state['exp_avg_sq'] = torch.zeros_like(p.data)
195 | if amsbound:
196 | # Maintains max of all exp. moving avg. of sq. grad. values
197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
198 |
199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
200 | if amsbound:
201 | max_exp_avg_sq = state['max_exp_avg_sq']
202 | beta1, beta2 = group['betas']
203 |
204 | state['step'] += 1
205 |
206 | # Decay the first and second moment running average coefficient
207 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
209 | if amsbound:
210 | # Maintains the maximum of all 2nd moment running avg. till now
211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
212 | # Use the max. for normalizing running avg. of gradient
213 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
214 | else:
215 | denom = exp_avg_sq.sqrt().add_(group['eps'])
216 |
217 | bias_correction1 = 1 - beta1 ** state['step']
218 | bias_correction2 = 1 - beta2 ** state['step']
219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
220 |
221 | # Applies bounds on actual learning rate
222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
223 | final_lr = group['final_lr'] * group['lr'] / base_lr
224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
226 | step_size = torch.full_like(denom, step_size)
227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
228 |
229 | if group['weight_decay'] != 0:
230 | decayed_weights = torch.mul(p.data, group['weight_decay'])
231 | p.data.add_(-step_size)
232 | p.data.sub_(decayed_weights)
233 | else:
234 | p.data.add_(-step_size)
235 |
236 | return loss
237 |
--------------------------------------------------------------------------------
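A minimal usage sketch for the AdaBound optimizer defined above (hypothetical model and batch, and assuming the PyTorch 1.x API this repo targets): it behaves like Adam early in training and gradually clamps the per-parameter step size toward the SGD-like final_lr.

import torch
import torch.nn as nn
from utils.adabound import AdaBound

model = nn.Linear(10, 2)                                  # hypothetical tiny model
optimizer = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)

x, y = torch.randn(4, 10), torch.randn(4, 2)              # hypothetical batch
loss = nn.functional.mse_loss(model(x), y)
loss.backward()
optimizer.step()                                          # Adam-like step, bounded toward final_lr
optimizer.zero_grad()
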
/utils/gcp.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # New VM
4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec
5 | git clone https://github.com/ultralytics/yolov3
6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd ..
7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex
8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools
10 | python3 -c "
11 | from yolov3.utils.google_utils import gdrive_download
12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')"
13 | sudo shutdown
14 |
15 | # Re-clone
16 | rm -rf yolov3 # Warning: remove existing
17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master
18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch
19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1
20 |
21 | # Train
22 | python3 train.py
23 |
24 | # Resume
25 | python3 train.py --resume
26 |
27 | # Detect
28 | python3 detect.py
29 |
30 | # Test
31 | python3 test.py --save-json
32 |
33 | # Evolve
34 | for i in {0..500}
35 | do
36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4
37 | done
38 |
39 | # Git pull
40 | git pull https://github.com/ultralytics/yolov3 # master
41 | git pull https://github.com/ultralytics/yolov3 test # branch
42 |
43 | # Test Darknet training
44 | python3 test.py --weights ../darknet/backup/yolov3.backup
45 |
46 | # Copy last.pt TO bucket
47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics
48 |
49 | # Copy last.pt FROM bucket
50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt
51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt
52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt
53 |
54 | # Reproduce tutorials
55 | rm results*.txt # WARNING: removes existing results
56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt
57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt
58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt
59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt
60 | python3 -c "from utils import utils; utils.plot_results()"
61 | # gsutil cp results*.txt gs://ultralytics
62 | gsutil cp results.png gs://ultralytics
63 | sudo shutdown
64 |
65 | # Reproduce mAP
66 | python3 test.py --save-json --img-size 608
67 | python3 test.py --save-json --img-size 416
68 | python3 test.py --save-json --img-size 320
69 | sudo shutdown
70 |
71 | # Benchmark script
72 | git clone https://github.com/ultralytics/yolov3 # clone our repo
73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex
74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB)
75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min)
76 |
77 | # Unit tests
78 | python3 detect.py # detect 2 persons, 1 tie
79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8
80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs
81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs
82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs
83 |
84 | # AlexeyAB Darknet
85 | gsutil cp -r gs://sm6/supermarket2 . # dataset from bucket
86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make
87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation
88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp
89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco
90 |
91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp
92 | gsutil cp -r backup/*5000.weights gs://sm6/weights
93 | sudo shutdown
94 |
95 |
96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny
97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume
98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics
99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test
100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket
101 |
102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test
103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test
104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test
105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test
106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test
107 |
108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown
109 |
110 | # Debug/Development
111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou
112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320
113 |
114 | gsutil cp evolve.txt gs://ultralytics
115 | sudo shutdown
116 |
117 | # Docker
118 | sudo docker kill $(sudo docker ps -q)
119 | sudo docker pull ultralytics/yolov3:v1
120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1
121 |
122 | clear
123 | while true
124 | do
125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1
126 | done
127 |
128 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias
129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done
130 |
--------------------------------------------------------------------------------
/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
2 | # pip install --upgrade google-cloud-storage
3 |
4 | import os
5 | import time
6 |
7 |
8 | # from google.cloud import storage
9 |
10 |
11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'):
12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f
13 | # Downloads a file from Google Drive, handling the download-confirmation page for large files
14 | # from utils.google_utils import *; gdrive_download()
15 | t = time.time()
16 |
17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
18 | if os.path.exists(name): # remove existing
19 | os.remove(name)
20 |
21 | # Attempt large file download
22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id,
23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
24 | id, name),
25 | 'rm ./cookie']
26 | [os.system(x) for x in s] # run commands
27 |
28 | # Attempt small file download
29 | if not os.path.exists(name): # file size < 40MB
30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id)
31 | os.system(s)
32 |
33 | # Unzip if archive
34 | if name.endswith('.zip'):
35 | print('unzipping... ', end='')
36 | os.system('unzip -q %s' % name) # unzip
37 | os.remove(name) # remove zip to free space
38 |
39 | print('Done (%.1fs)' % (time.time() - t))
40 |
41 |
42 | def upload_blob(bucket_name, source_file_name, destination_blob_name):
43 | # Uploads a file to a bucket
44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
45 |
46 | storage_client = storage.Client()
47 | bucket = storage_client.get_bucket(bucket_name)
48 | blob = bucket.blob(destination_blob_name)
49 |
50 | blob.upload_from_filename(source_file_name)
51 |
52 | print('File {} uploaded to {}.'.format(
53 | source_file_name,
54 | destination_blob_name))
55 |
56 |
57 | def download_blob(bucket_name, source_blob_name, destination_file_name):
58 |     # Downloads a blob from a bucket
59 | storage_client = storage.Client()
60 | bucket = storage_client.get_bucket(bucket_name)
61 | blob = bucket.blob(source_blob_name)
62 |
63 | blob.download_to_filename(destination_file_name)
64 |
65 | print('Blob {} downloaded to {}.'.format(
66 | source_blob_name,
67 | destination_file_name))
68 |
--------------------------------------------------------------------------------
/utils/parse_config.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | def parse_model_cfg(path):
5 | # Parses the yolo-v3 layer configuration file and returns module definitions
6 | file = open(path, 'r')
7 | lines = file.read().split('\n')
8 | lines = [x for x in lines if x and not x.startswith('#')]
9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
10 | mdefs = [] # module definitions
11 | for line in lines:
12 | if line.startswith('['): # This marks the start of a new block
13 | mdefs.append({})
14 | mdefs[-1]['type'] = line[1:-1].rstrip()
15 | if mdefs[-1]['type'] == 'convolutional':
16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later)
17 | else:
18 | key, val = line.split("=")
19 | key = key.rstrip()
20 |
21 | if 'anchors' in key:
22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors
23 | else:
24 | mdefs[-1][key] = val.strip()
25 |
26 | return mdefs
27 |
28 |
29 | def parse_data_cfg(path):
30 | # Parses the data configuration file
31 | options = dict()
32 | with open(path, 'r') as fp:
33 | lines = fp.readlines()
34 |
35 | for line in lines:
36 | line = line.strip()
37 | if line == '' or line.startswith('#'):
38 | continue
39 | key, val = line.split('=')
40 | options[key.strip()] = val.strip()
41 |
42 | return options
43 |
44 |
--------------------------------------------------------------------------------
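parse_data_cfg() above expects the simple key=value format used by the *.data files in data/. A minimal sketch with a hypothetical stand-in file (the keys mirror those test.py reads: classes, valid, names):

from utils.parse_config import parse_data_cfg

# write a hypothetical stand-in for the *.data files under data/
with open('example.data', 'w') as f:
    f.write('classes=1\n')
    f.write('train=data/train.txt\n')
    f.write('valid=data/valid.txt\n')
    f.write('names=data/example.names\n')

opts = parse_data_cfg('example.data')
print(int(opts['classes']), opts['valid'])  # 1 data/valid.txt
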
/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 |
5 |
6 | def init_seeds(seed=0):
7 | torch.manual_seed(seed)
8 | torch.cuda.manual_seed(seed)
9 | torch.cuda.manual_seed_all(seed)
10 |
11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html
12 | if seed == 0:
13 | torch.backends.cudnn.deterministic = True
14 | torch.backends.cudnn.benchmark = False
15 |
16 |
17 | def select_device(device='', apex=False):
18 | # device = 'cpu' or '0' or '0,1,2,3'
19 | cpu_request = device.lower() == 'cpu'
20 | if device and not cpu_request: # if device requested other than 'cpu'
21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
22 |         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
23 |
24 | cuda = False if cpu_request else torch.cuda.is_available()
25 | if cuda:
26 | c = 1024 ** 2 # bytes to MB
27 | ng = torch.cuda.device_count()
28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)]
29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex
30 | for i in range(0, ng):
31 | if i == 1:
32 | cuda_str = ' ' * len(cuda_str)
33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
34 | (cuda_str, i, x[i].name, x[i].total_memory / c))
35 | else:
36 | print('Using CPU')
37 |
38 | print('') # skip a line
39 | return torch.device('cuda:0' if cuda else 'cpu')
40 |
41 |
42 | def fuse_conv_and_bn(conv, bn):
43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
44 | with torch.no_grad():
45 | # init
46 | fusedconv = torch.nn.Conv2d(conv.in_channels,
47 | conv.out_channels,
48 | kernel_size=conv.kernel_size,
49 | stride=conv.stride,
50 | padding=conv.padding,
51 | bias=True)
52 |
53 | # prepare filters
54 | w_conv = conv.weight.clone().view(conv.out_channels, -1)
55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
57 |
58 | # prepare spatial bias
59 | if conv.bias is not None:
60 | b_conv = conv.bias
61 | else:
62 | b_conv = torch.zeros(conv.weight.size(0))
63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
64 | fusedconv.bias.copy_(b_conv + b_bn)
65 |
66 | return fusedconv
67 |
68 |
69 | def model_info(model, report='summary'):
70 | # Plots a line-by-line description of a PyTorch model
71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters
72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
73 |     if report == 'full':
74 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
75 | for i, (name, p) in enumerate(model.named_parameters()):
76 | name = name.replace('module_list.', '')
77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))
80 |
--------------------------------------------------------------------------------
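fuse_conv_and_bn() above folds a BatchNorm2d into the preceding Conv2d so inference needs only one layer. A minimal sketch with hypothetical layers, checking that the fused convolution matches the original pair in eval mode:

import torch
import torch.nn as nn
from utils.torch_utils import fuse_conv_and_bn

conv = nn.Conv2d(3, 8, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(8)
conv.eval(); bn.eval()                      # use running statistics, as at inference time

fused = fuse_conv_and_bn(conv, bn)
x = torch.randn(1, 3, 32, 32)
print(torch.allclose(bn(conv(x)), fused(x), atol=1e-5))  # True (within numerical tolerance)
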
/weights/download_yolov3_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # make the 'weights' directory if it does not exist and cd into it
4 | mkdir -p weights && cd weights
5 |
6 | # copy darknet weight files, continue '-c' if partially downloaded
7 | wget -c https://pjreddie.com/media/files/yolov3.weights
8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights
9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights
10 |
11 | # yolov3 pytorch weights
12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI
13 |
14 | # darknet53 weights (first 75 layers only)
15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74
16 |
17 | # yolov3-tiny weights from darknet (first 16 layers only)
18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15
19 | # mv yolov3-tiny.conv.15 ../
20 |
21 |
--------------------------------------------------------------------------------