├── Dockerfile
├── README.md
├── cfg
│   ├── oxfordhand.data
│   ├── yolov3-1cls.cfg
│   ├── yolov3-hand.cfg
│   ├── yolov3-quantize-hand.cfg
│   ├── yolov3-spp-1cls.cfg
│   ├── yolov3-spp-pan-scale.cfg
│   ├── yolov3-spp.cfg
│   ├── yolov3-tiny-1cls.cfg
│   ├── yolov3-tiny-hand.cfg
│   ├── yolov3-tiny.cfg
│   ├── yolov3.cfg
│   ├── yolov3s-18a320.cfg
│   ├── yolov3s-30a320.cfg
│   ├── yolov3s-3a320.cfg
│   └── yolov3s-9a320.cfg
├── data
│   ├── 5k.shapes
│   ├── 5k.txt
│   ├── coco.data
│   ├── coco.names
│   ├── coco_1000img.data
│   ├── coco_1000img.txt
│   ├── coco_1000val.data
│   ├── coco_1000val.txt
│   ├── coco_16img.data
│   ├── coco_16img.txt
│   ├── coco_1cls.data
│   ├── coco_1cls.txt
│   ├── coco_1img.data
│   ├── coco_1img.txt
│   ├── coco_1k5k.data
│   ├── coco_32img.data
│   ├── coco_32img.txt
│   ├── coco_500img.txt
│   ├── coco_500val.data
│   ├── coco_500val.txt
│   ├── coco_64img.data
│   ├── coco_64img.txt
│   ├── coco_paper.names
│   ├── converter.py
│   ├── get_coco_dataset.sh
│   ├── get_coco_dataset_gdrive.sh
│   ├── oxfordhand.data
│   ├── oxfordhand.names
│   ├── samples
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   └── trainvalno5k.shapes
├── detect.py
├── examples.ipynb
├── github_files
│   └── 64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg
├── models.py
├── normal_prune.py
├── prune_tiny_yolo.py
├── quant_dorefa.py
├── regular_prune.py
├── requirements.txt
├── shortcut_prune.py
├── test.py
├── train.py
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-37.pyc
│   │   ├── datasets.cpython-37.pyc
│   │   ├── google_utils.cpython-37.pyc
│   │   ├── parse_config.cpython-37.pyc
│   │   ├── prune_utils.cpython-37.pyc
│   │   ├── torch_utils.cpython-37.pyc
│   │   └── utils.cpython-37.pyc
│   ├── adabound.py
│   ├── datasets.py
│   ├── gcp.sh
│   ├── google_utils.py
│   ├── parse_config.py
│   ├── prune_utils.py
│   ├── tiny_prune_utils.py
│   ├── torch_utils.py
│   └── utils.py
└── weights
    └── download_yolov3_weights.sh
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Start from Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
2 | FROM nvcr.io/nvidia/pytorch:19.08-py3
3 |
4 | # Install dependencies (pip or conda)
5 | RUN pip install -U gsutil
6 | # RUN pip install -U -r requirements.txt
7 | # RUN conda update -n base -c defaults conda
8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow
9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools
10 |
11 | ## Install OpenCV with Gstreamer support
12 | #WORKDIR /usr/src
13 | #RUN pip uninstall -y opencv-python
14 | #RUN apt-get update
15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev
16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build
17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1
18 | #RUN cd opencv/build && cmake ../ \
19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \
20 | # -D BUILD_OPENCV_PYTHON3=ON \
21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \
22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \
23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \
24 | # -D WITH_GSTREAMER=ON \
25 | # -D WITH_FFMPEG=OFF \
26 | # && make && make install && ldconfig
27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so
28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so
29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())"
30 |
31 | # Create working directory
32 | RUN mkdir -p /usr/src/app
33 | WORKDIR /usr/src/app
34 |
35 | # Copy contents
36 | COPY . /usr/src/app
37 |
38 | # Copy weights
39 | #RUN python3 -c "from utils.google_utils import *; \
40 | # gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name='weights/darknet53.conv.74'); \
41 | # gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name='weights/yolov3-spp.weights'); \
42 | # gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name='weights/yolov3-spp.pt')"
43 |
44 |
45 | # --------------------------------------------------- Extras Below ---------------------------------------------------
46 |
47 | # Build
48 | # rm -rf yolov3 # Warning: remove existing
49 | # git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 detect.py
50 | # sudo docker image prune -af && sudo docker build -t ultralytics/yolov3:v0 .
51 |
52 | # Run
53 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 python3 detect.py
54 |
55 | # Run with local directory access
56 | # sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v0 python3 train.py
57 |
58 | # Pull and Run with local directory access
59 | # export tag=ultralytics/yolov3:v0 && sudo docker pull $tag && sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco $tag python3 train.py
60 |
61 | # Build and Push
62 | # export tag=ultralytics/yolov3:v0 && sudo docker build -t $tag . && docker push $tag
63 |
64 | # Kill all
65 | # sudo docker kill $(sudo docker ps -q)
66 |
67 | # Run bash for loop
68 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 bash -c 'while true; do python3 train.py --evolve; done'
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv3-complete-pruning
2 |
3 | This project uses [ultralytics/yolov3](https://github.com/ultralytics/yolov3) as its PyTorch implementation of YOLOv3 and, building on the pruning work in [YOLOv3-model-pruning](https://github.com/Lam1360/YOLOv3-model-pruning), provides four pruned variants of YOLOv3. (Many thanks to both authors.)
4 |
5 | In addition, 1-bit, 4-bit, 8-bit and 16-bit quantization of YOLO has been added recently.
6 |
7 | |Pruning method|Advantages|Disadvantages|
8 | | --- | --- | --- |
9 | | Normal pruning | Does not prune shortcut layers; solid and stable compression ratio, no fine-tuning required. | Compression ratio is not maximal. |
10 | | Extreme pruning | Very high compression ratio. | Requires fine-tuning. |
11 | | Regular pruning | Designed for hardware deployment: the number of filters after pruning is always a multiple of 8; no fine-tuning required. | Sacrifices some compression ratio for regularity. |
12 | | Tiny pruning | Stable compression ratio. | Tiny is already small, so the compression ratio is modest. |
13 |
14 | ## Project highlights
15 |
16 | 1. The YOLOv3 implementation used here is fairly accurate, with relatively high mAP.
17 |
18 | Model | 320 | 416 | 608
19 | --- | --- | --- | ---
20 | `YOLOv3` | 51.8 (51.5) | 55.4 (55.3) | 58.2 (57.9)
21 | `YOLOv3-tiny` | 29.0 | 32.9 (33.1) | 35.5
22 |
23 | 2. Multiple pruned and quantized versions of YOLOv3 and YOLOv3-tiny are provided to suit different needs.
24 |
25 | 3. Pruned models are saved in .weights format, so they can be trained further, used for inference, or demoed on images and video in any framework.
26 |
27 |
28 |
29 | 4. Current support status:
30 |
31 | |Pruning method|Single GPU|Multi GPU|
32 | | --- | --- | --- |
33 | |Normal training|√|√|
34 | |Sparsity training|√|√|
35 | |Normal pruning|√|√|
36 | |Regular pruning|√|√|
37 | |Extreme pruning (shortcut)|√|√|
38 | |Tiny pruning|√|√|
39 |
40 | |Binary quantization|8-bit quantization|16-bit quantization|Mixed quantization|Arbitrary-bit quantization|
41 | | --- | --- | --- | --- | --- |
42 | |√|√|√|√|√|
43 |
44 |
45 | ## Latest updates
46 |
47 | - 2019-12-26: arbitrary-bit quantization is supported.
48 | - 2019-11-06: extreme pruning no longer requires fine-tuning.
49 | - Many thanks to GitHub user [tanluren](https://github.com/tanluren) for pointing out numerous issues in this project and for the support — truly impressive ^_^.
50 |
51 |
52 | ## Environment setup
53 |
54 | 1. Since the YOLO implementation comes from [ultralytics/yolov3](https://github.com/ultralytics/yolov3), see [ultralytics/yolov3](https://github.com/ultralytics/yolov3) for environment setup. In short, the requirements are:
55 |
56 | - `numpy`
57 | - `torch >= 1.1.0`
58 | - `opencv-python`
59 | - `tqdm`
60 |
61 | You can set up the environment directly with `pip3 install -U -r requirements.txt`, or build it with conda based on the same .txt file.
62 |
63 | ## Data preparation
64 |
65 | The Oxford hand dataset is used, as before.
66 |
67 |
68 |
69 |
70 | 1. Download the [dataset](http://www.robots.ox.ac.uk/~vgg/data/hands/downloads/hand_dataset.tar.gz) and extract it into the /data directory, producing a hand_dataset folder.
71 |
72 | 2. Run `python converter.py` to generate the images and labels folders and the train.txt and valid.txt files.
73 |
74 | 3. Get the YOLO pretrained weights: run `bash download_yolov3_weights.sh` inside the /weights folder, or download them yourself.
75 |
76 | 4. With that, the data preparation is complete.
77 |
78 | ## Pruning
79 |
80 | 1. Normal training
81 |
82 | ```bash
83 | python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-hand.cfg
84 | ```
85 |
86 | 2. Sparsity training
87 |
88 | `-sr` enables sparsity training, `--s` sets the sparsity factor, and `--prune` selects the sparsity type (a sketch of the underlying penalty follows the command below).
89 |
90 | Specifically:
91 |
92 | `--prune 0`: sparsity training for normal pruning and regular pruning
93 |
94 | `--prune 1`: sparsity training for extreme pruning
95 |
96 | `--prune 2`: sparsity training for Tiny pruning
97 |
98 | ```bash
99 | python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-hand.cfg -sr --s 0.001 --prune 0
100 | ```
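For intuition, Network Slimming-style sparsity training adds an L1 subgradient on every batch-norm scale factor (gamma) after the ordinary backward pass, with `--s` as the penalty strength. Below is a minimal PyTorch sketch of that idea; the function name `update_bn_grad` and the way it is hooked into the loop are illustrative assumptions, not this repo's exact code.

```python
import torch
import torch.nn as nn

def update_bn_grad(model: nn.Module, s: float = 0.001):
    """Add the L1 subgradient s * sign(gamma) to every BN scale factor.

    Called between loss.backward() and optimizer.step(), this pushes BN
    gammas toward zero so that low-importance channels can be pruned later.
    """
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d) and m.weight.grad is not None:
            m.weight.grad.data.add_(s * torch.sign(m.weight.data))

# Hypothetical usage inside the training loop:
#   loss.backward()
#   update_bn_grad(model, s=0.001)   # only when -sr is enabled
#   optimizer.step()
```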
101 |
102 | 3. Model pruning
103 |
104 | - Normal pruning
105 | ```bash
106 | python3 normal_prune.py
107 | ```
108 | - Regular pruning
109 | ```bash
110 | python3 regular_prune.py
111 | ```
112 | - Extreme pruning
113 | ```bash
114 | python3 shortcut_prune.py
115 | ```
116 | - Tiny pruning
117 | ```bash
118 | python3 prune_tiny_yolo.py
119 | ```
120 | Note that inside each of these .py scripts you must point the cfg and weights variables in opt to the cfg and weights files produced by the sparsity training in step 2.
121 | You can also increase the percent value in the code to obtain a higher compression ratio. (If sparsity training was insufficient and percent is set too high, the program will raise an error; the sketch below shows why.)
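To see why an over-large percent fails, note that pruning scripts of this kind typically turn percent into one global threshold over all prunable BN gammas and drop every channel below it. A minimal sketch of that selection step, assuming PyTorch and treating every BatchNorm2d as prunable (the real scripts additionally protect shortcut-connected layers, which this sketch ignores):

```python
import torch
import torch.nn as nn

def global_bn_threshold(model: nn.Module, percent: float) -> float:
    """Return the gamma magnitude below which `percent` of all BN channels fall."""
    gammas = torch.cat([m.weight.data.abs().flatten()
                        for m in model.modules()
                        if isinstance(m, nn.BatchNorm2d)])
    sorted_gammas, _ = torch.sort(gammas)
    index = min(int(len(sorted_gammas) * percent), len(sorted_gammas) - 1)
    return sorted_gammas[index].item()

# If sparsity training was insufficient, a large percent can push this
# threshold above every gamma in some layer, leaving it with zero channels,
# which is why the prune scripts abort with an error in that case.
```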
122 |
123 | ## Quantization
124 |
125 | 1. Specify the layers to quantize
126 |
127 | Open any usable config file, e.g. yolov3-hand.cfg, and change the type of every layer you want to quantize from convolutional to quantize_convolutional.
128 |
129 | 2. Specify the quantization scheme
130 |
131 | Set W_bit and A_bit in models.py to choose how weights and activations are quantized, respectively. (The current default is 16-bit quantization; see the sketch below.)
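For reference, DoReFa-style k-bit quantization (the scheme suggested by the paper cited below and by the file quant_dorefa.py) maps a value in [0, 1] onto 2^k - 1 uniform levels and back-propagates with a straight-through estimator. The sketch below illustrates how `W_bit` could drive weight quantization; only the names W_bit and A_bit come from this README, while the class and function names and the integration with quantize_convolutional layers are assumptions, not the exact code in models.py.

```python
import torch

W_bit, A_bit = 16, 16  # bit-widths referenced above (default: 16-bit)

class QuantizeK(torch.autograd.Function):
    """Uniform k-bit quantization of x in [0, 1] with a straight-through gradient."""
    @staticmethod
    def forward(ctx, x, k):
        n = float(2 ** k - 1)
        return torch.round(x * n) / n

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output, None  # straight-through estimator

def quantize_weights(w: torch.Tensor, k: int = W_bit) -> torch.Tensor:
    """DoReFa weight quantization: tanh-normalize to [0, 1], quantize, map back to [-1, 1]."""
    t = torch.tanh(w)
    t = t / (2 * t.abs().max()) + 0.5
    return 2 * QuantizeK.apply(t, k) - 1
```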
132 |
133 | 3. Quantization-aware training
134 |
135 |
136 | ```bash
137 | python3 train.py --data data/oxfordhand.data --batch-size 32 --accumulate 1 --weights weights/yolov3.weights --cfg cfg/yolov3-quantize-hand.cfg
138 | ```
139 |
140 | This is the same as normal training, except that --cfg must point to the modified cfg file.
141 |
142 | ## Inference demo
143 |
144 | You can run the inference demo not only with the original YOLOv3 but also with any of the pruned models; just point --cfg and --weights at the corresponding files.
145 |
146 |
147 |
148 | ```bash
149 | python3 detect.py --source ...
150 | ```
151 |
152 | - Image: `--source file.jpg`
153 | - Video: `--source file.mp4`
154 | - Directory: `--source dir/`
155 | - Webcam: `--source 0`
156 | - RTSP stream: `--source rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa`
157 | - HTTP stream: `--source http://wmccpinetop.axiscam.net/mjpg/video.mjpg`
158 |
159 |
160 |
161 | For example:
162 | ```bash
163 | python3 detect.py --cfg cfg/prune_0.8_yolov3-hand.cfg --weights weights/yolov3_hand_pruning_percent0.8.weights --data data/oxfordhand.data --source test.jpg
164 | ```
165 |
166 | ## Pruning results
167 |
168 | Except for extreme pruning, none of the results below involve any fine-tuning.
169 |
170 | ### YOLOv3 pruning
171 |
172 | | Model | Parameters | Model size | Compression ratio | Inference time | mAP |
173 | | --- | --- | --- | --- | --- | --- |
174 | | Baseline (416) | 61.5M | 246.4MB | 0% | 11.7ms | 0.7924 |
175 | | Normal pruning | 10.9M | 43.9MB | 82.2% | 5.92ms | 0.7712 |
176 | | Regular pruning | 15.31M | 61.4MB | 75.1% | 6.01ms | 0.7832 |
177 | | Extreme pruning | 7.13M | 28.6MB | 88.4% | 5.90ms | 0.7382 |
178 |
179 | ### YOLOv3-tiny pruning
180 |
181 | | Model | Parameters | Model size | Compression ratio | Inference time | mAP |
182 | | --- | --- | --- | --- | --- | --- |
183 | | Baseline (416) | 8.7M | 33.1MB | 0% | 2.2ms | 0.6378 |
184 | | Tiny pruning | 4.4M | 16.8MB | 40.1% | 2.0ms | 0.6132 |
185 |
186 | ## Quantization results (data still being updated)
187 |
188 | In the quantization results below, both weights and activations are quantized.
189 |
190 | ### YOLOv3 quantization
191 |
192 | | Model | mAP |
193 | | --- | --- |
194 | | Baseline (416) | 0.8246 |
195 | | First layer float32 + middle layers 8-bit + last layer float32 | 0.8174 |
196 | | Full 16-bit quantization | 0.8132 |
197 | | Full 8-bit quantization | 0.8024 |
198 |
199 |
200 |
201 | ## Core ideas
202 |
203 | The pruning method comes from the paper [Learning Efficient Convolutional Networks through Network Slimming](https://arxiv.org/abs/1708.06519); the fine-tuning-free pruning method comes from [Rethinking the Smaller-Norm-Less-Informative Assumption in Channel Pruning of Convolution Layers](https://arxiv.org/abs/1802.00124?context=cs).
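In symbols, Network Slimming trains with an L1 penalty on the batch-norm scale factors γ, which is exactly what the `--s` flag above controls:

```math
L = \sum_{(x,y)} \ell\big(f(x, W), y\big) + s \sum_{\gamma \in \Gamma} \lvert \gamma \rvert
```

Channels whose γ is driven close to zero are then removed according to the chosen percent.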
204 |
205 |
206 | The quantization method comes from the paper [DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients](https://arxiv.org/abs/1606.06160).
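For reference, the paper's k-bit quantizer and the resulting weight/activation quantization are

```math
Q_k(x) = \frac{\operatorname{round}\!\big((2^k - 1)\,x\big)}{2^k - 1}, \qquad
w^q = 2\,Q_k\!\left(\frac{\tanh(w)}{2\max\lvert\tanh(w)\rvert} + \frac{1}{2}\right) - 1, \qquad
a^q = Q_k\big(\mathrm{clip}(a, 0, 1)\big)
```

with a straight-through estimator used for the gradient of the rounding step.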
207 |
208 | In addition, the concrete implementations include some improvements over the authors' original versions.
209 |
210 |
211 | ## Q&A
212 |
213 | ### 1. How do I get a higher compression ratio?
214 |
215 | The key to a higher compression ratio is sparsity training: for example, increase the value of `--s` and run sparsity training for several rounds.
216 |
217 | ### 2. My compression ratio is higher than in the tables!
218 |
219 | The numbers above come from fewer than 20 test runs, so if you reach a higher compression ratio, please share it in the comments!
220 |
221 | ### 3. What should I do when the program throws an error?
222 |
223 | #### YOLOv3 errors
224 | Since [ultralytics/yolov3](https://github.com/ultralytics/yolov3) is used as the PyTorch implementation of YOLOv3, errors of this kind are best raised at that repository.
225 |
226 | #### Pruning/quantization errors
227 |
228 | Please report them in the comments here and I will fix them as soon as possible!
229 |
230 |
231 |
232 |
--------------------------------------------------------------------------------
/cfg/oxfordhand.data:
--------------------------------------------------------------------------------
1 | classes= 1
2 | train=data/train.txt
3 | valid=data/valid.txt
4 | names=data/oxfordhand.names
5 |
--------------------------------------------------------------------------------
/cfg/yolov3-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 |
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 |
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 |
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=18
604 | activation=linear
605 |
606 |
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610 | classes=1
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 |
617 |
618 | [route]
619 | layers = -4
620 |
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 |
629 | [upsample]
630 | stride=2
631 |
632 | [route]
633 | layers = -1, 61
634 |
635 |
636 |
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 |
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 |
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 |
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=18
690 | activation=linear
691 |
692 |
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696 | classes=1
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 |
703 |
704 |
705 | [route]
706 | layers = -4
707 |
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 |
716 | [upsample]
717 | stride=2
718 |
719 | [route]
720 | layers = -1, 36
721 |
722 |
723 |
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 |
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 |
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 |
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 |
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 |
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 |
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=18
777 | activation=linear
778 |
779 |
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783 | classes=1
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 |
--------------------------------------------------------------------------------
/cfg/yolov3-hand.cfg:
--------------------------------------------------------------------------------
1 |
2 | [net]
3 | # Testing
4 | #batch=1
5 | #subdivisions=1
6 | # Training
7 | batch=16
8 | subdivisions=1
9 | width=416
10 | height=416
11 | channels=3
12 | momentum=0.9
13 | decay=0.0005
14 | angle=0
15 | saturation = 1.5
16 | exposure = 1.5
17 | hue=.1
18 |
19 | learning_rate=0.001
20 | burn_in=1000
21 | max_batches = 500200
22 | policy=steps
23 | steps=400000,450000
24 | scales=.1,.1
25 |
26 | [convolutional]
27 | batch_normalize=1
28 | filters=32
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | # Downsample
35 |
36 | [convolutional]
37 | batch_normalize=1
38 | filters=64
39 | size=3
40 | stride=2
41 | pad=1
42 | activation=leaky
43 |
44 | [convolutional]
45 | batch_normalize=1
46 | filters=32
47 | size=1
48 | stride=1
49 | pad=1
50 | activation=leaky
51 |
52 | [convolutional]
53 | batch_normalize=1
54 | filters=64
55 | size=3
56 | stride=1
57 | pad=1
58 | activation=leaky
59 |
60 | [shortcut]
61 | from=-3
62 | activation=linear
63 |
64 | # Downsample
65 |
66 | [convolutional]
67 | batch_normalize=1
68 | filters=128
69 | size=3
70 | stride=2
71 | pad=1
72 | activation=leaky
73 |
74 | [convolutional]
75 | batch_normalize=1
76 | filters=64
77 | size=1
78 | stride=1
79 | pad=1
80 | activation=leaky
81 |
82 | [convolutional]
83 | batch_normalize=1
84 | filters=128
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [shortcut]
91 | from=-3
92 | activation=linear
93 |
94 | [convolutional]
95 | batch_normalize=1
96 | filters=64
97 | size=1
98 | stride=1
99 | pad=1
100 | activation=leaky
101 |
102 | [convolutional]
103 | batch_normalize=1
104 | filters=128
105 | size=3
106 | stride=1
107 | pad=1
108 | activation=leaky
109 |
110 | [shortcut]
111 | from=-3
112 | activation=linear
113 |
114 | # Downsample
115 |
116 | [convolutional]
117 | batch_normalize=1
118 | filters=256
119 | size=3
120 | stride=2
121 | pad=1
122 | activation=leaky
123 |
124 | [convolutional]
125 | batch_normalize=1
126 | filters=128
127 | size=1
128 | stride=1
129 | pad=1
130 | activation=leaky
131 |
132 | [convolutional]
133 | batch_normalize=1
134 | filters=256
135 | size=3
136 | stride=1
137 | pad=1
138 | activation=leaky
139 |
140 | [shortcut]
141 | from=-3
142 | activation=linear
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=128
147 | size=1
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [convolutional]
153 | batch_normalize=1
154 | filters=256
155 | size=3
156 | stride=1
157 | pad=1
158 | activation=leaky
159 |
160 | [shortcut]
161 | from=-3
162 | activation=linear
163 |
164 | [convolutional]
165 | batch_normalize=1
166 | filters=128
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 |
172 | [convolutional]
173 | batch_normalize=1
174 | filters=256
175 | size=3
176 | stride=1
177 | pad=1
178 | activation=leaky
179 |
180 | [shortcut]
181 | from=-3
182 | activation=linear
183 |
184 | [convolutional]
185 | batch_normalize=1
186 | filters=128
187 | size=1
188 | stride=1
189 | pad=1
190 | activation=leaky
191 |
192 | [convolutional]
193 | batch_normalize=1
194 | filters=256
195 | size=3
196 | stride=1
197 | pad=1
198 | activation=leaky
199 |
200 | [shortcut]
201 | from=-3
202 | activation=linear
203 |
204 |
205 | [convolutional]
206 | batch_normalize=1
207 | filters=128
208 | size=1
209 | stride=1
210 | pad=1
211 | activation=leaky
212 |
213 | [convolutional]
214 | batch_normalize=1
215 | filters=256
216 | size=3
217 | stride=1
218 | pad=1
219 | activation=leaky
220 |
221 | [shortcut]
222 | from=-3
223 | activation=linear
224 |
225 | [convolutional]
226 | batch_normalize=1
227 | filters=128
228 | size=1
229 | stride=1
230 | pad=1
231 | activation=leaky
232 |
233 | [convolutional]
234 | batch_normalize=1
235 | filters=256
236 | size=3
237 | stride=1
238 | pad=1
239 | activation=leaky
240 |
241 | [shortcut]
242 | from=-3
243 | activation=linear
244 |
245 | [convolutional]
246 | batch_normalize=1
247 | filters=128
248 | size=1
249 | stride=1
250 | pad=1
251 | activation=leaky
252 |
253 | [convolutional]
254 | batch_normalize=1
255 | filters=256
256 | size=3
257 | stride=1
258 | pad=1
259 | activation=leaky
260 |
261 | [shortcut]
262 | from=-3
263 | activation=linear
264 |
265 | [convolutional]
266 | batch_normalize=1
267 | filters=128
268 | size=1
269 | stride=1
270 | pad=1
271 | activation=leaky
272 |
273 | [convolutional]
274 | batch_normalize=1
275 | filters=256
276 | size=3
277 | stride=1
278 | pad=1
279 | activation=leaky
280 |
281 | [shortcut]
282 | from=-3
283 | activation=linear
284 |
285 | # Downsample
286 |
287 | [convolutional]
288 | batch_normalize=1
289 | filters=512
290 | size=3
291 | stride=2
292 | pad=1
293 | activation=leaky
294 |
295 | [convolutional]
296 | batch_normalize=1
297 | filters=256
298 | size=1
299 | stride=1
300 | pad=1
301 | activation=leaky
302 |
303 | [convolutional]
304 | batch_normalize=1
305 | filters=512
306 | size=3
307 | stride=1
308 | pad=1
309 | activation=leaky
310 |
311 | [shortcut]
312 | from=-3
313 | activation=linear
314 |
315 |
316 | [convolutional]
317 | batch_normalize=1
318 | filters=256
319 | size=1
320 | stride=1
321 | pad=1
322 | activation=leaky
323 |
324 | [convolutional]
325 | batch_normalize=1
326 | filters=512
327 | size=3
328 | stride=1
329 | pad=1
330 | activation=leaky
331 |
332 | [shortcut]
333 | from=-3
334 | activation=linear
335 |
336 |
337 | [convolutional]
338 | batch_normalize=1
339 | filters=256
340 | size=1
341 | stride=1
342 | pad=1
343 | activation=leaky
344 |
345 | [convolutional]
346 | batch_normalize=1
347 | filters=512
348 | size=3
349 | stride=1
350 | pad=1
351 | activation=leaky
352 |
353 | [shortcut]
354 | from=-3
355 | activation=linear
356 |
357 |
358 | [convolutional]
359 | batch_normalize=1
360 | filters=256
361 | size=1
362 | stride=1
363 | pad=1
364 | activation=leaky
365 |
366 | [convolutional]
367 | batch_normalize=1
368 | filters=512
369 | size=3
370 | stride=1
371 | pad=1
372 | activation=leaky
373 |
374 | [shortcut]
375 | from=-3
376 | activation=linear
377 |
378 | [convolutional]
379 | batch_normalize=1
380 | filters=256
381 | size=1
382 | stride=1
383 | pad=1
384 | activation=leaky
385 |
386 | [convolutional]
387 | batch_normalize=1
388 | filters=512
389 | size=3
390 | stride=1
391 | pad=1
392 | activation=leaky
393 |
394 | [shortcut]
395 | from=-3
396 | activation=linear
397 |
398 |
399 | [convolutional]
400 | batch_normalize=1
401 | filters=256
402 | size=1
403 | stride=1
404 | pad=1
405 | activation=leaky
406 |
407 | [convolutional]
408 | batch_normalize=1
409 | filters=512
410 | size=3
411 | stride=1
412 | pad=1
413 | activation=leaky
414 |
415 | [shortcut]
416 | from=-3
417 | activation=linear
418 |
419 |
420 | [convolutional]
421 | batch_normalize=1
422 | filters=256
423 | size=1
424 | stride=1
425 | pad=1
426 | activation=leaky
427 |
428 | [convolutional]
429 | batch_normalize=1
430 | filters=512
431 | size=3
432 | stride=1
433 | pad=1
434 | activation=leaky
435 |
436 | [shortcut]
437 | from=-3
438 | activation=linear
439 |
440 | [convolutional]
441 | batch_normalize=1
442 | filters=256
443 | size=1
444 | stride=1
445 | pad=1
446 | activation=leaky
447 |
448 | [convolutional]
449 | batch_normalize=1
450 | filters=512
451 | size=3
452 | stride=1
453 | pad=1
454 | activation=leaky
455 |
456 | [shortcut]
457 | from=-3
458 | activation=linear
459 |
460 | # Downsample
461 |
462 | [convolutional]
463 | batch_normalize=1
464 | filters=1024
465 | size=3
466 | stride=2
467 | pad=1
468 | activation=leaky
469 |
470 | [convolutional]
471 | batch_normalize=1
472 | filters=512
473 | size=1
474 | stride=1
475 | pad=1
476 | activation=leaky
477 |
478 | [convolutional]
479 | batch_normalize=1
480 | filters=1024
481 | size=3
482 | stride=1
483 | pad=1
484 | activation=leaky
485 |
486 | [shortcut]
487 | from=-3
488 | activation=linear
489 |
490 | [convolutional]
491 | batch_normalize=1
492 | filters=512
493 | size=1
494 | stride=1
495 | pad=1
496 | activation=leaky
497 |
498 | [convolutional]
499 | batch_normalize=1
500 | filters=1024
501 | size=3
502 | stride=1
503 | pad=1
504 | activation=leaky
505 |
506 | [shortcut]
507 | from=-3
508 | activation=linear
509 |
510 | [convolutional]
511 | batch_normalize=1
512 | filters=512
513 | size=1
514 | stride=1
515 | pad=1
516 | activation=leaky
517 |
518 | [convolutional]
519 | batch_normalize=1
520 | filters=1024
521 | size=3
522 | stride=1
523 | pad=1
524 | activation=leaky
525 |
526 | [shortcut]
527 | from=-3
528 | activation=linear
529 |
530 | [convolutional]
531 | batch_normalize=1
532 | filters=512
533 | size=1
534 | stride=1
535 | pad=1
536 | activation=leaky
537 |
538 | [convolutional]
539 | batch_normalize=1
540 | filters=1024
541 | size=3
542 | stride=1
543 | pad=1
544 | activation=leaky
545 |
546 | [shortcut]
547 | from=-3
548 | activation=linear
549 |
550 | ######################
551 |
552 | [convolutional]
553 | batch_normalize=1
554 | filters=512
555 | size=1
556 | stride=1
557 | pad=1
558 | activation=leaky
559 |
560 | [convolutional]
561 | batch_normalize=1
562 | size=3
563 | stride=1
564 | pad=1
565 | filters=1024
566 | activation=leaky
567 |
568 | [convolutional]
569 | batch_normalize=1
570 | filters=512
571 | size=1
572 | stride=1
573 | pad=1
574 | activation=leaky
575 |
576 | [convolutional]
577 | batch_normalize=1
578 | size=3
579 | stride=1
580 | pad=1
581 | filters=1024
582 | activation=leaky
583 |
584 | [convolutional]
585 | batch_normalize=1
586 | filters=512
587 | size=1
588 | stride=1
589 | pad=1
590 | activation=leaky
591 |
592 | [convolutional]
593 | batch_normalize=1
594 | size=3
595 | stride=1
596 | pad=1
597 | filters=1024
598 | activation=leaky
599 |
600 | [convolutional]
601 | size=1
602 | stride=1
603 | pad=1
604 | filters=18
605 | activation=linear
606 |
607 |
608 | [yolo]
609 | mask = 6,7,8
610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
611 | classes=1
612 | num=9
613 | jitter=.3
614 | ignore_thresh = .7
615 | truth_thresh = 1
616 | random=1
617 |
618 |
619 | [route]
620 | layers = -4
621 |
622 | [convolutional]
623 | batch_normalize=1
624 | filters=256
625 | size=1
626 | stride=1
627 | pad=1
628 | activation=leaky
629 |
630 | [upsample]
631 | stride=2
632 |
633 | [route]
634 | layers = -1, 61
635 |
636 |
637 |
638 | [convolutional]
639 | batch_normalize=1
640 | filters=256
641 | size=1
642 | stride=1
643 | pad=1
644 | activation=leaky
645 |
646 | [convolutional]
647 | batch_normalize=1
648 | size=3
649 | stride=1
650 | pad=1
651 | filters=512
652 | activation=leaky
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [convolutional]
663 | batch_normalize=1
664 | size=3
665 | stride=1
666 | pad=1
667 | filters=512
668 | activation=leaky
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | size=1
688 | stride=1
689 | pad=1
690 | filters=18
691 | activation=linear
692 |
693 |
694 | [yolo]
695 | mask = 3,4,5
696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
697 | classes=1
698 | num=9
699 | jitter=.3
700 | ignore_thresh = .7
701 | truth_thresh = 1
702 | random=1
703 |
704 |
705 |
706 | [route]
707 | layers = -4
708 |
709 | [convolutional]
710 | batch_normalize=1
711 | filters=128
712 | size=1
713 | stride=1
714 | pad=1
715 | activation=leaky
716 |
717 | [upsample]
718 | stride=2
719 |
720 | [route]
721 | layers = -1, 36
722 |
723 |
724 |
725 | [convolutional]
726 | batch_normalize=1
727 | filters=128
728 | size=1
729 | stride=1
730 | pad=1
731 | activation=leaky
732 |
733 | [convolutional]
734 | batch_normalize=1
735 | size=3
736 | stride=1
737 | pad=1
738 | filters=256
739 | activation=leaky
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [convolutional]
750 | batch_normalize=1
751 | size=3
752 | stride=1
753 | pad=1
754 | filters=256
755 | activation=leaky
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | size=1
775 | stride=1
776 | pad=1
777 | filters=18
778 | activation=linear
779 |
780 |
781 | [yolo]
782 | mask = 0,1,2
783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
784 | classes=1
785 | num=9
786 | jitter=.3
787 | ignore_thresh = .7
788 | truth_thresh = 1
789 | random=1
790 |
791 |
--------------------------------------------------------------------------------
/cfg/yolov3-spp-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=100
20 | max_batches = 5000
21 | policy=steps
22 | steps=4000,4500
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=18
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 6,7,8
642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
643 | classes=1
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=18
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 3,4,5
728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
729 | classes=1
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=18
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0,1,2
815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
816 | classes=1
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
/cfg/yolov3-spp.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=255
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 6,7,8
642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
643 | classes=80
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=255
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 3,4,5
728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
729 | classes=80
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=255
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0,1,2
815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
816 | classes=80
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
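
Note on the "### SPP ###" block in the config above: the three [maxpool] layers run at stride 1 with kernel sizes 5, 9 and 13, each preceded by a [route] that rewinds to the 512-channel convolution before the pooling, and the final [route] layers=-1,-3,-5,-6 concatenates the three pooled maps with that un-pooled feature map, so 512 channels become 512 * 4 = 2048 before the next 1x1 convolution reduces them back to 512. The sketch below is an illustrative PyTorch equivalent only; the repo itself builds these layers from the cfg (see models.py), and none of the names below are taken from that file.

import torch
import torch.nn as nn

class SPP(nn.Module):
    # Spatial pyramid pooling: parallel stride-1 max-pools, then a channel concat.
    def __init__(self, kernel_sizes=(5, 9, 13)):
        super().__init__()
        # padding = k // 2 keeps the spatial size unchanged, matching stride=1 in the cfg
        self.pools = nn.ModuleList(
            [nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) for k in kernel_sizes]
        )

    def forward(self, x):
        # the cfg's [route] layers=-1,-3,-5,-6 lists the pooled maps first and the
        # original feature map last; the ordering is immaterial for this sketch
        return torch.cat([p(x) for p in self.pools] + [x], dim=1)

# x = torch.randn(1, 512, 19, 19)   # e.g. a 608x608 input after 32x downsampling
# SPP()(x).shape                    # -> torch.Size([1, 2048, 19, 19])
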
/cfg/yolov3-tiny-1cls.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=2
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=16
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | batch_normalize=1
39 | filters=32
40 | size=3
41 | stride=1
42 | pad=1
43 | activation=leaky
44 |
45 | [maxpool]
46 | size=2
47 | stride=2
48 |
49 | [convolutional]
50 | batch_normalize=1
51 | filters=64
52 | size=3
53 | stride=1
54 | pad=1
55 | activation=leaky
56 |
57 | [maxpool]
58 | size=2
59 | stride=2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=128
64 | size=3
65 | stride=1
66 | pad=1
67 | activation=leaky
68 |
69 | [maxpool]
70 | size=2
71 | stride=2
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=256
76 | size=3
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [maxpool]
82 | size=2
83 | stride=2
84 |
85 | [convolutional]
86 | batch_normalize=1
87 | filters=512
88 | size=3
89 | stride=1
90 | pad=1
91 | activation=leaky
92 |
93 | [maxpool]
94 | size=2
95 | stride=1
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | ###########
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=18
128 | activation=linear
129 |
130 |
131 |
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135 | classes=1
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 |
142 | [route]
143 | layers = -4
144 |
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 |
153 | [upsample]
154 | stride=2
155 |
156 | [route]
157 | layers = -1, 8
158 |
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 |
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=18
172 | activation=linear
173 |
174 | [yolo]
175 | mask = 0,1,2
176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177 | classes=1
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 |
--------------------------------------------------------------------------------
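
A quick check on the filters=18 convolution that feeds each [yolo] layer in yolov3-tiny-1cls.cfg above: the width of that 1x1 convolution must equal (number of anchors selected by mask) * (classes + 5), where the 5 covers the four box coordinates plus objectness. With 3 masked anchors and 1 class that is 3 * (1 + 5) = 18; the 80-class cfgs in this repo use 3 * (80 + 5) = 255, and the single-anchor yolov3s cfgs further down use 1 * (80 + 5) = 85. The helper below is only an illustrative sanity check, not code from this repo.

def yolo_head_filters(num_masked_anchors: int, num_classes: int) -> int:
    # filters for the convolution immediately before a [yolo] layer
    return num_masked_anchors * (num_classes + 5)   # 5 = x, y, w, h, objectness

assert yolo_head_filters(3, 1) == 18     # yolov3-tiny-1cls.cfg, yolov3-tiny-hand.cfg
assert yolo_head_filters(3, 80) == 255   # yolov3-tiny.cfg, yolov3.cfg and the SPP cfgs
assert yolo_head_filters(1, 80) == 85    # yolov3s-3a320.cfg (one anchor per [yolo] layer)
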
/cfg/yolov3-tiny-hand.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=15,25,60,99,150,160,180
23 | scales=0.5,0.5,0.1,0.5,0.5,0.1,0.1
24 |
25 | # 0
26 | [convolutional]
27 | batch_normalize=1
28 | filters=16
29 | size=3
30 | stride=1
31 | pad=1
32 | activation=leaky
33 |
34 | # 1
35 | [maxpool]
36 | size=2
37 | stride=2
38 |
39 | # 2
40 | [convolutional]
41 | batch_normalize=1
42 | filters=32
43 | size=3
44 | stride=1
45 | pad=1
46 | activation=leaky
47 |
48 | # 3
49 | [maxpool]
50 | size=2
51 | stride=2
52 |
53 | # 4
54 | [convolutional]
55 | batch_normalize=1
56 | filters=64
57 | size=3
58 | stride=1
59 | pad=1
60 | activation=leaky
61 |
62 | # 5
63 | [maxpool]
64 | size=2
65 | stride=2
66 |
67 | # 6
68 | [convolutional]
69 | batch_normalize=1
70 | filters=128
71 | size=3
72 | stride=1
73 | pad=1
74 | activation=leaky
75 |
76 | # 7
77 | [maxpool]
78 | size=2
79 | stride=2
80 |
81 | # 8
82 | [convolutional]
83 | batch_normalize=1
84 | filters=256
85 | size=3
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | # 9
91 | [maxpool]
92 | size=2
93 | stride=2
94 |
95 | # 10
96 | [convolutional]
97 | batch_normalize=1
98 | filters=512
99 | size=3
100 | stride=1
101 | pad=1
102 | activation=leaky
103 |
104 | # 11
105 | [maxpool]
106 | size=2
107 | stride=1
108 |
109 | # 12
110 | [convolutional]
111 | batch_normalize=1
112 | filters=1024
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | ###########
119 |
120 | # 13
121 | [convolutional]
122 | batch_normalize=1
123 | filters=256
124 | size=1
125 | stride=1
126 | pad=1
127 | activation=leaky
128 |
129 | # 14
130 | [convolutional]
131 | batch_normalize=1
132 | filters=512
133 | size=3
134 | stride=1
135 | pad=1
136 | activation=leaky
137 |
138 | # 15
139 | [convolutional]
140 | size=1
141 | stride=1
142 | pad=1
143 | filters=18
144 | activation=linear
145 |
146 |
147 |
148 | # 16
149 | [yolo]
150 | mask = 3,4,5
151 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
152 | classes=1
153 | num=6
154 | jitter=.3
155 | ignore_thresh = .7
156 | truth_thresh = 1
157 | random=1
158 |
159 | # 17
160 | [route]
161 | layers = -4
162 |
163 | # 18
164 | [convolutional]
165 | batch_normalize=1
166 | filters=128
167 | size=1
168 | stride=1
169 | pad=1
170 | activation=leaky
171 |
172 | # 19
173 | [upsample]
174 | stride=2
175 |
176 | # 20
177 | [route]
178 | layers = -1, 8
179 |
180 | # 21
181 | [convolutional]
182 | batch_normalize=1
183 | filters=256
184 | size=3
185 | stride=1
186 | pad=1
187 | activation=leaky
188 |
189 | # 22
190 | [convolutional]
191 | size=1
192 | stride=1
193 | pad=1
194 | filters=18
195 | activation=linear
196 |
197 | # 23
198 | [yolo]
199 | mask = 1,2,3
200 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
201 | classes=1
202 | num=6
203 | jitter=.3
204 | ignore_thresh = .7
205 | truth_thresh = 1
206 | random=1
207 |
--------------------------------------------------------------------------------
/cfg/yolov3-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | batch=1
4 | subdivisions=1
5 | # Training
6 | # batch=64
7 | # subdivisions=2
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=16
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | [maxpool]
34 | size=2
35 | stride=2
36 |
37 | [convolutional]
38 | batch_normalize=1
39 | filters=32
40 | size=3
41 | stride=1
42 | pad=1
43 | activation=leaky
44 |
45 | [maxpool]
46 | size=2
47 | stride=2
48 |
49 | [convolutional]
50 | batch_normalize=1
51 | filters=64
52 | size=3
53 | stride=1
54 | pad=1
55 | activation=leaky
56 |
57 | [maxpool]
58 | size=2
59 | stride=2
60 |
61 | [convolutional]
62 | batch_normalize=1
63 | filters=128
64 | size=3
65 | stride=1
66 | pad=1
67 | activation=leaky
68 |
69 | [maxpool]
70 | size=2
71 | stride=2
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=256
76 | size=3
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [maxpool]
82 | size=2
83 | stride=2
84 |
85 | [convolutional]
86 | batch_normalize=1
87 | filters=512
88 | size=3
89 | stride=1
90 | pad=1
91 | activation=leaky
92 |
93 | [maxpool]
94 | size=2
95 | stride=1
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=1024
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | ###########
106 |
107 | [convolutional]
108 | batch_normalize=1
109 | filters=256
110 | size=1
111 | stride=1
112 | pad=1
113 | activation=leaky
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=512
118 | size=3
119 | stride=1
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | size=1
125 | stride=1
126 | pad=1
127 | filters=255
128 | activation=linear
129 |
130 |
131 |
132 | [yolo]
133 | mask = 3,4,5
134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
135 | classes=80
136 | num=6
137 | jitter=.3
138 | ignore_thresh = .7
139 | truth_thresh = 1
140 | random=1
141 |
142 | [route]
143 | layers = -4
144 |
145 | [convolutional]
146 | batch_normalize=1
147 | filters=128
148 | size=1
149 | stride=1
150 | pad=1
151 | activation=leaky
152 |
153 | [upsample]
154 | stride=2
155 |
156 | [route]
157 | layers = -1, 8
158 |
159 | [convolutional]
160 | batch_normalize=1
161 | filters=256
162 | size=3
163 | stride=1
164 | pad=1
165 | activation=leaky
166 |
167 | [convolutional]
168 | size=1
169 | stride=1
170 | pad=1
171 | filters=255
172 | activation=linear
173 |
174 | [yolo]
175 | mask = 1,2,3
176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
177 | classes=80
178 | num=6
179 | jitter=.3
180 | ignore_thresh = .7
181 | truth_thresh = 1
182 | random=1
183 |
--------------------------------------------------------------------------------
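
For readers following the file format rather than the individual layers: each cfg above is a flat sequence of [section] headers followed by key=value pairs, and the model is built by walking that sequence in order. The repo's own parser lives in utils/parse_config.py; the stand-alone sketch below is only meant to show the format and does not reuse that module's names.

def read_cfg(path):
    # Parse a Darknet-style cfg into a list of {'type': ..., key: value, ...} dicts.
    sections = []
    with open(path) as f:
        for raw in f:
            line = raw.split('#', 1)[0].strip()        # drop comments and blank lines
            if not line:
                continue
            if line.startswith('[') and line.endswith(']'):
                sections.append({'type': line[1:-1]})  # 'net', 'convolutional', 'yolo', ...
            else:
                key, value = (s.strip() for s in line.split('=', 1))
                sections[-1][key] = value              # later keys overwrite earlier ones
    return sections

# blocks = read_cfg('cfg/yolov3-tiny.cfg')
# blocks[0]['type'] == 'net' and blocks[1]['filters'] == '16'
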
/cfg/yolov3.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=16
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | [convolutional]
576 | batch_normalize=1
577 | size=3
578 | stride=1
579 | pad=1
580 | filters=1024
581 | activation=leaky
582 |
583 | [convolutional]
584 | batch_normalize=1
585 | filters=512
586 | size=1
587 | stride=1
588 | pad=1
589 | activation=leaky
590 |
591 | [convolutional]
592 | batch_normalize=1
593 | size=3
594 | stride=1
595 | pad=1
596 | filters=1024
597 | activation=leaky
598 |
599 | [convolutional]
600 | size=1
601 | stride=1
602 | pad=1
603 | filters=255
604 | activation=linear
605 |
606 |
607 | [yolo]
608 | mask = 6,7,8
609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610 | classes=80
611 | num=9
612 | jitter=.3
613 | ignore_thresh = .7
614 | truth_thresh = 1
615 | random=1
616 |
617 |
618 | [route]
619 | layers = -4
620 |
621 | [convolutional]
622 | batch_normalize=1
623 | filters=256
624 | size=1
625 | stride=1
626 | pad=1
627 | activation=leaky
628 |
629 | [upsample]
630 | stride=2
631 |
632 | [route]
633 | layers = -1, 61
634 |
635 |
636 |
637 | [convolutional]
638 | batch_normalize=1
639 | filters=256
640 | size=1
641 | stride=1
642 | pad=1
643 | activation=leaky
644 |
645 | [convolutional]
646 | batch_normalize=1
647 | size=3
648 | stride=1
649 | pad=1
650 | filters=512
651 | activation=leaky
652 |
653 | [convolutional]
654 | batch_normalize=1
655 | filters=256
656 | size=1
657 | stride=1
658 | pad=1
659 | activation=leaky
660 |
661 | [convolutional]
662 | batch_normalize=1
663 | size=3
664 | stride=1
665 | pad=1
666 | filters=512
667 | activation=leaky
668 |
669 | [convolutional]
670 | batch_normalize=1
671 | filters=256
672 | size=1
673 | stride=1
674 | pad=1
675 | activation=leaky
676 |
677 | [convolutional]
678 | batch_normalize=1
679 | size=3
680 | stride=1
681 | pad=1
682 | filters=512
683 | activation=leaky
684 |
685 | [convolutional]
686 | size=1
687 | stride=1
688 | pad=1
689 | filters=255
690 | activation=linear
691 |
692 |
693 | [yolo]
694 | mask = 3,4,5
695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696 | classes=80
697 | num=9
698 | jitter=.3
699 | ignore_thresh = .7
700 | truth_thresh = 1
701 | random=1
702 |
703 |
704 |
705 | [route]
706 | layers = -4
707 |
708 | [convolutional]
709 | batch_normalize=1
710 | filters=128
711 | size=1
712 | stride=1
713 | pad=1
714 | activation=leaky
715 |
716 | [upsample]
717 | stride=2
718 |
719 | [route]
720 | layers = -1, 36
721 |
722 |
723 |
724 | [convolutional]
725 | batch_normalize=1
726 | filters=128
727 | size=1
728 | stride=1
729 | pad=1
730 | activation=leaky
731 |
732 | [convolutional]
733 | batch_normalize=1
734 | size=3
735 | stride=1
736 | pad=1
737 | filters=256
738 | activation=leaky
739 |
740 | [convolutional]
741 | batch_normalize=1
742 | filters=128
743 | size=1
744 | stride=1
745 | pad=1
746 | activation=leaky
747 |
748 | [convolutional]
749 | batch_normalize=1
750 | size=3
751 | stride=1
752 | pad=1
753 | filters=256
754 | activation=leaky
755 |
756 | [convolutional]
757 | batch_normalize=1
758 | filters=128
759 | size=1
760 | stride=1
761 | pad=1
762 | activation=leaky
763 |
764 | [convolutional]
765 | batch_normalize=1
766 | size=3
767 | stride=1
768 | pad=1
769 | filters=256
770 | activation=leaky
771 |
772 | [convolutional]
773 | size=1
774 | stride=1
775 | pad=1
776 | filters=255
777 | activation=linear
778 |
779 |
780 | [yolo]
781 | mask = 0,1,2
782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783 | classes=80
784 | num=9
785 | jitter=.3
786 | ignore_thresh = .7
787 | truth_thresh = 1
788 | random=1
789 |
--------------------------------------------------------------------------------
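
Two entries in yolov3.cfg above are easy to misread. [shortcut] from=-3 adds the output from three layers back to the previous output element-wise (a residual connection, so shapes must match), while [route] concatenates along the channel axis: a negative index counts back from the current layer and a non-negative index is an absolute layer number, counting the first layer after [net] as 0. So layers=-4 simply re-exposes an earlier feature map, and layers=-1, 61 concatenates the 256-channel upsampled map with the 512-channel output of layer 61, giving 768 channels (likewise layers=-1, 36 gives 128 + 256 = 384) before the following 1x1 convolution. The snippet below is illustrative only; the repo wires these connections up from the cfg in models.py.

import torch

def route(*feature_maps):
    # [route] with several layers: concatenate along channels (dim=1),
    # e.g. 256 (upsampled) + 512 (layer 61) -> 768 channels at the same resolution.
    return torch.cat(feature_maps, dim=1)

def shortcut(x, earlier):
    # [shortcut] from=-3, activation=linear: element-wise residual addition.
    return x + earlier
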
/cfg/yolov3s-3a320.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=85
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 2
642 | anchors = 16,30, 62,45, 156,198
643 | classes=80
644 | num=3
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=85
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 1
728 | anchors = 16,30, 62,45, 156,198
729 | classes=80
730 | num=3
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=85
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0
815 | anchors = 16,30, 62,45, 156,198
816 | classes=80
817 | num=3
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
/cfg/yolov3s-9a320.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | # batch=1
4 | # subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=16
8 | width=608
9 | height=608
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.001
19 | burn_in=1000
20 | max_batches = 500200
21 | policy=steps
22 | steps=400000,450000
23 | scales=.1,.1
24 |
25 | [convolutional]
26 | batch_normalize=1
27 | filters=32
28 | size=3
29 | stride=1
30 | pad=1
31 | activation=leaky
32 |
33 | # Downsample
34 |
35 | [convolutional]
36 | batch_normalize=1
37 | filters=64
38 | size=3
39 | stride=2
40 | pad=1
41 | activation=leaky
42 |
43 | [convolutional]
44 | batch_normalize=1
45 | filters=32
46 | size=1
47 | stride=1
48 | pad=1
49 | activation=leaky
50 |
51 | [convolutional]
52 | batch_normalize=1
53 | filters=64
54 | size=3
55 | stride=1
56 | pad=1
57 | activation=leaky
58 |
59 | [shortcut]
60 | from=-3
61 | activation=linear
62 |
63 | # Downsample
64 |
65 | [convolutional]
66 | batch_normalize=1
67 | filters=128
68 | size=3
69 | stride=2
70 | pad=1
71 | activation=leaky
72 |
73 | [convolutional]
74 | batch_normalize=1
75 | filters=64
76 | size=1
77 | stride=1
78 | pad=1
79 | activation=leaky
80 |
81 | [convolutional]
82 | batch_normalize=1
83 | filters=128
84 | size=3
85 | stride=1
86 | pad=1
87 | activation=leaky
88 |
89 | [shortcut]
90 | from=-3
91 | activation=linear
92 |
93 | [convolutional]
94 | batch_normalize=1
95 | filters=64
96 | size=1
97 | stride=1
98 | pad=1
99 | activation=leaky
100 |
101 | [convolutional]
102 | batch_normalize=1
103 | filters=128
104 | size=3
105 | stride=1
106 | pad=1
107 | activation=leaky
108 |
109 | [shortcut]
110 | from=-3
111 | activation=linear
112 |
113 | # Downsample
114 |
115 | [convolutional]
116 | batch_normalize=1
117 | filters=256
118 | size=3
119 | stride=2
120 | pad=1
121 | activation=leaky
122 |
123 | [convolutional]
124 | batch_normalize=1
125 | filters=128
126 | size=1
127 | stride=1
128 | pad=1
129 | activation=leaky
130 |
131 | [convolutional]
132 | batch_normalize=1
133 | filters=256
134 | size=3
135 | stride=1
136 | pad=1
137 | activation=leaky
138 |
139 | [shortcut]
140 | from=-3
141 | activation=linear
142 |
143 | [convolutional]
144 | batch_normalize=1
145 | filters=128
146 | size=1
147 | stride=1
148 | pad=1
149 | activation=leaky
150 |
151 | [convolutional]
152 | batch_normalize=1
153 | filters=256
154 | size=3
155 | stride=1
156 | pad=1
157 | activation=leaky
158 |
159 | [shortcut]
160 | from=-3
161 | activation=linear
162 |
163 | [convolutional]
164 | batch_normalize=1
165 | filters=128
166 | size=1
167 | stride=1
168 | pad=1
169 | activation=leaky
170 |
171 | [convolutional]
172 | batch_normalize=1
173 | filters=256
174 | size=3
175 | stride=1
176 | pad=1
177 | activation=leaky
178 |
179 | [shortcut]
180 | from=-3
181 | activation=linear
182 |
183 | [convolutional]
184 | batch_normalize=1
185 | filters=128
186 | size=1
187 | stride=1
188 | pad=1
189 | activation=leaky
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=256
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | [shortcut]
200 | from=-3
201 | activation=linear
202 |
203 |
204 | [convolutional]
205 | batch_normalize=1
206 | filters=128
207 | size=1
208 | stride=1
209 | pad=1
210 | activation=leaky
211 |
212 | [convolutional]
213 | batch_normalize=1
214 | filters=256
215 | size=3
216 | stride=1
217 | pad=1
218 | activation=leaky
219 |
220 | [shortcut]
221 | from=-3
222 | activation=linear
223 |
224 | [convolutional]
225 | batch_normalize=1
226 | filters=128
227 | size=1
228 | stride=1
229 | pad=1
230 | activation=leaky
231 |
232 | [convolutional]
233 | batch_normalize=1
234 | filters=256
235 | size=3
236 | stride=1
237 | pad=1
238 | activation=leaky
239 |
240 | [shortcut]
241 | from=-3
242 | activation=linear
243 |
244 | [convolutional]
245 | batch_normalize=1
246 | filters=128
247 | size=1
248 | stride=1
249 | pad=1
250 | activation=leaky
251 |
252 | [convolutional]
253 | batch_normalize=1
254 | filters=256
255 | size=3
256 | stride=1
257 | pad=1
258 | activation=leaky
259 |
260 | [shortcut]
261 | from=-3
262 | activation=linear
263 |
264 | [convolutional]
265 | batch_normalize=1
266 | filters=128
267 | size=1
268 | stride=1
269 | pad=1
270 | activation=leaky
271 |
272 | [convolutional]
273 | batch_normalize=1
274 | filters=256
275 | size=3
276 | stride=1
277 | pad=1
278 | activation=leaky
279 |
280 | [shortcut]
281 | from=-3
282 | activation=linear
283 |
284 | # Downsample
285 |
286 | [convolutional]
287 | batch_normalize=1
288 | filters=512
289 | size=3
290 | stride=2
291 | pad=1
292 | activation=leaky
293 |
294 | [convolutional]
295 | batch_normalize=1
296 | filters=256
297 | size=1
298 | stride=1
299 | pad=1
300 | activation=leaky
301 |
302 | [convolutional]
303 | batch_normalize=1
304 | filters=512
305 | size=3
306 | stride=1
307 | pad=1
308 | activation=leaky
309 |
310 | [shortcut]
311 | from=-3
312 | activation=linear
313 |
314 |
315 | [convolutional]
316 | batch_normalize=1
317 | filters=256
318 | size=1
319 | stride=1
320 | pad=1
321 | activation=leaky
322 |
323 | [convolutional]
324 | batch_normalize=1
325 | filters=512
326 | size=3
327 | stride=1
328 | pad=1
329 | activation=leaky
330 |
331 | [shortcut]
332 | from=-3
333 | activation=linear
334 |
335 |
336 | [convolutional]
337 | batch_normalize=1
338 | filters=256
339 | size=1
340 | stride=1
341 | pad=1
342 | activation=leaky
343 |
344 | [convolutional]
345 | batch_normalize=1
346 | filters=512
347 | size=3
348 | stride=1
349 | pad=1
350 | activation=leaky
351 |
352 | [shortcut]
353 | from=-3
354 | activation=linear
355 |
356 |
357 | [convolutional]
358 | batch_normalize=1
359 | filters=256
360 | size=1
361 | stride=1
362 | pad=1
363 | activation=leaky
364 |
365 | [convolutional]
366 | batch_normalize=1
367 | filters=512
368 | size=3
369 | stride=1
370 | pad=1
371 | activation=leaky
372 |
373 | [shortcut]
374 | from=-3
375 | activation=linear
376 |
377 | [convolutional]
378 | batch_normalize=1
379 | filters=256
380 | size=1
381 | stride=1
382 | pad=1
383 | activation=leaky
384 |
385 | [convolutional]
386 | batch_normalize=1
387 | filters=512
388 | size=3
389 | stride=1
390 | pad=1
391 | activation=leaky
392 |
393 | [shortcut]
394 | from=-3
395 | activation=linear
396 |
397 |
398 | [convolutional]
399 | batch_normalize=1
400 | filters=256
401 | size=1
402 | stride=1
403 | pad=1
404 | activation=leaky
405 |
406 | [convolutional]
407 | batch_normalize=1
408 | filters=512
409 | size=3
410 | stride=1
411 | pad=1
412 | activation=leaky
413 |
414 | [shortcut]
415 | from=-3
416 | activation=linear
417 |
418 |
419 | [convolutional]
420 | batch_normalize=1
421 | filters=256
422 | size=1
423 | stride=1
424 | pad=1
425 | activation=leaky
426 |
427 | [convolutional]
428 | batch_normalize=1
429 | filters=512
430 | size=3
431 | stride=1
432 | pad=1
433 | activation=leaky
434 |
435 | [shortcut]
436 | from=-3
437 | activation=linear
438 |
439 | [convolutional]
440 | batch_normalize=1
441 | filters=256
442 | size=1
443 | stride=1
444 | pad=1
445 | activation=leaky
446 |
447 | [convolutional]
448 | batch_normalize=1
449 | filters=512
450 | size=3
451 | stride=1
452 | pad=1
453 | activation=leaky
454 |
455 | [shortcut]
456 | from=-3
457 | activation=linear
458 |
459 | # Downsample
460 |
461 | [convolutional]
462 | batch_normalize=1
463 | filters=1024
464 | size=3
465 | stride=2
466 | pad=1
467 | activation=leaky
468 |
469 | [convolutional]
470 | batch_normalize=1
471 | filters=512
472 | size=1
473 | stride=1
474 | pad=1
475 | activation=leaky
476 |
477 | [convolutional]
478 | batch_normalize=1
479 | filters=1024
480 | size=3
481 | stride=1
482 | pad=1
483 | activation=leaky
484 |
485 | [shortcut]
486 | from=-3
487 | activation=linear
488 |
489 | [convolutional]
490 | batch_normalize=1
491 | filters=512
492 | size=1
493 | stride=1
494 | pad=1
495 | activation=leaky
496 |
497 | [convolutional]
498 | batch_normalize=1
499 | filters=1024
500 | size=3
501 | stride=1
502 | pad=1
503 | activation=leaky
504 |
505 | [shortcut]
506 | from=-3
507 | activation=linear
508 |
509 | [convolutional]
510 | batch_normalize=1
511 | filters=512
512 | size=1
513 | stride=1
514 | pad=1
515 | activation=leaky
516 |
517 | [convolutional]
518 | batch_normalize=1
519 | filters=1024
520 | size=3
521 | stride=1
522 | pad=1
523 | activation=leaky
524 |
525 | [shortcut]
526 | from=-3
527 | activation=linear
528 |
529 | [convolutional]
530 | batch_normalize=1
531 | filters=512
532 | size=1
533 | stride=1
534 | pad=1
535 | activation=leaky
536 |
537 | [convolutional]
538 | batch_normalize=1
539 | filters=1024
540 | size=3
541 | stride=1
542 | pad=1
543 | activation=leaky
544 |
545 | [shortcut]
546 | from=-3
547 | activation=linear
548 |
549 | ######################
550 |
551 | [convolutional]
552 | batch_normalize=1
553 | filters=512
554 | size=1
555 | stride=1
556 | pad=1
557 | activation=leaky
558 |
559 | [convolutional]
560 | batch_normalize=1
561 | size=3
562 | stride=1
563 | pad=1
564 | filters=1024
565 | activation=leaky
566 |
567 | [convolutional]
568 | batch_normalize=1
569 | filters=512
570 | size=1
571 | stride=1
572 | pad=1
573 | activation=leaky
574 |
575 | ### SPP ###
576 | [maxpool]
577 | stride=1
578 | size=5
579 |
580 | [route]
581 | layers=-2
582 |
583 | [maxpool]
584 | stride=1
585 | size=9
586 |
587 | [route]
588 | layers=-4
589 |
590 | [maxpool]
591 | stride=1
592 | size=13
593 |
594 | [route]
595 | layers=-1,-3,-5,-6
596 |
597 | ### End SPP ###
598 |
599 | [convolutional]
600 | batch_normalize=1
601 | filters=512
602 | size=1
603 | stride=1
604 | pad=1
605 | activation=leaky
606 |
607 |
608 | [convolutional]
609 | batch_normalize=1
610 | size=3
611 | stride=1
612 | pad=1
613 | filters=1024
614 | activation=leaky
615 |
616 | [convolutional]
617 | batch_normalize=1
618 | filters=512
619 | size=1
620 | stride=1
621 | pad=1
622 | activation=leaky
623 |
624 | [convolutional]
625 | batch_normalize=1
626 | size=3
627 | stride=1
628 | pad=1
629 | filters=1024
630 | activation=leaky
631 |
632 | [convolutional]
633 | size=1
634 | stride=1
635 | pad=1
636 | filters=255
637 | activation=linear
638 |
639 |
640 | [yolo]
641 | mask = 6,7,8
642 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223
643 | classes=80
644 | num=9
645 | jitter=.3
646 | ignore_thresh = .7
647 | truth_thresh = 1
648 | random=1
649 |
650 |
651 | [route]
652 | layers = -4
653 |
654 | [convolutional]
655 | batch_normalize=1
656 | filters=256
657 | size=1
658 | stride=1
659 | pad=1
660 | activation=leaky
661 |
662 | [upsample]
663 | stride=2
664 |
665 | [route]
666 | layers = -1, 61
667 |
668 |
669 |
670 | [convolutional]
671 | batch_normalize=1
672 | filters=256
673 | size=1
674 | stride=1
675 | pad=1
676 | activation=leaky
677 |
678 | [convolutional]
679 | batch_normalize=1
680 | size=3
681 | stride=1
682 | pad=1
683 | filters=512
684 | activation=leaky
685 |
686 | [convolutional]
687 | batch_normalize=1
688 | filters=256
689 | size=1
690 | stride=1
691 | pad=1
692 | activation=leaky
693 |
694 | [convolutional]
695 | batch_normalize=1
696 | size=3
697 | stride=1
698 | pad=1
699 | filters=512
700 | activation=leaky
701 |
702 | [convolutional]
703 | batch_normalize=1
704 | filters=256
705 | size=1
706 | stride=1
707 | pad=1
708 | activation=leaky
709 |
710 | [convolutional]
711 | batch_normalize=1
712 | size=3
713 | stride=1
714 | pad=1
715 | filters=512
716 | activation=leaky
717 |
718 | [convolutional]
719 | size=1
720 | stride=1
721 | pad=1
722 | filters=255
723 | activation=linear
724 |
725 |
726 | [yolo]
727 | mask = 3,4,5
728 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223
729 | classes=80
730 | num=9
731 | jitter=.3
732 | ignore_thresh = .7
733 | truth_thresh = 1
734 | random=1
735 |
736 |
737 |
738 | [route]
739 | layers = -4
740 |
741 | [convolutional]
742 | batch_normalize=1
743 | filters=128
744 | size=1
745 | stride=1
746 | pad=1
747 | activation=leaky
748 |
749 | [upsample]
750 | stride=2
751 |
752 | [route]
753 | layers = -1, 36
754 |
755 |
756 |
757 | [convolutional]
758 | batch_normalize=1
759 | filters=128
760 | size=1
761 | stride=1
762 | pad=1
763 | activation=leaky
764 |
765 | [convolutional]
766 | batch_normalize=1
767 | size=3
768 | stride=1
769 | pad=1
770 | filters=256
771 | activation=leaky
772 |
773 | [convolutional]
774 | batch_normalize=1
775 | filters=128
776 | size=1
777 | stride=1
778 | pad=1
779 | activation=leaky
780 |
781 | [convolutional]
782 | batch_normalize=1
783 | size=3
784 | stride=1
785 | pad=1
786 | filters=256
787 | activation=leaky
788 |
789 | [convolutional]
790 | batch_normalize=1
791 | filters=128
792 | size=1
793 | stride=1
794 | pad=1
795 | activation=leaky
796 |
797 | [convolutional]
798 | batch_normalize=1
799 | size=3
800 | stride=1
801 | pad=1
802 | filters=256
803 | activation=leaky
804 |
805 | [convolutional]
806 | size=1
807 | stride=1
808 | pad=1
809 | filters=255
810 | activation=linear
811 |
812 |
813 | [yolo]
814 | mask = 0,1,2
815 | anchors = 9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223
816 | classes=80
817 | num=9
818 | jitter=.3
819 | ignore_thresh = .7
820 | truth_thresh = 1
821 | random=1
822 |
--------------------------------------------------------------------------------
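Each [yolo] block above selects its anchor boxes from the shared nine-anchor list through its mask indices (6,7,8 for the coarsest-resolution head down to 0,1,2 for the finest). A minimal Python sketch of that lookup, using the anchor string from this cfg:

    anchors = [9,11, 25,27, 33,63, 71,43, 62,120, 135,86, 123,199, 257,100, 264,223]
    anchor_pairs = list(zip(anchors[0::2], anchors[1::2]))  # nine (w, h) pairs
    mask = [6, 7, 8]                                        # indices used by the first [yolo] head
    print([anchor_pairs[i] for i in mask])                  # [(123, 199), (257, 100), (264, 223)]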
/data/coco.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=../coco/trainvalno5k.txt
3 | valid=../coco/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
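The *.data files in this directory are plain key=value configs; detect.py and the pruning scripts read them through parse_data_cfg from utils/parse_config.py. A minimal sketch of how such a file can be parsed (not necessarily the repository's exact implementation):

    def parse_data_cfg_sketch(path):
        options = {}
        with open(path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                key, value = line.split('=', 1)
                options[key.strip()] = value.strip()
        return options

    # parse_data_cfg_sketch('data/coco.data')['names'] -> 'data/coco.names'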
/data/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/data/coco_1000img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/coco_1000img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1000val.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/coco_1000val.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_16img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_16img.txt
3 | valid=./data/coco_16img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_16img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 |
--------------------------------------------------------------------------------
/data/coco_1cls.data:
--------------------------------------------------------------------------------
1 | classes=1
2 | train=./data/coco_1cls.txt
3 | valid=./data/coco_1cls.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1cls.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg
2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg
3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg
4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg
5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg
6 |
--------------------------------------------------------------------------------
/data/coco_1img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1img.txt
3 | valid=./data/coco_1img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_1img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/val2014/COCO_val2014_000000581886.jpg
2 |
--------------------------------------------------------------------------------
/data/coco_1k5k.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_1000img.txt
3 | valid=./data/5k.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_32img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_32img.txt
3 | valid=./data/coco_32img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_32img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg
18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg
19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg
20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg
21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg
22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg
23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg
24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg
25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg
26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg
27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg
28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg
29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg
30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg
31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg
32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg
33 |
--------------------------------------------------------------------------------
/data/coco_500val.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_500img.txt
3 | valid=./data/coco_500val.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_64img.data:
--------------------------------------------------------------------------------
1 | classes=80
2 | train=./data/coco_64img.txt
3 | valid=./data/coco_64img.txt
4 | names=data/coco.names
5 | backup=backup/
6 | eval=coco
7 |
--------------------------------------------------------------------------------
/data/coco_64img.txt:
--------------------------------------------------------------------------------
1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg
2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg
3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg
4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg
5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg
6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg
7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg
8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg
9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg
10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg
11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg
12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg
13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg
14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg
15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg
16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg
17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg
18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg
19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg
20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg
21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg
22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg
23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg
24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg
25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg
26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg
27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg
28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg
29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg
30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg
31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg
32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg
33 | ../coco/images/train2014/COCO_train2014_000000000263.jpg
34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg
35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg
36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg
37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg
38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg
39 | ../coco/images/train2014/COCO_train2014_000000000321.jpg
40 | ../coco/images/train2014/COCO_train2014_000000000322.jpg
41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg
42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg
43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg
44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg
45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg
46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg
47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg
48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg
49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg
50 | ../coco/images/train2014/COCO_train2014_000000000404.jpg
51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg
52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg
53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg
54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg
55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg
56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg
57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg
58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg
59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg
60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg
61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg
62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg
63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg
64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg
65 |
--------------------------------------------------------------------------------
/data/coco_paper.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | street sign
13 | stop sign
14 | parking meter
15 | bench
16 | bird
17 | cat
18 | dog
19 | horse
20 | sheep
21 | cow
22 | elephant
23 | bear
24 | zebra
25 | giraffe
26 | hat
27 | backpack
28 | umbrella
29 | shoe
30 | eye glasses
31 | handbag
32 | tie
33 | suitcase
34 | frisbee
35 | skis
36 | snowboard
37 | sports ball
38 | kite
39 | baseball bat
40 | baseball glove
41 | skateboard
42 | surfboard
43 | tennis racket
44 | bottle
45 | plate
46 | wine glass
47 | cup
48 | fork
49 | knife
50 | spoon
51 | bowl
52 | banana
53 | apple
54 | sandwich
55 | orange
56 | broccoli
57 | carrot
58 | hot dog
59 | pizza
60 | donut
61 | cake
62 | chair
63 | couch
64 | potted plant
65 | bed
66 | mirror
67 | dining table
68 | window
69 | desk
70 | toilet
71 | door
72 | tv
73 | laptop
74 | mouse
75 | remote
76 | keyboard
77 | cell phone
78 | microwave
79 | oven
80 | toaster
81 | sink
82 | refrigerator
83 | blender
84 | book
85 | clock
86 | vase
87 | scissors
88 | teddy bear
89 | hair drier
90 | toothbrush
91 | hair brush
--------------------------------------------------------------------------------
/data/converter.py:
--------------------------------------------------------------------------------
1 | import scipy.io as sio
2 | from PIL import Image
3 | import os, glob
4 | import datetime
5 | import shutil
6 |
7 | running_from_path = os.getcwd()
8 | created_images_dir = 'images'
9 | created_labels_dir = 'labels'
10 | data_dir = 'data' # data_dir is the folder where this script lives
11 |
12 | def hms_string(sec_elapsed): # format the elapsed time for display
13 | h = int(sec_elapsed / (60 * 60))
14 | m = int((sec_elapsed % (60 * 60)) / 60)
15 | s = sec_elapsed % 60.
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s)
17 |
18 | def generate_dir(set_name, root_path): # create the matching sub-folders under images and labels
19 | images_dir = os.path.join(root_path, 'images')
20 | annotation_dir = os.path.join(root_path, 'annotations')
21 |
22 | new_images_dir = os.path.join(created_images_dir, set_name) # images are copied from the original folder into this one
23 | new_annotation_dir = os.path.join(created_labels_dir, set_name)
24 |
25 | if not os.path.exists(new_images_dir):
26 | os.makedirs(new_images_dir)
27 |
28 | if not os.path.exists(new_annotation_dir):
29 | os.makedirs(new_annotation_dir)
30 |
31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # copy the images from the original folder into the new one
32 | shutil.copy(img, new_images_dir)
33 |
34 | os.chdir(annotation_dir) # switch into the annotation directory
35 | matlab_annotations = glob.glob("*.mat") # file names only, no paths
36 | os.chdir(running_from_path) # switch back to the original working directory
37 |
38 | for matfile in matlab_annotations:
39 | filename = matfile.split(".")[0]
40 |
41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg"))
42 |
43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False)
44 |
45 | boxes = content["boxes"]
46 |
47 | width, height = pil_image.size
48 |
49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs:
50 | for box_idx, box in enumerate(boxes.T):
51 | a = box[0][0][0][0]
52 | b = box[0][0][0][1]
53 | c = box[0][0][0][2]
54 | d = box[0][0][0][3]
55 |
56 | aXY = (a[0][1], a[0][0])
57 | bXY = (b[0][1], b[0][0])
58 | cXY = (c[0][1], c[0][0])
59 | dXY = (d[0][1], d[0][0])
60 |
61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0])
62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0])
63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1])
64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1])
65 |
66 | # clip to keep the box inside the image bounds
67 | maxX = min(maxX, width-1)
68 | minX = max(minX, 0)
69 | maxY = min(maxY, height-1)
70 | minY = max(minY, 0)
71 |
72 | # normalized width = (box width / image width)
73 | norm_width = (maxX - minX) / width
74 |
75 | # normalized height = (box height / image height)
76 | norm_height = (maxY - minY) / height
77 |
78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2
79 |
80 | norm_center_x = center_x / width
81 | norm_center_y = center_y / height
82 |
83 | if box_idx != 0:
84 | hs.write("\n")
85 |
86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # the leading 0 is the class id
87 |
88 | def create_txt(dirlist, filename):
89 | with open(filename, "w") as txtfile: # write the txt file under the data folder
90 | imglist = []
91 |
92 | for dir in dirlist: # dir='images/test'
93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg'
94 |
95 | for idx, img in enumerate(imglist):
96 | if idx != 0:
97 | txtfile.write("\n")
98 | txtfile.write(os.path.join(data_dir, img)) # prepend the data directory
99 |
100 | if __name__ == '__main__':
101 | start_time = datetime.datetime.now()
102 |
103 | generate_dir("train", "hand_dataset/training_dataset/training_data") # the first argument is the name of the generated folder
104 | generate_dir("test", "hand_dataset/test_dataset/test_data")
105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data")
106 |
107 | create_txt((os.path.join(created_images_dir, 'train'), # merge the train and validation images into train.txt
108 | os.path.join(created_images_dir, 'validation')),
109 | 'train.txt')
110 | create_txt((os.path.join(created_images_dir, 'test'), ),
111 | 'valid.txt')
112 |
113 | end_time = datetime.datetime.now()
114 | seconds_elapsed = (end_time - start_time).total_seconds()
115 | print("It took {} to execute this".format(hms_string(seconds_elapsed)))
--------------------------------------------------------------------------------
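converter.py writes the Oxford Hand annotations in the normalized YOLO label format "class x_center y_center width height", all expressed as fractions of the image size. A worked sketch of that conversion for one box with made-up corner coordinates:

    # hypothetical box corners (pixels) on a 640x480 image
    min_x, max_x, min_y, max_y = 100, 200, 50, 150
    img_w, img_h = 640, 480

    x_center = (min_x + max_x) / 2 / img_w   # 0.234375
    y_center = (min_y + max_y) / 2 / img_h   # ~0.208333
    width    = (max_x - min_x) / img_w       # 0.15625
    height   = (max_y - min_y) / img_h       # ~0.208333
    print("0 %f %f %f %f" % (x_center, y_center, width, height))  # class 0 = hand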
/data/get_coco_dataset.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh
3 |
4 | # Clone COCO API
5 | git clone https://github.com/pdollar/coco && cd coco
6 |
7 | # Download Images
8 | mkdir images && cd images
9 | wget -c https://pjreddie.com/media/files/train2014.zip
10 | wget -c https://pjreddie.com/media/files/val2014.zip
11 |
12 | # Unzip
13 | unzip -q train2014.zip
14 | unzip -q val2014.zip
15 |
16 | # (optional) Delete zip files
17 | rm -rf *.zip
18 |
19 | cd ..
20 |
21 | # Download COCO Metadata
22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip
23 | wget -c https://pjreddie.com/media/files/coco/5k.part
24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part
25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz
26 | tar xzf labels.tgz
27 | unzip -q instances_train-val2014.zip
28 |
29 | # Set Up Image Lists
30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt
31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt
32 |
33 | # get xview training data
34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ'
35 | # tar -xvzf train_images.tgz
36 | # sudo rm -rf train_images/._*
37 | # lastly convert each .tif to a .bmp for faster loading in cv2
38 |
39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image
40 |
--------------------------------------------------------------------------------
/data/get_coco_dataset_gdrive.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859
3 |
4 | # Zip coco folder
5 | # zip -r coco.zip coco
6 | # tar -czvf coco.tar.gz coco
7 |
8 | # Set fileid and filename
9 | filename="coco.zip"
10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip
11 |
12 | # Download from Google Drive, accepting presented query
13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
15 | rm ./cookie
16 |
17 | # Unzip
18 | unzip -q ${filename} # for coco.zip
19 | # tar -xzf ${filename} # for coco.tar.gz
20 |
--------------------------------------------------------------------------------
/data/oxfordhand.data:
--------------------------------------------------------------------------------
1 | classes= 1
2 | train=data/train.txt
3 | valid=data/valid.txt
4 | names=data/oxfordhand.names
5 |
--------------------------------------------------------------------------------
/data/oxfordhand.names:
--------------------------------------------------------------------------------
1 | hand
2 |
3 |
--------------------------------------------------------------------------------
/data/samples/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/data/samples/bus.jpg
--------------------------------------------------------------------------------
/data/samples/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/data/samples/zidane.jpg
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from sys import platform
3 |
4 | from models import * # set ONNX_EXPORT in models.py
5 | from utils.datasets import *
6 | from utils.utils import *
7 |
8 |
9 | def detect(save_txt=False, save_img=False):
10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width)
11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')
13 |
14 | # Initialize
15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)
16 | if os.path.exists(out):
17 | shutil.rmtree(out) # delete output folder
18 | os.makedirs(out) # make new output folder
19 |
20 | # Initialize model
21 | model = Darknet(opt.cfg, img_size)
22 |
23 | # Load weights
24 | attempt_download(weights)
25 | if weights.endswith('.pt'): # pytorch format
26 | model.load_state_dict(torch.load(weights, map_location=device)['model'])
27 | else: # darknet format
28 | _ = load_darknet_weights(model, weights)
29 |
30 | # Second-stage classifier
31 | classify = False
32 | if classify:
33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize
34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights
35 | modelc.to(device).eval()
36 |
37 | # Fuse Conv2d + BatchNorm2d layers
38 | # model.fuse()
39 |
40 | # Eval mode
41 | model.to(device).eval()
42 |
43 | # Export mode
44 | if ONNX_EXPORT:
45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192)
46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True)
47 | return
48 |
49 | # Half precision
50 | half = half and device.type != 'cpu' # half precision only supported on CUDA
51 | if half:
52 | model.half()
53 |
54 | # Set Dataloader
55 | vid_path, vid_writer = None, None
56 | if webcam:
57 | view_img = True
58 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference
59 | dataset = LoadStreams(source, img_size=img_size, half=half)
60 | else:
61 | save_img = True
62 | dataset = LoadImages(source, img_size=img_size, half=half)
63 |
64 | # Get classes and colors
65 | classes = load_classes(parse_data_cfg(opt.data)['names'])
66 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]
67 |
68 | # Run inference
69 | t0 = time.time()
70 | for path, img, im0s, vid_cap in dataset:
71 | t = time.time()
72 |
73 | # Get detections
74 | img = torch.from_numpy(img).to(device)
75 | if img.ndimension() == 3:
76 | img = img.unsqueeze(0)
77 | pred = model(img)[0]
78 |
79 | if opt.half:
80 | pred = pred.float()
81 |
82 | # Apply NMS
83 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres)
84 |
85 | # Apply
86 | if classify:
87 | pred = apply_classifier(pred, modelc, img, im0s)
88 |
89 | # Process detections
90 | for i, det in enumerate(pred): # detections per image
91 | if webcam: # batch_size >= 1
92 | p, s, im0 = path[i], '%g: ' % i, im0s[i]
93 | else:
94 | p, s, im0 = path, '', im0s
95 |
96 | save_path = str(Path(out) / Path(p).name)
97 | s += '%gx%g ' % img.shape[2:] # print string
98 | if det is not None and len(det):
99 | # Rescale boxes from img_size to im0 size
100 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
101 |
102 | # Print results
103 | for c in det[:, -1].unique():
104 | n = (det[:, -1] == c).sum() # detections per class
105 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string
106 |
107 | # Write results
108 | for *xyxy, conf, _, cls in det:
109 | if save_txt: # Write to file
110 | with open(save_path + '.txt', 'a') as file:
111 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))
112 |
113 | if save_img or view_img: # Add bbox to image
114 | label = '%s %.2f' % (classes[int(cls)], conf)
115 | #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
116 | plot_one_box(xyxy, im0, label=None, color=colors[int(cls)])
117 |
118 | print('%sDone. (%.3fs)' % (s, time.time() - t))
119 |
120 | # Stream results
121 | if view_img:
122 | cv2.imshow(p, im0)
123 |
124 | # Save results (image with detections)
125 | if save_img:
126 | if dataset.mode == 'images':
127 | cv2.imwrite(save_path, im0)
128 | else:
129 | if vid_path != save_path: # new video
130 | vid_path = save_path
131 | if isinstance(vid_writer, cv2.VideoWriter):
132 | vid_writer.release() # release previous video writer
133 |
134 | fps = vid_cap.get(cv2.CAP_PROP_FPS)
135 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
136 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
137 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h))
138 | vid_writer.write(im0)
139 |
140 | if save_txt or save_img:
141 | print('Results saved to %s' % os.getcwd() + os.sep + out)
142 | if platform == 'darwin': # MacOS
143 | os.system('open ' + out + ' ' + save_path)
144 |
145 | print('Done. (%.3fs)' % (time.time() - t0))
146 |
147 |
148 | if __name__ == '__main__':
149 | parser = argparse.ArgumentParser()
150 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
151 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
152 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
153 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam
154 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder
155 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
156 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
157 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
158 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)')
159 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference')
160 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')
161 | parser.add_argument('--view-img', action='store_true', help='display results')
162 | opt = parser.parse_args()
163 | print(opt)
164 |
165 | with torch.no_grad():
166 | detect()
167 |
--------------------------------------------------------------------------------
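When run with --save-txt, detect.py appends one line per detection to the output image path plus ".txt" using the '%g ' * 6 format string above, i.e. "x1 y1 x2 y2 class conf" in pixels of the original image. A small sketch for reading those files back, under that assumption:

    def read_detections(txt_path):
        detections = []
        with open(txt_path) as f:
            for line in f:
                x1, y1, x2, y2, cls, conf = map(float, line.split())
                detections.append((int(cls), conf, (x1, y1, x2, y2)))
        return detections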
/github_files/64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/github_files/64067835-51d5b500-cc2f-11e9-982e-843f7f9a6ea2.jpg
--------------------------------------------------------------------------------
/normal_prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import torch
4 | import numpy as np
5 | from copy import deepcopy
6 | from test import test
7 | from terminaltables import AsciiTable
8 | import time
9 | from utils.utils import *
10 | from utils.prune_utils import *
11 | import os
12 |
13 |
14 | class opt():
15 | model_def = "cfg/yolov3-hand.cfg"
16 | data_config = "cfg/oxfordhand.data"
17 | model = 'weights/last.pt'
18 |
19 | # select a specific GPU
20 | #torch.cuda.set_device(2)
21 | percent = 0.5
22 |
23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 | model = Darknet(opt.model_def).to(device)
25 |
26 | if opt.model:
27 | if opt.model.endswith(".pt"):
28 | model.load_state_dict(torch.load(opt.model, map_location=device)['model'])
29 | else:
30 | _ = load_darknet_weights(model, opt.model)
31 |
32 |
33 | data_config = parse_data_cfg(opt.data_config)
34 |
35 | valid_path = data_config["valid"]
36 | class_names = load_classes(data_config["names"])
37 |
38 |
39 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config)
40 |
41 |
42 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
43 |
44 | # this should not be commented out; it is needed again later
45 | with torch.no_grad():
46 | origin_model_metric = eval_model(model)
47 | origin_nparameters = obtain_num_parameters(model)
48 |
49 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs)
50 |
51 |
52 | # copy the gamma (scale) parameters of every BN layer to be pruned into the bn_weights list
53 | bn_weights = gather_bn_weights(model.module_list, prune_idx)
54 |
55 | # torch.sort returns a pair: the sorted values and their original indices; [0] keeps the sorted values
56 | sorted_bn = torch.sort(bn_weights)[0]
57 |
58 |
59 | # highest threshold that avoids pruning every channel of some layer (the minimum over layers of each BN layer's maximum gamma is the upper bound)
60 | highest_thre = []
61 | for idx in prune_idx:
62 | # .item() extracts the scalar value from a tensor
63 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item())
64 | highest_thre = min(highest_thre)
65 |
66 | # find the prune percentage that corresponds to highest_thre
67 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)
68 |
69 | print(f'Threshold should be less than {highest_thre:.4f}.')
70 | print(f'The corresponding prune ratio is {percent_limit:.3f}.')
71 |
72 |
73 | # This function matters for several reasons:
74 | # 1. first deep-copy the original model to obtain model_copy
75 | # 2. in model_copy, set the BN gamma parameters that fall below the threshold to 0
76 | # 3. a BN layer computes y = gamma*x + beta, so once gamma is zeroed the layer only adds the bias beta
77 | # 4. curiously, network slimming zeroes both gamma and beta, while here only gamma is zeroed, yet it still works well: another paper already notes that beta's effect can first be folded into
78 | # the next convolution layer before this layer's gamma is pruned
79 |
80 | # In the simplest way possible, this function lets us quickly preview the effect of pruning
81 |
82 |
83 |
84 | def prune_and_eval(model, sorted_bn, percent=.0):
85 | model_copy = deepcopy(model)
86 | thre_index = int(len(sorted_bn) * percent)
87 | # compute the gamma threshold; every channel whose gamma falls below it is pruned
88 | thre = sorted_bn[thre_index]
89 |
90 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!')
91 |
92 | remain_num = 0
93 | for idx in prune_idx:
94 |
95 | bn_module = model_copy.module_list[idx][1]
96 |
97 | mask = obtain_bn_mask(bn_module, thre)
98 |
99 | remain_num += int(mask.sum())
100 | bn_module.weight.data.mul_(mask)
101 | with torch.no_grad():
102 | mAP = eval_model(model_copy)[1].mean()
103 |
104 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
105 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}')
106 | print(f'mAP of the pruned model is {mAP:.4f}')
107 |
108 | return thre
109 |
110 |
111 | threshold = prune_and_eval(model, sorted_bn, percent)
112 |
113 |
114 |
115 | #****************************************************************
116 | # the code above only previews the pruning effect; it does not build the pruned network, so the code below constructs the new structure and copies the old model's weights into it
117 |
118 |
119 | #%%
120 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
121 |
122 | pruned = 0
123 | total = 0
124 | num_filters = []
125 | filters_mask = []
126 | # CBL_idx holds every convolutional layer that has BN (the conv layer right before a YOLO layer has no BN)
127 | for idx in CBL_idx:
128 | bn_module = model.module_list[idx][1]
129 | if idx in prune_idx:
130 |
131 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy()
132 | remain = int(mask.sum())
133 | pruned = pruned + mask.shape[0] - remain
134 |
135 | if remain == 0:
136 | print("Channels would be all pruned!")
137 | raise Exception
138 |
139 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
140 | f'remaining channel: {remain:>4d}')
141 | else:
142 | mask = np.ones(bn_module.weight.data.shape)
143 | remain = mask.shape[0]
144 |
145 | total += mask.shape[0]
146 | num_filters.append(remain)
147 | filters_mask.append(mask.copy())
148 |
149 | # hence the prune_ratio computed here is: pruned gamma parameters / all gamma parameters over CBL_idx
150 | prune_ratio = pruned / total
151 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
152 |
153 | return num_filters, filters_mask
154 |
155 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx)
156 |
157 |
158 | # CBLidx2mask maps each index in CBL_idx to the mask of its BN layer
159 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
160 |
161 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask)
162 |
163 |
164 |
165 |
166 | with torch.no_grad():
167 | mAP = eval_model(pruned_model)[1].mean()
168 | print('after prune_model_keep_size map is {}'.format(mAP))
169 |
170 |
171 | # take the original model's module_defs and update the filter counts in those defs
172 | compact_module_defs = deepcopy(model.module_defs)
173 | for idx, num in zip(CBL_idx, num_filters):
174 | assert compact_module_defs[idx]['type'] == 'convolutional'
175 | compact_module_defs[idx]['filters'] = str(num)
176 |
177 |
178 |
179 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device)
180 | compact_nparameters = obtain_num_parameters(compact_model)
181 |
182 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
183 |
184 |
185 | random_input = torch.rand((16, 3, 416, 416)).to(device)
186 |
187 | def obtain_avg_forward_time(input, model, repeat=200):
188 |
189 | model.eval()
190 | start = time.time()
191 | with torch.no_grad():
192 | for i in range(repeat):
193 | output = model(input)
194 | avg_infer_time = (time.time() - start) / repeat
195 |
196 | return avg_infer_time, output
197 |
198 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
199 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
200 |
201 |
202 |
203 | # evaluate the pruned model on the test set and count its parameters
204 | with torch.no_grad():
205 | compact_model_metric = eval_model(compact_model)
206 |
207 |
208 | # compare parameter counts and metric performance before and after pruning
209 | metric_table = [
210 | ["Metric", "Before", "After"],
211 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'],
212 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
213 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
214 | ]
215 | print(AsciiTable(metric_table).table)
216 |
217 |
218 |
219 | # generate the pruned cfg file and save the model
220 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_')
221 |
222 | # compact_module_defs turned the anchors from a string into an array, so turn them back into a string here
223 |
224 | for item in compact_module_defs:
225 | if item['type']=='yolo':
226 | item['anchors']='10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326'
227 |
228 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
229 | print(f'Config file has been saved: {pruned_cfg_file}')
230 |
231 | #compact_model_name = opt.model.replace('/', f'/prune_{percent}_')
232 | compact_model_name = 'weights/yolov3_hand_normal_pruning_'+str(percent)+'percent.weights'
233 |
234 | save_weights(compact_model, path=compact_model_name)
235 | print(f'Compact model has been saved: {compact_model_name}')
236 |
237 |
238 |
239 |
--------------------------------------------------------------------------------
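The helpers gather_bn_weights, obtain_bn_mask, prune_model_keep_size and init_weights_from_loose_model used above live in utils/prune_utils.py and are not reproduced in this listing. A minimal sketch of the two that the thresholding logic relies on, assuming the usual network-slimming definitions (the repository's implementation may differ in detail):

    import torch

    def gather_bn_weights_sketch(module_list, prune_idx):
        # concatenate |gamma| of every BN layer that is a pruning candidate
        return torch.cat([module_list[i][1].weight.data.abs().clone() for i in prune_idx])

    def obtain_bn_mask_sketch(bn_module, thre):
        # 1.0 for channels whose |gamma| is at or above the threshold, 0.0 otherwise
        return bn_module.weight.data.abs().ge(thre).float()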
/prune_tiny_yolo.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import torch
4 | import numpy as np
5 | from copy import deepcopy
6 | from test import test
7 | from terminaltables import AsciiTable
8 | import time
9 | import os
10 | from utils.tiny_prune_utils import *
11 |
12 | class opt():
13 | model_def = "cfg/yolov3-tiny-hand.cfg"
14 | data_config = "cfg/oxfordhand.data"
15 | model = 'weights/last.pt'
16 |
17 | percent = 0.3
18 |
19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20 | model = Darknet(opt.model_def).to(device)
21 |
22 | if opt.model:
23 | if opt.model.endswith(".pt"):
24 | model.load_state_dict(torch.load(opt.model, map_location=device)['model'])
25 | else:
26 | _ = load_darknet_weights(model, opt.model)
27 |
28 | data_config = parse_data_cfg(opt.data_config)
29 |
30 | valid_path = data_config["valid"]
31 | class_names = load_classes(data_config["names"])
32 |
33 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config)
34 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
35 |
36 | # this should not be commented out; it is needed again later
37 | with torch.no_grad():
38 | origin_model_metric = eval_model(model)
39 | origin_nparameters = obtain_num_parameters(model)
40 |
41 |
42 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs)
43 |
44 |
45 |
46 | # copy the gamma (scale) parameters of every BN layer to be pruned into the bn_weights list
47 | bn_weights = gather_bn_weights(model.module_list, prune_idx)
48 |
49 |
50 | # torch.sort returns a pair: the sorted values and their original indices; [0] keeps the sorted values
51 | sorted_bn = torch.sort(bn_weights)[0]
52 |
53 |
54 |
55 | # highest threshold that avoids pruning every channel of some layer (the minimum over layers of each BN layer's maximum gamma is the upper bound)
56 | highest_thre = []
57 | for idx in prune_idx:
58 | # .item() extracts the scalar value from a tensor
59 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item())
60 | highest_thre = min(highest_thre)
61 |
62 | # find the prune percentage that corresponds to highest_thre
63 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)
64 |
65 | print(f'Threshold should be less than {highest_thre:.4f}.')
66 | print(f'The corresponding prune ratio is {percent_limit:.3f}.')
67 |
68 |
69 |
70 |
71 | # This function matters for several reasons:
72 | # 1. first deep-copy the original model to obtain model_copy
73 | # 2. in model_copy, set the BN gamma parameters that fall below the threshold to 0
74 | # 3. a BN layer computes y = gamma*x + beta, so once gamma is zeroed the layer only adds the bias beta
75 | # 4. curiously, network slimming zeroes both gamma and beta, while here only gamma is zeroed, yet it still works well: another paper already notes that beta's effect can first be folded into
76 | # the next convolution layer before this layer's gamma is pruned
77 |
78 | # In the simplest way possible, this function lets us quickly preview the effect of pruning
79 |
80 |
81 |
82 | def prune_and_eval(model, sorted_bn, percent=.0):
83 | model_copy = deepcopy(model)
84 | thre_index = int(len(sorted_bn) * percent)
85 | # compute the gamma threshold; every channel whose gamma falls below it is pruned
86 | thre = sorted_bn[thre_index]
87 |
88 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!')
89 |
90 | remain_num = 0
91 | for idx in prune_idx:
92 |
93 | bn_module = model_copy.module_list[idx][1]
94 |
95 | mask = obtain_bn_mask(bn_module, thre)
96 |
97 | remain_num += int(mask.sum())
98 | bn_module.weight.data.mul_(mask)
99 |
100 | with torch.no_grad():
101 | mAP = eval_model(model_copy)[1].mean()
102 |
103 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
104 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}')
105 | print(f'mAP of the pruned model is {mAP:.4f}')
106 |
107 | return thre
108 |
109 |
110 | threshold = prune_and_eval(model, sorted_bn, percent)
111 |
112 |
113 |
114 | # ****************************************************************
115 | # the code above only previews the pruning effect; it does not build the pruned network, so the code below constructs the new structure and copies the old model's weights into it
116 |
117 |
118 |
119 |
120 |
121 |
122 | #%%
123 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
124 |
125 | pruned = 0
126 | total = 0
127 | num_filters = []
128 | filters_mask = []
129 | # CBL_idx holds every convolutional layer that has BN (the conv layer right before a YOLO layer has no BN)
130 | for idx in CBL_idx:
131 | bn_module = model.module_list[idx][1]
132 | if idx in prune_idx:
133 |
134 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy()
135 | remain = int(mask.sum())
136 | pruned = pruned + mask.shape[0] - remain
137 |
138 | if remain == 0:
139 | print("Channels would be all pruned!")
140 | raise Exception
141 |
142 | # print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
143 | # f'remaining channel: {remain:>4d}')
144 | else:
145 | mask = np.ones(bn_module.weight.data.shape)
146 | remain = mask.shape[0]
147 |
148 | total += mask.shape[0]
149 | num_filters.append(remain)
150 | filters_mask.append(mask.copy())
151 |
152 | # hence the prune_ratio computed here is: pruned gamma parameters / all gamma parameters over CBL_idx
153 | prune_ratio = pruned / total
154 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
155 |
156 | return num_filters, filters_mask
157 |
158 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx)
159 |
160 |
161 | # CBLidx2mask maps each index in CBL_idx to the mask of its BN layer
162 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
163 |
164 |
165 |
166 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask)
167 |
168 |
169 |
170 |
171 | with torch.no_grad():
172 | mAP = eval_model(pruned_model)[1].mean()
173 | print('after prune_model_keep_size map is {}'.format(mAP))
174 |
175 |
176 |
177 |
178 | #%%
179 |
180 | # take the original model's module_defs and update the filter counts in those defs
181 | compact_module_defs = deepcopy(model.module_defs)
182 | for idx, num in zip(CBL_idx, num_filters):
183 | assert compact_module_defs[idx]['type'] == 'convolutional'
184 | compact_module_defs[idx]['filters'] = str(num)
185 |
186 |
187 |
188 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device)
189 | compact_nparameters = obtain_num_parameters(compact_model)
190 |
191 |
192 | def get_input_mask2(module_defs, idx, CBLidx2mask):
193 |
194 | if idx == 0:
195 | # for the first conv layer the input mask has 3 channels (the image is RGB)
196 | return np.ones(3)
197 | if idx<=12:
198 | if module_defs[idx - 2]['type'] == 'convolutional':
199 | return CBLidx2mask[idx - 2]
200 |
201 | else:
202 | if module_defs[idx - 1]['type'] == 'convolutional':
203 | return CBLidx2mask[idx - 1]
204 | elif module_defs[idx - 1]['type'] == 'shortcut':
205 | return CBLidx2mask[idx - 2]
206 | elif module_defs[idx - 1]['type'] == 'route':
207 | route_in_idxs = []
208 | for layer_i in module_defs[idx - 1]['layers'].split(","):
209 | if int(layer_i) < 0:
210 | route_in_idxs.append(idx - 1 + int(layer_i))
211 | else:
212 | route_in_idxs.append(int(layer_i))
213 | if len(route_in_idxs) == 1:
214 | return CBLidx2mask[route_in_idxs[0]]
215 |
216 | elif len(route_in_idxs)==2:
217 | return np.concatenate([CBLidx2mask[route_in_idxs[0]-1],CBLidx2mask[route_in_idxs[1]]])
218 |
219 | else:
220 | print("Something wrong with route module!")
221 | raise Exception
222 |
223 | def init_weights_from_loose_model2(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask):
224 | # compact_model is the new (pruned) model, loose_model is the original model
225 | for idx in CBL_idx:
226 | compact_CBL = compact_model.module_list[idx]
227 | loose_CBL = loose_model.module_list[idx]
228 | # np.argwhere returns the indices of the non-zero elements
229 |
230 | #[3, 4, 7, 8, 9, 10, 11, 12, 13, 16, 19, 22, 23, 24, 26, 30, 31]
231 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist()
232 |
233 |
234 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1]
235 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone()
236 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone()
237 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone()
238 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone()
239 |
240 |
241 | input_mask = get_input_mask2(loose_model.module_defs, idx, CBLidx2mask)
242 |
243 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
244 |
245 |
246 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0]
247 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
248 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone()
249 |
250 | for idx in Conv_idx:
251 | compact_conv = compact_model.module_list[idx][0]
252 | loose_conv = loose_model.module_list[idx][0]
253 |
254 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask)
255 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
256 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
257 | compact_conv.bias.data = loose_conv.bias.data.clone()
258 |
259 |
260 | init_weights_from_loose_model2(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
261 |
262 |
263 | random_input = torch.rand((1, 3, 416, 416)).to(device)
264 |
265 | def obtain_avg_forward_time(input, model, repeat=200):
266 |
267 | model.eval()
268 | start = time.time()
269 | with torch.no_grad():
270 | for i in range(repeat):
271 | output = model(input)
272 | avg_infer_time = (time.time() - start) / repeat
273 |
274 | return avg_infer_time, output
275 |
276 |
277 |
278 |
279 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
280 |
281 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
282 |
283 |
284 |
285 | # evaluate the pruned model on the test set and count its parameters
286 | with torch.no_grad():
287 | compact_model_metric = eval_model(compact_model)
288 |
289 | #%%
290 | # compare parameter counts and metric performance before and after pruning
291 | metric_table = [
292 | ["Metric", "Before", "After"],
293 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'],
294 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
295 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
296 | ]
297 | print(AsciiTable(metric_table).table)
298 |
299 | #%%
300 | # generate the pruned cfg file and save the model
301 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_')
302 | # compact_module_defs turned the anchors from a string into an array, so turn them back into a string here
303 | for item in compact_module_defs:
304 | if item['type']=='yolo':
305 | item['anchors']='10,14, 23,27, 37,58, 81,82, 135,169, 344,319'
306 |
307 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
308 | print(f'Config file has been saved: {pruned_cfg_file}')
309 |
310 |
311 | compact_model_name = 'weights/yolov3_tiny_hand_pruning_'+str(percent)+'percent.weights'
312 |
313 | save_weights(compact_model, path=compact_model_name)
314 | print(f'Compact model has been saved: {compact_model_name}')
315 |
316 |
--------------------------------------------------------------------------------
/quant_dorefa.py:
--------------------------------------------------------------------------------
1 | import math
2 | import time
3 | import torch
4 | import torch.nn as nn
5 | import numpy as np
6 | from torch.autograd import Function
7 | import torch.nn.functional as F
8 |
9 |
10 |
11 |
12 | class ScaleSigner(Function):
13 | """take a real value x, output sign(x)*E(|x|)"""
14 | @staticmethod
15 | def forward(ctx, input):
16 | return torch.sign(input) * torch.mean(torch.abs(input))
17 |
18 | @staticmethod
19 | def backward(ctx, grad_output):
20 | return grad_output
21 |
22 |
23 | def scale_sign(input):
24 | return ScaleSigner.apply(input)
25 |
26 |
27 | # the quantization function that does the actual work
28 | class Quantizer(Function):
29 | @staticmethod
30 | def forward(ctx, input, nbit):
31 | scale = 2 ** nbit - 1
32 | return torch.round(input * scale) / scale
33 |
34 | @staticmethod
35 | def backward(ctx, grad_output):
36 | return grad_output, None
37 |
38 |
39 | def quantize(input, nbit):
40 | return Quantizer.apply(input, nbit)
41 |
42 |
43 | def dorefa_w(w, nbit_w):
44 | if nbit_w == 1:
45 | w = scale_sign(w)
46 | else:
47 | w = torch.tanh(w)
48 | # map the weights into [0, 1]
49 | w = w / (2 * torch.max(torch.abs(w))) + 0.5
50 | # quantize the weights
51 | w = 2 * quantize(w, nbit_w) - 1
52 |
53 | return w
54 |
55 |
56 | def dorefa_a(input, nbit_a):
57 | return quantize(torch.clamp(0.1 * input, 0, 1), nbit_a)
58 |
59 |
60 | class QuanConv(nn.Conv2d):
61 | """docstring for QuanConv"""
62 | def __init__(self, in_channels, out_channels, kernel_size, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=32,
63 | nbit_a=32, stride=1,
64 | padding=0, dilation=1, groups=1,
65 | bias=True):
66 | super(QuanConv, self).__init__(
67 | in_channels, out_channels, kernel_size, stride, padding, dilation,
68 | groups, bias)
69 | self.nbit_w = nbit_w
70 | self.nbit_a = nbit_a
71 | name_w_dict = {'dorefa': dorefa_w}
72 | name_a_dict = {'dorefa': dorefa_a}
73 | self.quan_w = name_w_dict[quan_name_w]
74 | self.quan_a = name_a_dict[quan_name_a]
75 |
76 | # @weak_script_method
77 | def forward(self, input):
78 | if self.nbit_w <=32:
79 | # quantize the convolution weights
80 | w = self.quan_w(self.weight, self.nbit_w)
81 | else:
82 | # leave the weights unchanged
83 | w = self.weight
84 |
85 | if self.nbit_a <=32:
86 | # quantize the activations
87 | x = self.quan_a(input, self.nbit_a)
88 | else:
89 | # leave the activations unchanged
90 | x = input
91 | # print('x unique',np.unique(x.detach().numpy()).shape)
92 | # print('w unique',np.unique(w.detach().numpy()).shape)
93 |
94 | # run the actual convolution
95 |
96 | output = F.conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups)
97 |
98 | return output
99 |
100 | class Linear_Q(nn.Linear):
101 | def __init__(self, in_features, out_features, bias=True, quan_name_w='dorefa', quan_name_a='dorefa', nbit_w=32, nbit_a=32):
102 | super(Linear_Q, self).__init__(in_features, out_features, bias)
103 | self.nbit_w = nbit_w
104 | self.nbit_a = nbit_a
105 | name_w_dict = {'dorefa': dorefa_w}
106 | name_a_dict = {'dorefa': dorefa_a}
107 | self.quan_w = name_w_dict[quan_name_w]
108 | self.quan_a = name_a_dict[quan_name_a]
109 |
110 | # @weak_script_method
111 | def forward(self, input):
112 | if self.nbit_w < 32:
113 | w = self.quan_w(self.weight, self.nbit_w)
114 | else:
115 | w = self.weight
116 |
117 | if self.nbit_a < 32:
118 | x = self.quan_a(input, self.nbit_a)
119 | else:
120 | x = input
121 |
122 | # print('x unique',np.unique(x.detach().numpy()))
123 | # print('w unique',np.unique(w.detach().numpy()))
124 |
125 | output = F.linear(x, w, self.bias)
126 |
127 | return output
128 |
129 |
130 |
--------------------------------------------------------------------------------
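A short usage sketch of the QuanConv layer defined above, quantizing both weights and activations to 4 bits; the tensor shapes are illustrative:

    import torch
    from quant_dorefa import QuanConv

    conv = QuanConv(in_channels=3, out_channels=16, kernel_size=3,
                    nbit_w=4, nbit_a=4, padding=1)
    x = torch.randn(1, 3, 32, 32)
    y = conv(x)      # weights and activations go through the DoReFa quantizers
    print(y.shape)   # torch.Size([1, 16, 32, 32])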
/regular_prune.py:
--------------------------------------------------------------------------------
1 | from models import *
2 | from utils.utils import *
3 | import torch
4 | import numpy as np
5 | from copy import deepcopy
6 | from test import test
7 | from terminaltables import AsciiTable
8 | import time
9 | from utils.utils import *
10 | from utils.prune_utils import *
11 | import os
12 |
13 | # regular pruning: kept channel counts are rounded to the sizes in filter_switch below
14 | class opt():
15 | model_def = "cfg/yolov3-hand.cfg"
16 | data_config = "cfg/oxfordhand.data"
17 | model = 'weights/last.pt'
18 |
19 | # select a specific GPU
20 | # torch.cuda.set_device(2)
21 | percent = 0.5
22 | filter_switch=[8,16,32,64,128,256,512,1024]
23 |
24 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25 | model = Darknet(opt.model_def).to(device)
26 |
27 | if opt.model:
28 | if opt.model.endswith(".pt"):
29 | model.load_state_dict(torch.load(opt.model, map_location=device)['model'])
30 | else:
31 | _ = load_darknet_weights(model, opt.model)
32 |
33 |
34 |
35 | data_config = parse_data_cfg(opt.data_config)
36 |
37 | valid_path = data_config["valid"]
38 | class_names = load_classes(data_config["names"])
39 |
40 | eval_model = lambda model:test(model=model,cfg=opt.model_def, data=opt.data_config)
41 |
42 |
43 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()])
44 |
45 | # this should not be commented out; it is needed again later
46 | with torch.no_grad():
47 | origin_model_metric = eval_model(model)
48 | origin_nparameters = obtain_num_parameters(model)
49 |
50 |
51 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs)
52 |
53 |
54 |
55 |
56 |
57 | # copy the gamma (scale) parameters of every BN layer to be pruned into the bn_weights list
58 | bn_weights = gather_bn_weights(model.module_list, prune_idx)
59 |
60 | # torch.sort returns a pair: the sorted values and their original indices; [0] keeps the sorted values
61 | sorted_bn = torch.sort(bn_weights)[0]
62 |
63 |
64 | # highest threshold that avoids pruning every channel of some layer (the minimum over layers of each BN layer's maximum gamma is the upper bound)
65 | highest_thre = []
66 | for idx in prune_idx:
67 | # .item() extracts the scalar value from a tensor
68 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item())
69 | highest_thre = min(highest_thre)
70 |
71 | # find the prune percentage that corresponds to highest_thre
72 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights)
73 |
74 | print(f'Threshold should be less than {highest_thre:.4f}.')
75 | print(f'The corresponding prune ratio is {percent_limit:.3f}.')
76 |
77 |
78 |
79 |
80 |
81 | # This function matters for several reasons:
82 | # 1. first deep-copy the original model to obtain model_copy
83 | # 2. in model_copy, set the BN gamma parameters that fall below the threshold to 0
84 | # 3. a BN layer computes y = gamma*x + beta, so once gamma is zeroed the layer only adds the bias beta
85 | # 4. curiously, network slimming zeroes both gamma and beta, while here only gamma is zeroed, yet it still works well: another paper already notes that beta's effect can first be folded into
86 | # the next convolution layer before this layer's gamma is pruned
87 |
88 | # In the simplest way possible, this function lets us quickly preview the effect of pruning
89 |
90 |
91 |
92 | def prune_and_eval(model, sorted_bn, percent=.0):
93 | model_copy = deepcopy(model)
94 | thre_index = int(len(sorted_bn) * percent)
95 | # compute the gamma threshold; every channel whose gamma falls below it is pruned
96 | thre = sorted_bn[thre_index]
97 |
98 | print(f'Channels with Gamma value less than {thre:.4f} are pruned!')
99 |
100 | remain_num = 0
101 | for idx in prune_idx:
102 |
103 | bn_module = model_copy.module_list[idx][1]
104 |
105 | mask = obtain_bn_mask(bn_module, thre)
106 | mask_cnt=int(mask.sum())
107 | if mask_cnt==0:
108 | this_layer_sort_bn=bn_module.weight.data.abs().clone()
109 | sort_bn_values= torch.sort(this_layer_sort_bn)[0]
110 | bn_cnt=bn_module.weight.shape[0]
111 | this_layer_thre=sort_bn_values[bn_cnt-8]
112 | mask = obtain_bn_mask(bn_module, this_layer_thre)
113 | else:
114 | for i in range(len(filter_switch)):
115 | if mask_cnt<=filter_switch[i]:
116 | mask_cnt=filter_switch[i]
117 | break
118 | this_layer_sort_bn=bn_module.weight.data.abs().clone()
119 | sort_bn_values= torch.sort(this_layer_sort_bn)[0]
120 | bn_cnt=bn_module.weight.shape[0]
121 | this_layer_thre=sort_bn_values[bn_cnt-mask_cnt]
122 | mask = obtain_bn_mask(bn_module, this_layer_thre)
123 |
124 |
125 | remain_num += int(mask.sum())
126 | bn_module.weight.data.mul_(mask)
127 |
128 | with torch.no_grad():
129 | mAP = eval_model(model_copy)[1].mean()
130 |
131 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}')
132 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}')
133 | print(f'mAP of the pruned model is {mAP:.4f}')
134 |
135 | return thre
136 |
137 |
138 | threshold = prune_and_eval(model, sorted_bn, percent)
139 |
140 |
141 |
142 | #****************************************************************
143 | # the code above only previews the pruning effect; it does not build the pruned network, so the code below constructs the new structure and copies the old model's weights into it
144 |
145 |
146 |
147 |
148 |
149 |
150 | #%%
151 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx):
152 |
153 | pruned = 0
154 | total = 0
155 | num_filters = []
156 | filters_mask = []
157 | # CBL_idx holds every convolutional layer that has BN (the conv layer right before a YOLO layer has no BN)
158 | for idx in CBL_idx:
159 | bn_module = model.module_list[idx][1]
160 | if idx in prune_idx:
161 |
162 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy()
163 |
164 | mask_cnt=int(mask.sum())
165 |
166 | if mask_cnt==0:
167 | this_layer_sort_bn=bn_module.weight.data.abs().clone()
168 | sort_bn_values= torch.sort(this_layer_sort_bn)[0]
169 | bn_cnt=bn_module.weight.shape[0]
170 | this_layer_thre=sort_bn_values[bn_cnt-8]
171 | mask = obtain_bn_mask(bn_module, this_layer_thre).cpu().numpy()
172 |
173 | else:
174 | for i in range(len(filter_switch)):
175 | if mask_cnt<=filter_switch[i]:
176 | mask_cnt=filter_switch[i]
177 | break
178 | this_layer_sort_bn=bn_module.weight.data.abs().clone()
179 | sort_bn_values= torch.sort(this_layer_sort_bn)[0]
180 | bn_cnt=bn_module.weight.shape[0]
181 | this_layer_thre=sort_bn_values[bn_cnt-mask_cnt]
182 | mask = obtain_bn_mask(bn_module, this_layer_thre).cpu().numpy()
183 |
184 | remain = int(mask.sum())
185 | pruned = pruned + mask.shape[0] - remain
186 |
187 |
188 | if remain == 0:
189 | print("Channels would be all pruned!")
190 | raise Exception
191 |
192 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t '
193 | f'remaining channel: {remain:>4d}')
194 | else:
195 | mask = np.ones(bn_module.weight.data.shape)
196 | remain = mask.shape[0]
197 |
198 | total += mask.shape[0]
199 | num_filters.append(remain)
200 | filters_mask.append(mask.copy())
201 |
202 |     # Hence the prune_ratio computed here is: pruned gamma parameters / all gamma parameters over CBL_idx.
203 | prune_ratio = pruned / total
204 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}')
205 |
206 | return num_filters, filters_mask
207 |
208 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx)
209 |
210 |
211 | # CBLidx2mask maps each index in CBL_idx to the mask of that layer's BN channels.
212 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)}
213 |
214 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask)
215 |
216 |
217 |
218 | with torch.no_grad():
219 | mAP = eval_model(pruned_model)[1].mean()
220 | print('after prune_model_keep_size map is {}'.format(mAP))
221 |
222 |
223 | # Take the original model's module_defs and update the filter counts in those defs.
224 | compact_module_defs = deepcopy(model.module_defs)
225 | for idx, num in zip(CBL_idx, num_filters):
226 | assert compact_module_defs[idx]['type'] == 'convolutional'
227 | compact_module_defs[idx]['filters'] = str(num)
228 |
229 |
230 |
231 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device)
232 | compact_nparameters = obtain_num_parameters(compact_model)
233 |
234 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
235 |
236 |
237 | random_input = torch.rand((16, 3, 416, 416)).to(device)
238 |
239 | def obtain_avg_forward_time(input, model, repeat=200):
240 |
241 | model.eval()
242 | start = time.time()
243 | with torch.no_grad():
244 | for i in range(repeat):
245 | output = model(input)
246 | avg_infer_time = (time.time() - start) / repeat
247 |
248 | return avg_infer_time, output
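# (Editor's note) This reports the wall-clock average over `repeat` forward passes of the fixed
# 16x3x416x416 batch above. On GPU, kernels launch asynchronously, so adding torch.cuda.synchronize()
# before reading time.time() would likely give a more faithful per-pass figure; the relative
# pruned-vs-compact comparison below is informative either way.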
249 |
250 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model)
251 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model)
252 |
253 |
254 |
255 | # Evaluate the pruned (compact) model on the test set and count its parameters.
256 | with torch.no_grad():
257 | compact_model_metric = eval_model(compact_model)
258 |
259 |
260 | # Compare parameter counts and metric performance before and after pruning.
261 | metric_table = [
262 | ["Metric", "Before", "After"],
263 | ["mAP", f'{origin_model_metric[1].mean():.6f}', f'{compact_model_metric[1].mean():.6f}'],
264 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"],
265 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}']
266 | ]
267 | print(AsciiTable(metric_table).table)
268 |
269 |
270 |
271 | # Generate the pruned cfg file and save the compact model weights.
272 | pruned_cfg_name = opt.model_def.replace('/', f'/prune_{percent}_')
273 | for item in compact_module_defs:
274 | if item['type']=='yolo':
275 | item['anchors']='10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326'
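        # (Editor's note) parse_model_cfg turned 'anchors' into an Nx2 numpy array, which write_cfg
        # would otherwise serialize as an array repr; rewriting it as the plain comma-separated string
        # (here the default YOLOv3 anchor set) keeps the saved cfg parseable.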
276 |
277 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs)
278 | print(f'Config file has been saved: {pruned_cfg_file}')
279 |
280 | compact_model_name = 'weights/yolov3_hand_regular_pruning_'+str(percent)+'percent.weights'
281 |
282 | save_weights(compact_model, path=compact_model_name)
283 | print(f'Compact model has been saved: {compact_model_name}')
284 |
285 |
286 |
287 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip3 install -U -r requirements.txt
2 | numpy
3 | opencv-python
4 | torch >= 1.2
5 | matplotlib
6 | pycocotools
7 | tqdm
8 | tb-nightly
9 | future
10 | Pillow
11 |
12 | # Equivalent conda commands ----------------------------------------------------
13 | # conda update -n base -c defaults conda
14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow
15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools
16 | # conda install -yc spyder-ide spyder-line-profiler
17 | # conda install -yc pytorch pytorch torchvision
18 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 |
4 | from torch.utils.data import DataLoader
5 |
6 | from models import *
7 | from utils.datasets import *
8 | from utils.utils import *
9 |
10 |
11 |
12 | def test(cfg,
13 | data,
14 | weights=None,
15 | batch_size=16,
16 | img_size=416,
17 | iou_thres=0.5,
18 | conf_thres=0.001,
19 | nms_thres=0.5,
20 | save_json=False,
21 | model=None):
22 | # Initialize/load model and set device
23 | if model is None:
24 | device = torch_utils.select_device(opt.device)
25 | verbose = True
26 |
27 | # Initialize model
28 | model = Darknet(cfg, img_size).to(device)
29 | #print(model)
30 | # Load weights
31 |         # This was in the original code; I had removed it at one point.
32 | attempt_download(weights)
33 | if weights.endswith('.pt'): # pytorch format
34 |             print('Loading PyTorch .pt weights')
35 | model.load_state_dict(torch.load(weights, map_location=device)['model'])
36 |
37 |
38 | else: # darknet format
39 |             print('Loading darknet weights')
40 | _ = load_darknet_weights(model, weights)
41 |
42 | if torch.cuda.device_count() > 1:
43 | model = nn.DataParallel(model)
44 | else:
45 | device = next(model.parameters()).device # get model device
46 | verbose = False
47 |
48 | # Configure run
49 | data = parse_data_cfg(data)
50 | nc = int(data['classes']) # number of classes
51 | test_path = data['valid'] # path to test images
52 | names = load_classes(data['names']) # class names
53 |
54 | # Dataloader
55 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
56 | dataloader = DataLoader(dataset,
57 | batch_size=batch_size,
58 | num_workers=min([os.cpu_count(), batch_size, 16]),
59 | pin_memory=True,
60 | collate_fn=dataset.collate_fn)
61 |
62 | seen = 0
63 | model.eval()
64 | coco91class = coco80_to_coco91_class()
65 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1')
66 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
67 | loss = torch.zeros(3)
68 | jdict, stats, ap, ap_class = [], [], [], []
69 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
70 | targets = targets.to(device)
71 | imgs = imgs.to(device)
72 | _, _, height, width = imgs.shape # batch size, channels, height, width
73 |
74 | # Plot images with bounding boxes
75 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
76 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg')
77 |
78 | # Run model
79 | inf_out, train_out = model(imgs) # inference and training outputs
80 |
81 | # Compute loss
82 | if hasattr(model, 'hyp'): # if model has loss hyperparameters
83 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls
84 |
85 | # Run NMS
86 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres)
87 |
88 | # Statistics per image
89 | for si, pred in enumerate(output):
90 | labels = targets[targets[:, 0] == si, 1:]
91 | nl = len(labels)
92 | tcls = labels[:, 0].tolist() if nl else [] # target class
93 | seen += 1
94 |
95 | if pred is None:
96 | if nl:
97 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
98 | continue
99 |
100 | # Append to text file
101 | # with open('test.txt', 'a') as file:
102 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]
103 |
104 | # Append to pycocotools JSON dictionary
105 | if save_json:
106 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
107 | image_id = int(Path(paths[si]).stem.split('_')[-1])
108 | box = pred[:, :4].clone() # xyxy
109 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape
110 | box = xyxy2xywh(box) # xywh
111 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
112 | for di, d in enumerate(pred):
113 | jdict.append({'image_id': image_id,
114 | 'category_id': coco91class[int(d[6])],
115 | 'bbox': [floatn(x, 3) for x in box[di]],
116 | 'score': floatn(d[4], 5)})
117 |
118 | # Clip boxes to image bounds
119 | clip_coords(pred, (height, width))
120 |
121 | # Assign all predictions as incorrect
122 | correct = [0] * len(pred)
123 | if nl:
124 | detected = []
125 | tcls_tensor = labels[:, 0]
126 |
127 | # target boxes
128 | tbox = xywh2xyxy(labels[:, 1:5])
129 | tbox[:, [0, 2]] *= width
130 | tbox[:, [1, 3]] *= height
131 |
132 | # Search for correct predictions
133 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
134 |
135 | # Break if all targets already located in image
136 | if len(detected) == nl:
137 | break
138 |
139 | # Continue if predicted class not among image classes
140 | if pcls.item() not in tcls:
141 | continue
142 |
143 | # Best iou, index between pred and targets
144 | m = (pcls == tcls_tensor).nonzero().view(-1)
145 | iou, bi = bbox_iou(pbox, tbox[m]).max(0)
146 |
147 | # If iou > threshold and class is correct mark as correct
148 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]:
149 | correct[i] = 1
150 | detected.append(m[bi])
151 |
152 | # Append statistics (correct, conf, pcls, tcls)
153 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
154 |
155 | # Compute statistics
156 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy
157 | if len(stats):
158 | p, r, ap, f1, ap_class = ap_per_class(*stats)
159 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
160 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
161 | else:
162 | nt = torch.zeros(1)
163 |
164 | # Print results
165 | pf = '%20s' + '%10.3g' * 6 # print format
166 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))
167 |
168 | # Print results per class
169 | if verbose and nc > 1 and len(stats):
170 | for i, c in enumerate(ap_class):
171 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
172 |
173 | # Save JSON
174 | if save_json and map and len(jdict):
175 | try:
176 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files]
177 | with open('results.json', 'w') as file:
178 | json.dump(jdict, file)
179 |
180 | from pycocotools.coco import COCO
181 | from pycocotools.cocoeval import COCOeval
182 |
183 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
184 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api
185 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api
186 |
187 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
188 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images
189 | cocoEval.evaluate()
190 | cocoEval.accumulate()
191 | cocoEval.summarize()
192 | map = cocoEval.stats[1] # update mAP to pycocotools mAP
193 | except:
194 | print('WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.')
195 |
196 | # Return results
197 | maps = np.zeros(nc) + map
198 | for i, c in enumerate(ap_class):
199 | maps[c] = ap[i]
200 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
201 |
202 |
203 | if __name__ == '__main__':
204 | parser = argparse.ArgumentParser(prog='test.py')
205 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path')
206 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path')
207 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file')
208 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch')
209 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)')
210 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected')
211 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold')
212 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression')
213 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file')
214 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')
215 | opt = parser.parse_args()
216 | print(opt)
217 |
218 | with torch.no_grad():
219 | test(opt.cfg,
220 | opt.data,
221 | opt.weights,
222 | opt.batch_size,
223 | opt.img_size,
224 | opt.iou_thres,
225 | opt.conf_thres,
226 | opt.nms_thres,
227 | opt.save_json)
228 |
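# (Editor's note) Example invocation against the Oxford Hand data shipped with this repo
# (the weights path below is illustrative; point it at your own .pt or darknet .weights file):
#   python3 test.py --cfg cfg/yolov3-hand.cfg --data data/oxfordhand.data --weights weights/yolov3-hand.weights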
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__init__.py
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/datasets.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/datasets.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/google_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/google_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/parse_config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/parse_config.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/prune_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/prune_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/torch_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/torch_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coldlarry/YOLOv3-complete-pruning/90c5869d265a632829ba7798598aa2798e4f5227/utils/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/adabound.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | from torch.optim import Optimizer
5 |
6 |
7 | class AdaBound(Optimizer):
8 | """Implements AdaBound algorithm.
9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
10 | Arguments:
11 | params (iterable): iterable of parameters to optimize or dicts defining
12 | parameter groups
13 | lr (float, optional): Adam learning rate (default: 1e-3)
14 | betas (Tuple[float, float], optional): coefficients used for computing
15 | running averages of gradient and its square (default: (0.9, 0.999))
16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
18 | eps (float, optional): term added to the denominator to improve
19 | numerical stability (default: 1e-8)
20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
23 | https://openreview.net/forum?id=Bkg3g2R9FX
24 | """
25 |
26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
27 | eps=1e-8, weight_decay=0, amsbound=False):
28 | if not 0.0 <= lr:
29 | raise ValueError("Invalid learning rate: {}".format(lr))
30 | if not 0.0 <= eps:
31 | raise ValueError("Invalid epsilon value: {}".format(eps))
32 | if not 0.0 <= betas[0] < 1.0:
33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
34 | if not 0.0 <= betas[1] < 1.0:
35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
36 | if not 0.0 <= final_lr:
37 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
38 | if not 0.0 <= gamma < 1.0:
39 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
41 | weight_decay=weight_decay, amsbound=amsbound)
42 | super(AdaBound, self).__init__(params, defaults)
43 |
44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
45 |
46 | def __setstate__(self, state):
47 | super(AdaBound, self).__setstate__(state)
48 | for group in self.param_groups:
49 | group.setdefault('amsbound', False)
50 |
51 | def step(self, closure=None):
52 | """Performs a single optimization step.
53 | Arguments:
54 | closure (callable, optional): A closure that reevaluates the model
55 | and returns the loss.
56 | """
57 | loss = None
58 | if closure is not None:
59 | loss = closure()
60 |
61 | for group, base_lr in zip(self.param_groups, self.base_lrs):
62 | for p in group['params']:
63 | if p.grad is None:
64 | continue
65 | grad = p.grad.data
66 | if grad.is_sparse:
67 | raise RuntimeError(
68 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
69 | amsbound = group['amsbound']
70 |
71 | state = self.state[p]
72 |
73 | # State initialization
74 | if len(state) == 0:
75 | state['step'] = 0
76 | # Exponential moving average of gradient values
77 | state['exp_avg'] = torch.zeros_like(p.data)
78 | # Exponential moving average of squared gradient values
79 | state['exp_avg_sq'] = torch.zeros_like(p.data)
80 | if amsbound:
81 | # Maintains max of all exp. moving avg. of sq. grad. values
82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
83 |
84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
85 | if amsbound:
86 | max_exp_avg_sq = state['max_exp_avg_sq']
87 | beta1, beta2 = group['betas']
88 |
89 | state['step'] += 1
90 |
91 | if group['weight_decay'] != 0:
92 | grad = grad.add(group['weight_decay'], p.data)
93 |
94 | # Decay the first and second moment running average coefficient
95 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
97 | if amsbound:
98 | # Maintains the maximum of all 2nd moment running avg. till now
99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
100 | # Use the max. for normalizing running avg. of gradient
101 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
102 | else:
103 | denom = exp_avg_sq.sqrt().add_(group['eps'])
104 |
105 | bias_correction1 = 1 - beta1 ** state['step']
106 | bias_correction2 = 1 - beta2 ** state['step']
107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
108 |
109 | # Applies bounds on actual learning rate
110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
111 | final_lr = group['final_lr'] * group['lr'] / base_lr
112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
114 | step_size = torch.full_like(denom, step_size)
115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
116 |
117 | p.data.add_(-step_size)
118 |
119 | return loss
120 |
121 |
122 | class AdaBoundW(Optimizer):
123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
125 | Arguments:
126 | params (iterable): iterable of parameters to optimize or dicts defining
127 | parameter groups
128 | lr (float, optional): Adam learning rate (default: 1e-3)
129 | betas (Tuple[float, float], optional): coefficients used for computing
130 | running averages of gradient and its square (default: (0.9, 0.999))
131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1)
132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
133 | eps (float, optional): term added to the denominator to improve
134 | numerical stability (default: 1e-8)
135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
138 | https://openreview.net/forum?id=Bkg3g2R9FX
139 | """
140 |
141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
142 | eps=1e-8, weight_decay=0, amsbound=False):
143 | if not 0.0 <= lr:
144 | raise ValueError("Invalid learning rate: {}".format(lr))
145 | if not 0.0 <= eps:
146 | raise ValueError("Invalid epsilon value: {}".format(eps))
147 | if not 0.0 <= betas[0] < 1.0:
148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
149 | if not 0.0 <= betas[1] < 1.0:
150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
151 | if not 0.0 <= final_lr:
152 | raise ValueError("Invalid final learning rate: {}".format(final_lr))
153 | if not 0.0 <= gamma < 1.0:
154 | raise ValueError("Invalid gamma parameter: {}".format(gamma))
155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
156 | weight_decay=weight_decay, amsbound=amsbound)
157 | super(AdaBoundW, self).__init__(params, defaults)
158 |
159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
160 |
161 | def __setstate__(self, state):
162 | super(AdaBoundW, self).__setstate__(state)
163 | for group in self.param_groups:
164 | group.setdefault('amsbound', False)
165 |
166 | def step(self, closure=None):
167 | """Performs a single optimization step.
168 | Arguments:
169 | closure (callable, optional): A closure that reevaluates the model
170 | and returns the loss.
171 | """
172 | loss = None
173 | if closure is not None:
174 | loss = closure()
175 |
176 | for group, base_lr in zip(self.param_groups, self.base_lrs):
177 | for p in group['params']:
178 | if p.grad is None:
179 | continue
180 | grad = p.grad.data
181 | if grad.is_sparse:
182 | raise RuntimeError(
183 | 'Adam does not support sparse gradients, please consider SparseAdam instead')
184 | amsbound = group['amsbound']
185 |
186 | state = self.state[p]
187 |
188 | # State initialization
189 | if len(state) == 0:
190 | state['step'] = 0
191 | # Exponential moving average of gradient values
192 | state['exp_avg'] = torch.zeros_like(p.data)
193 | # Exponential moving average of squared gradient values
194 | state['exp_avg_sq'] = torch.zeros_like(p.data)
195 | if amsbound:
196 | # Maintains max of all exp. moving avg. of sq. grad. values
197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data)
198 |
199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
200 | if amsbound:
201 | max_exp_avg_sq = state['max_exp_avg_sq']
202 | beta1, beta2 = group['betas']
203 |
204 | state['step'] += 1
205 |
206 | # Decay the first and second moment running average coefficient
207 | exp_avg.mul_(beta1).add_(1 - beta1, grad)
208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
209 | if amsbound:
210 | # Maintains the maximum of all 2nd moment running avg. till now
211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
212 | # Use the max. for normalizing running avg. of gradient
213 | denom = max_exp_avg_sq.sqrt().add_(group['eps'])
214 | else:
215 | denom = exp_avg_sq.sqrt().add_(group['eps'])
216 |
217 | bias_correction1 = 1 - beta1 ** state['step']
218 | bias_correction2 = 1 - beta2 ** state['step']
219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
220 |
221 | # Applies bounds on actual learning rate
222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
223 | final_lr = group['final_lr'] * group['lr'] / base_lr
224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
226 | step_size = torch.full_like(denom, step_size)
227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg)
228 |
229 | if group['weight_decay'] != 0:
230 | decayed_weights = torch.mul(p.data, group['weight_decay'])
231 | p.data.add_(-step_size)
232 | p.data.sub_(decayed_weights)
233 | else:
234 | p.data.add_(-step_size)
235 |
236 | return loss
237 |
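# --- Editor's illustrative usage sketch (not part of the original file) ---
# AdaBound is constructed like Adam; `final_lr` is the SGD-like rate the bounds converge to
# (values below mirror the documented defaults; run under the torch version this file targets).
if __name__ == '__main__':
    import torch.nn as nn
    net = nn.Linear(10, 2)
    optimizer = AdaBound(net.parameters(), lr=1e-3, final_lr=0.1)
    loss = net(torch.randn(4, 10)).sum()
    loss.backward()
    optimizer.step()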
--------------------------------------------------------------------------------
/utils/gcp.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # New VM
4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec
5 | git clone https://github.com/ultralytics/yolov3
6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd ..
7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex
8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. && cp -r cocoapi/PythonAPI/pycocotools yolov3
9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools
10 | python3 -c "
11 | from yolov3.utils.google_utils import gdrive_download
12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')"
13 | sudo shutdown
14 |
15 | # Re-clone
16 | rm -rf yolov3 # Warning: remove existing
17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master
18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch
19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1
20 |
21 | # Train
22 | python3 train.py
23 |
24 | # Resume
25 | python3 train.py --resume
26 |
27 | # Detect
28 | python3 detect.py
29 |
30 | # Test
31 | python3 test.py --save-json
32 |
33 | # Evolve
34 | for i in {0..500}
35 | do
36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4
37 | done
38 |
39 | # Git pull
40 | git pull https://github.com/ultralytics/yolov3 # master
41 | git pull https://github.com/ultralytics/yolov3 test # branch
42 |
43 | # Test Darknet training
44 | python3 test.py --weights ../darknet/backup/yolov3.backup
45 |
46 | # Copy last.pt TO bucket
47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics
48 |
49 | # Copy last.pt FROM bucket
50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt
51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt
52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt
53 |
54 | # Reproduce tutorials
55 | rm results*.txt # WARNING: removes existing results
56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt
57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt
58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt
59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt
60 | python3 -c "from utils import utils; utils.plot_results()"
61 | # gsutil cp results*.txt gs://ultralytics
62 | gsutil cp results.png gs://ultralytics
63 | sudo shutdown
64 |
65 | # Reproduce mAP
66 | python3 test.py --save-json --img-size 608
67 | python3 test.py --save-json --img-size 416
68 | python3 test.py --save-json --img-size 320
69 | sudo shutdown
70 |
71 | # Benchmark script
72 | git clone https://github.com/ultralytics/yolov3 # clone our repo
73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex
74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB)
75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min)
76 |
77 | # Unit tests
78 | python3 detect.py # detect 2 persons, 1 tie
79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8
80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs
81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs
82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs
83 |
84 | # AlexeyAB Darknet
85 | gsutil cp -r gs://sm6/supermarket2 . # dataset from bucket
86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make
87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation
88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp
89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco
90 |
91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp
92 | gsutil cp -r backup/*5000.weights gs://sm6/weights
93 | sudo shutdown
94 |
95 |
96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny
97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume
98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics
99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test
100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket
101 |
102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test
103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test
104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test
105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test
106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test
107 |
108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown
109 |
110 | # Debug/Development
111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou
112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320
113 |
114 | gsutil cp evolve.txt gs://ultralytics
115 | sudo shutdown
116 |
117 | #Docker
118 | sudo docker kill $(sudo docker ps -q)
119 | sudo docker pull ultralytics/yolov3:v1
120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1
121 |
122 | clear
123 | while true
124 | do
125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1
126 | done
127 |
128 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias
129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done
130 |
--------------------------------------------------------------------------------
/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
2 | # pip install --upgrade google-cloud-storage
3 |
4 | import os
5 | import time
6 |
7 |
8 | # from google.cloud import storage
9 |
10 |
11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'):
12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f
13 | # Downloads a file from Google Drive, accepting presented query
14 | # from utils.google_utils import *; gdrive_download()
15 | t = time.time()
16 |
17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
18 | if os.path.exists(name): # remove existing
19 | os.remove(name)
20 |
21 | # Attempt large file download
22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id,
23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
24 | id, name),
25 | 'rm ./cookie']
26 | [os.system(x) for x in s] # run commands
27 |
28 | # Attempt small file download
29 | if not os.path.exists(name): # file size < 40MB
30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id)
31 | os.system(s)
32 |
33 | # Unzip if archive
34 | if name.endswith('.zip'):
35 | print('unzipping... ', end='')
36 | os.system('unzip -q %s' % name) # unzip
37 | os.remove(name) # remove zip to free space
38 |
39 | print('Done (%.1fs)' % (time.time() - t))
40 |
41 |
42 | def upload_blob(bucket_name, source_file_name, destination_blob_name):
43 | # Uploads a file to a bucket
44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
45 |
46 | storage_client = storage.Client()
47 | bucket = storage_client.get_bucket(bucket_name)
48 | blob = bucket.blob(destination_blob_name)
49 |
50 | blob.upload_from_filename(source_file_name)
51 |
52 | print('File {} uploaded to {}.'.format(
53 | source_file_name,
54 | destination_blob_name))
55 |
56 |
57 | def download_blob(bucket_name, source_blob_name, destination_file_name):
58 |     # Downloads a blob from a bucket
59 | storage_client = storage.Client()
60 | bucket = storage_client.get_bucket(bucket_name)
61 | blob = bucket.blob(source_blob_name)
62 |
63 | blob.download_to_filename(destination_file_name)
64 |
65 | print('Blob {} downloaded to {}.'.format(
66 | source_blob_name,
67 | destination_file_name))
68 |
--------------------------------------------------------------------------------
/utils/parse_config.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def parse_model_cfg(path):
5 | # Parses the yolo-v3 layer configuration file and returns module definitions
6 | file = open(path, 'r')
7 | lines = file.read().split('\n')
8 | lines = [x for x in lines if x and not x.startswith('#')]
9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
10 | mdefs = [] # module definitions
11 | for line in lines:
12 | if line.startswith('['): # This marks the start of a new block
13 | mdefs.append({})
14 | mdefs[-1]['type'] = line[1:-1].rstrip()
15 | if mdefs[-1]['type'] == 'convolutional':
16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later)
17 | else:
18 | key, val = line.split("=")
19 | key = key.rstrip()
20 |
21 | if 'anchors' in key:
22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors
23 | else:
24 | mdefs[-1][key] = val.strip()
25 |
26 | return mdefs
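# (Editor's note) Illustrative return value: for a cfg block such as
#   [convolutional]
#   batch_normalize=1
#   filters=32
# parse_model_cfg yields [{'type': 'convolutional', 'batch_normalize': '1', 'filters': '32'}, ...].
# Values stay strings (hence prune_utils compares batch_normalize == '1'), except 'anchors',
# which is converted to an Nx2 numpy float array.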
27 |
28 |
29 | def parse_data_cfg(path):
30 | # Parses the data configuration file
31 | options = dict()
32 | with open(path, 'r') as fp:
33 | lines = fp.readlines()
34 |
35 | for line in lines:
36 | line = line.strip()
37 | if line == '' or line.startswith('#'):
38 | continue
39 | key, val = line.split('=')
40 | options[key.strip()] = val.strip()
41 |
42 | return options
43 |
--------------------------------------------------------------------------------
/utils/prune_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from terminaltables import AsciiTable
3 | from copy import deepcopy
4 | import numpy as np
5 | import torch.nn.functional as F
6 |
7 |
8 | def get_sr_flag(epoch, sr):
9 | # return epoch >= 5 and sr
10 | return sr
11 |
12 | def parse_module_defs3(module_defs):
13 |
14 | CBL_idx = []
15 | Conv_idx = []
16 | for i, module_def in enumerate(module_defs):
17 | if module_def['type'] == 'convolutional':
18 | if module_def['batch_normalize'] == '1':
19 | CBL_idx.append(i)
20 | else:
21 | Conv_idx.append(i)
22 |
23 | ignore_idx = set()
24 |
25 | ignore_idx.add(18)
26 |
27 |
28 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx]
29 |
30 | return CBL_idx, Conv_idx, prune_idx
31 |
32 | def parse_module_defs2(module_defs):
33 |
34 | CBL_idx = []
35 | Conv_idx = []
36 | shortcut_idx=dict()
37 | shortcut_all=set()
38 | for i, module_def in enumerate(module_defs):
39 | if module_def['type'] == 'convolutional':
40 | if module_def['batch_normalize'] == '1':
41 | CBL_idx.append(i)
42 | else:
43 | Conv_idx.append(i)
44 |
45 | ignore_idx = set()
46 | for i, module_def in enumerate(module_defs):
47 | if module_def['type'] == 'shortcut':
48 | identity_idx = (i + int(module_def['from']))
49 | if module_defs[identity_idx]['type'] == 'convolutional':
50 |
51 | #ignore_idx.add(identity_idx)
52 | shortcut_idx[i-1]=identity_idx
53 | shortcut_all.add(identity_idx)
54 | elif module_defs[identity_idx]['type'] == 'shortcut':
55 |
56 | #ignore_idx.add(identity_idx - 1)
57 | shortcut_idx[i-1]=identity_idx-1
58 | shortcut_all.add(identity_idx-1)
59 | shortcut_all.add(i-1)
60 |     # Do not prune the conv layers right before the upsample layers.
61 | ignore_idx.add(84)
62 | ignore_idx.add(96)
63 |
64 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx]
65 |
66 | return CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all
67 |
68 | def parse_module_defs(module_defs):
69 |
70 | CBL_idx = []
71 | Conv_idx = []
72 | for i, module_def in enumerate(module_defs):
73 | if module_def['type'] == 'convolutional':
74 | if module_def['batch_normalize'] == '1':
75 | CBL_idx.append(i)
76 | else:
77 | Conv_idx.append(i)
78 | ignore_idx = set()
79 | for i, module_def in enumerate(module_defs):
80 | if module_def['type'] == 'shortcut':
81 | ignore_idx.add(i-1)
82 | identity_idx = (i + int(module_def['from']))
83 | if module_defs[identity_idx]['type'] == 'convolutional':
84 | ignore_idx.add(identity_idx)
85 | elif module_defs[identity_idx]['type'] == 'shortcut':
86 | ignore_idx.add(identity_idx - 1)
87 |     # Do not prune the conv layers right before the upsample layers.
88 | ignore_idx.add(84)
89 | ignore_idx.add(96)
90 |
91 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx]
92 |
93 | return CBL_idx, Conv_idx, prune_idx
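# (Editor's note) Summary of the returned lists: CBL_idx = convs followed by BN, Conv_idx = convs
# without BN (the ones feeding each YOLO layer), and prune_idx = the prunable subset of CBL_idx,
# i.e. excluding shortcut-related convs (their channel counts must stay matched for the residual
# add) and the convs in front of the upsample layers (indices 84 and 96).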
94 |
95 |
96 | def gather_bn_weights(module_list, prune_idx):
97 |
98 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx]
99 |
100 | bn_weights = torch.zeros(sum(size_list))
101 | index = 0
102 | for idx, size in zip(prune_idx, size_list):
103 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone()
104 | index += size
105 |
106 | return bn_weights
107 |
108 |
109 | def write_cfg(cfg_file, module_defs):
110 |
111 | with open(cfg_file, 'w') as f:
112 | for module_def in module_defs:
113 | f.write(f"[{module_def['type']}]\n")
114 | for key, value in module_def.items():
115 | if key != 'type':
116 | f.write(f"{key}={value}\n")
117 | f.write("\n")
118 | return cfg_file
119 |
120 |
121 | class BNOptimizer():
122 |
123 | @staticmethod
124 | def updateBN(sr_flag, module_list, s, prune_idx):
125 | if sr_flag:
126 | for idx in prune_idx:
127 |                 # Sequential(Conv, BN, LeakyReLU)
128 | bn_module = module_list[idx][1]
129 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1
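                # (Editor's note) Adding s * sign(gamma) to the gradient is the subgradient of an
                # L1 penalty s * |gamma|, i.e. sparsity training effectively optimizes
                # loss + s * sum(|gamma|) over the prunable BN layers, the channel-sparsity
                # regularizer used by Network Slimming.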
130 |
131 |
132 | def obtain_quantiles(bn_weights, num_quantile=5):
133 |
134 | sorted_bn_weights, i = torch.sort(bn_weights)
135 | total = sorted_bn_weights.shape[0]
136 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1]
137 | print("\nBN weights quantile:")
138 | quantile_table = [
139 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)],
140 | ["%.3f" % quantile for quantile in quantiles]
141 | ]
142 | print(AsciiTable(quantile_table).table)
143 |
144 | return quantiles
145 |
146 |
147 | def get_input_mask(module_defs, idx, CBLidx2mask):
148 |
149 | if idx == 0:
150 | return np.ones(3)
151 |
152 | if module_defs[idx - 1]['type'] == 'convolutional':
153 | return CBLidx2mask[idx - 1]
154 | elif module_defs[idx - 1]['type'] == 'shortcut':
155 | return CBLidx2mask[idx - 2]
156 | elif module_defs[idx - 1]['type'] == 'route':
157 | route_in_idxs = []
158 | for layer_i in module_defs[idx - 1]['layers'].split(","):
159 | if int(layer_i) < 0:
160 | route_in_idxs.append(idx - 1 + int(layer_i))
161 | else:
162 | route_in_idxs.append(int(layer_i))
163 | if len(route_in_idxs) == 1:
164 | return CBLidx2mask[route_in_idxs[0]]
165 | elif len(route_in_idxs) == 2:
166 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs])
167 | else:
168 | print("Something wrong with route module!")
169 | raise Exception
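# (Editor's note) get_input_mask gives the input-channel mask a layer receives: layer 0 gets an
# all-ones 3-channel (RGB) mask; otherwise it is the preceding conv's mask, the mask carried
# through a shortcut, or, for a route, the single routed mask / the concatenation of
# CBLidx2mask[in_idx - 1] over the two routed indices.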
170 |
171 |
172 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask):
173 |
174 | for idx in CBL_idx:
175 | compact_CBL = compact_model.module_list[idx]
176 | loose_CBL = loose_model.module_list[idx]
177 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist()
178 |
179 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1]
180 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone()
181 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone()
182 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone()
183 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone()
184 |
185 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask)
186 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
187 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0]
188 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
189 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone()
190 |
191 | for idx in Conv_idx:
192 | compact_conv = compact_model.module_list[idx][0]
193 | loose_conv = loose_model.module_list[idx][0]
194 |
195 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask)
196 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
197 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
198 | compact_conv.bias.data = loose_conv.bias.data.clone()
199 |
200 |
201 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask):
202 |
203 | pruned_model = deepcopy(model)
204 | for idx in prune_idx:
205 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda()
206 | bn_module = pruned_model.module_list[idx][1]
207 |
208 | bn_module.weight.data.mul_(mask)
209 |
210 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1)
211 |
212 |         # The convs in front of the two upsample layers (84 and 96) also take this layer's output via a route, so they need the same compensation.
213 | next_idx_list = [idx + 1]
214 | if idx == 79:
215 | next_idx_list.append(84)
216 | elif idx == 91:
217 | next_idx_list.append(96)
218 |
219 | for next_idx in next_idx_list:
220 | next_conv = pruned_model.module_list[next_idx][0]
221 | conv_sum = next_conv.weight.data.sum(dim=(2, 3))
222 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1)
223 | if next_idx in CBL_idx:
224 | next_bn = pruned_model.module_list[next_idx][1]
225 | next_bn.running_mean.data.sub_(offset)
226 | else:
227 |             # Note: a convolutional layer with BN uses no bias; one without BN does use a bias.
228 | next_conv.bias.data.add_(offset)
229 |
230 | bn_module.bias.data.mul_(mask)
231 |
232 | return pruned_model
233 |
234 |
235 | def obtain_bn_mask(bn_module, thre):
236 |
237 | thre = thre.cuda()
238 | mask = bn_module.weight.data.abs().ge(thre).float()
239 |
240 | return mask
241 |
--------------------------------------------------------------------------------
/utils/tiny_prune_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from terminaltables import AsciiTable
3 | from copy import deepcopy
4 | import numpy as np
5 | import torch.nn.functional as F
6 |
7 |
8 | def get_sr_flag(epoch, sr):
9 | # return epoch >= 5 and sr
10 | return sr
11 |
12 |
13 | def parse_module_defs(module_defs):
14 |
15 | CBL_idx = []
16 | Conv_idx = []
17 | for i, module_def in enumerate(module_defs):
18 | if module_def['type'] == 'convolutional':
19 | if module_def['batch_normalize'] == '1':
20 | CBL_idx.append(i)
21 | else:
22 | Conv_idx.append(i)
23 |
24 | ignore_idx = set()
25 |
26 | ignore_idx.add(18)
27 |
28 |
29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx]
30 |
31 | return CBL_idx, Conv_idx, prune_idx
32 |
33 |
34 | def gather_bn_weights(module_list, prune_idx):
35 |
36 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx]
37 |
38 | bn_weights = torch.zeros(sum(size_list))
39 | index = 0
40 | for idx, size in zip(prune_idx, size_list):
41 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone()
42 | index += size
43 |
44 | return bn_weights
45 |
46 |
47 | def write_cfg(cfg_file, module_defs):
48 |
49 | with open(cfg_file, 'w') as f:
50 | for module_def in module_defs:
51 | f.write(f"[{module_def['type']}]\n")
52 | for key, value in module_def.items():
53 | if key != 'type':
54 | f.write(f"{key}={value}\n")
55 | f.write("\n")
56 | return cfg_file
57 |
58 |
59 | class BNOptimizer():
60 |
61 | @staticmethod
62 | def updateBN(sr_flag, module_list, s, prune_idx):
63 | if sr_flag:
64 | for idx in prune_idx:
65 |                 # Sequential(Conv, BN, LeakyReLU)
66 | bn_module = module_list[idx][1]
67 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1
68 |
69 |
70 | def obtain_quantiles(bn_weights, num_quantile=5):
71 |
72 | sorted_bn_weights, i = torch.sort(bn_weights)
73 | total = sorted_bn_weights.shape[0]
74 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1]
75 | print("\nBN weights quantile:")
76 | quantile_table = [
77 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)],
78 | ["%.3f" % quantile for quantile in quantiles]
79 | ]
80 | print(AsciiTable(quantile_table).table)
81 |
82 | return quantiles
83 |
84 |
85 | def get_input_mask(module_defs, idx, CBLidx2mask):
86 |
87 | if idx == 0:
88 | return np.ones(3)
89 |
90 | if module_defs[idx - 1]['type'] == 'convolutional':
91 | return CBLidx2mask[idx - 1]
92 | elif module_defs[idx - 1]['type'] == 'shortcut':
93 | return CBLidx2mask[idx - 2]
94 | elif module_defs[idx - 1]['type'] == 'route':
95 | route_in_idxs = []
96 | for layer_i in module_defs[idx - 1]['layers'].split(","):
97 | if int(layer_i) < 0:
98 | route_in_idxs.append(idx - 1 + int(layer_i))
99 | else:
100 | route_in_idxs.append(int(layer_i))
101 | if len(route_in_idxs) == 1:
102 | return CBLidx2mask[route_in_idxs[0]]
103 | elif len(route_in_idxs) == 2:
104 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs])
105 | else:
106 | print("Something wrong with route module!")
107 | raise Exception
108 |
109 |
110 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask):
111 |
112 | for idx in CBL_idx:
113 | compact_CBL = compact_model.module_list[idx]
114 | loose_CBL = loose_model.module_list[idx]
115 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist()
116 |
117 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1]
118 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone()
119 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone()
120 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone()
121 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone()
122 |
123 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask)
124 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
125 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0]
126 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
127 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone()
128 |
129 | for idx in Conv_idx:
130 | compact_conv = compact_model.module_list[idx][0]
131 | loose_conv = loose_model.module_list[idx][0]
132 |
133 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask)
134 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist()
135 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone()
136 | compact_conv.bias.data = loose_conv.bias.data.clone()
137 |
138 |
139 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask):
140 |
141 | pruned_model = deepcopy(model)
142 | for idx in prune_idx:
143 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda()
144 | bn_module = pruned_model.module_list[idx][1]
145 |
146 | bn_module.weight.data.mul_(mask)
147 |
148 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1)
149 |
150 |
151 |         if idx < 12:
152 |             next_idx_list = [idx + 2]
153 |         else:
154 |             next_idx_list = [idx + 1]
155 |
156 |
157 |         # next_idx_list = [idx + 1]
158 |         if idx == 13:
159 |             next_idx_list.append(18)
160 |
161 |
162 |
163 |         for next_idx in next_idx_list:
164 |             next_conv = pruned_model.module_list[next_idx][0]
165 |             conv_sum = next_conv.weight.data.sum(dim=(2, 3))
166 |             offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1)
167 |
168 |             if next_idx in CBL_idx:
169 |                 next_bn = pruned_model.module_list[next_idx][1]
170 |                 next_bn.running_mean.data.sub_(offset)
171 |             else:
172 |                 # Note: a convolutional layer with BN uses no bias; one without BN does use a bias.
173 |                 next_conv.bias.data.add_(offset)
174 |
175 |
176 |         bn_module.bias.data.mul_(mask)
177 |
178 | return pruned_model
179 |
180 |
181 | def obtain_bn_mask(bn_module, thre):
182 |
183 | thre = thre.cuda()
184 | mask = bn_module.weight.data.abs().ge(thre).float()
185 |
186 | return mask
187 |
--------------------------------------------------------------------------------
/utils/torch_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import torch
4 |
5 |
6 | def init_seeds(seed=0):
7 | torch.manual_seed(seed)
8 | torch.cuda.manual_seed(seed)
9 | torch.cuda.manual_seed_all(seed)
10 |
11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html
12 | if seed == 0:
13 | torch.backends.cudnn.deterministic = True
14 | torch.backends.cudnn.benchmark = False
15 |
16 |
17 | def select_device(device='', apex=False):
18 | # device = 'cpu' or '0' or '0,1,2,3'
19 | cpu_request = device.lower() == 'cpu'
20 | if device and not cpu_request: # if device requested other than 'cpu'
21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity
23 |
24 | cuda = False if cpu_request else torch.cuda.is_available()
25 | if cuda:
26 | c = 1024 ** 2 # bytes to MB
27 | ng = torch.cuda.device_count()
28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)]
29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex
30 | for i in range(0, ng):
31 | if i == 1:
32 | cuda_str = ' ' * len(cuda_str)
33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
34 | (cuda_str, i, x[i].name, x[i].total_memory / c))
35 | else:
36 | print('Using CPU')
37 |
38 | print('') # skip a line
39 | return torch.device('cuda:0' if cuda else 'cpu')
40 |
41 |
42 | def fuse_conv_and_bn(conv, bn):
43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
44 | with torch.no_grad():
45 | # init
46 | fusedconv = torch.nn.Conv2d(conv.in_channels,
47 | conv.out_channels,
48 | kernel_size=conv.kernel_size,
49 | stride=conv.stride,
50 | padding=conv.padding,
51 | bias=True)
52 |
53 | # prepare filters
54 | w_conv = conv.weight.clone().view(conv.out_channels, -1)
55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
57 |
58 | # prepare spatial bias
59 | if conv.bias is not None:
60 | b_conv = conv.bias
61 | else:
62 | b_conv = torch.zeros(conv.weight.size(0)).cuda()
63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
64 | fusedconv.bias.copy_(b_conv + b_bn)
65 |
66 | return fusedconv
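# (Editor's note) In eval mode this folds BN into the conv exactly as implemented above:
#   W_fused = diag(gamma / sqrt(running_var + eps)) @ W_conv   (then reshaped back to 4-D)
#   b_fused = b_conv + beta - gamma * running_mean / sqrt(running_var + eps)
# For darknet conv+BN layers conv.bias is None, so b_conv is simply the zero vector used above.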
67 |
68 |
69 | def model_info(model, report='summary'):
70 | # Plots a line-by-line description of a PyTorch model
71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters
72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
73 |     if report == 'full':
74 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
75 | for i, (name, p) in enumerate(model.named_parameters()):
76 | name = name.replace('module_list.', '')
77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g))
80 |
81 |
82 | def load_classifier(name='resnet101', n=2):
83 | # Loads a pretrained model reshaped to n-class output
84 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision
85 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
86 |
87 | # Display model properties
88 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']:
89 | print(x + ' =', eval(x))
90 |
91 | # Reshape output to n classes
92 | filters = model.last_linear.weight.shape[1]
93 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
94 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
95 | model.last_linear.out_features = n
96 | return model
97 |
--------------------------------------------------------------------------------
/weights/download_yolov3_weights.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # make '/weights' directory if it does not exist and cd into it
4 | mkdir -p weights && cd weights
5 |
6 | # copy darknet weight files, continue '-c' if partially downloaded
7 | wget -c https://pjreddie.com/media/files/yolov3.weights
8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights
9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights
10 |
11 | # yolov3 pytorch weights
12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI
13 |
14 | # darknet53 weights (first 75 layers only)
15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74
16 |
17 | # yolov3-tiny weights from darknet (first 16 layers only)
18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15
19 | # mv yolov3-tiny.conv.15 ../
20 |
21 |
--------------------------------------------------------------------------------