├── .github
│   └── ISSUE_TEMPLATE
│       └── bug_report.md
├── Dockerfile
├── LICENSE
├── README.md
├── cfg
│   ├── yolov3-1cls.cfg
│   ├── yolov3-spp-1cls.cfg
│   ├── yolov3-spp.cfg
│   ├── yolov3-tiny-1cls.cfg
│   ├── yolov3-tiny.cfg
│   └── yolov3.cfg
├── data
│   ├── 5k.shapes
│   ├── 5k.txt
│   ├── coco.data
│   ├── coco.names
│   ├── coco_1000img.data
│   ├── coco_1000img.txt
│   ├── coco_1000val.data
│   ├── coco_1000val.txt
│   ├── coco_16img.data
│   ├── coco_16img.txt
│   ├── coco_1cls.data
│   ├── coco_1cls.txt
│   ├── coco_1img.data
│   ├── coco_1img.txt
│   ├── coco_1k5k.data
│   ├── coco_32img.data
│   ├── coco_32img.txt
│   ├── coco_500img.txt
│   ├── coco_500val.data
│   ├── coco_500val.txt
│   ├── coco_64img.data
│   ├── coco_64img.shapes
│   ├── coco_64img.txt
│   ├── coco_paper.names
│   ├── get_coco_dataset.sh
│   ├── get_coco_dataset_gdrive.sh
│   ├── hand.data
│   ├── samples
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   ├── trainvalno5k.shapes
│   └── valid_.shapes
├── detect.py
├── models.py
├── prune.py
├── requirements.txt
├── shortcut_prune.py
├── test.py
├── train.py
├── utils
│   ├── adabound.py
│   ├── datasets.py
│   ├── google_utils.py
│   ├── parse_config.py
│   ├── prune_utils.py
│   ├── torch_utils.py
│   └── utils.py
└── weights
    └── download_yolov3_weights.sh
/.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here.
39 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:19.08-py3 3 | 4 | # Install dependencies (pip or conda) 5 | RUN pip install -U gsutil 6 | # RUN pip install -U -r requirements.txt 7 | # RUN conda update -n base -c defaults conda 8 | # RUN conda install -y -c anaconda future numpy opencv matplotlib tqdm pillow 9 | # RUN conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | 11 | ## Install OpenCV with Gstreamer support 12 | #WORKDIR /usr/src 13 | #RUN pip uninstall -y opencv-python 14 | #RUN apt-get update 15 | #RUN apt-get install -y gstreamer1.0-tools gstreamer1.0-python3-dbg-plugin-loader libgstreamer1.0-dev libgstreamer-plugins-base1.0-dev 16 | #RUN git clone https://github.com/opencv/opencv.git && cd opencv && git checkout 4.1.1 && mkdir build 17 | #RUN git clone https://github.com/opencv/opencv_contrib.git && cd opencv_contrib && git checkout 4.1.1 18 | #RUN cd opencv/build && cmake ../ \ 19 | # -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules \ 20 | # -D BUILD_OPENCV_PYTHON3=ON \ 21 | # -D PYTHON3_EXECUTABLE=/opt/conda/bin/python \ 22 | # -D PYTHON3_INCLUDE_PATH=/opt/conda/include/python3.6m \ 23 | # -D PYTHON3_LIBRARIES=/opt/conda/lib/python3.6/site-packages \ 24 | # -D WITH_GSTREAMER=ON \ 25 | # -D WITH_FFMPEG=OFF \ 26 | # && make && make install && ldconfig 27 | #RUN cd /usr/local/lib/python3.6/site-packages/cv2/python-3.6/ && mv cv2.cpython-36m-x86_64-linux-gnu.so cv2.so 28 | #RUN cd /opt/conda/lib/python3.6/site-packages/ && ln -s /usr/local/lib/python3.6/site-packages/cv2/python-3.6/cv2.so cv2.so 29 | #RUN python3 -c "import cv2; print(cv2.getBuildInformation())" 30 | 31 | # Create working directory 32 | RUN mkdir -p /usr/src/app 33 | WORKDIR /usr/src/app 34 | 35 | # Copy contents 36 | COPY . /usr/src/app 37 | 38 | # Copy weights 39 | #RUN python3 -c "from utils.google_utils import *; \ 40 | # gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name='weights/darknet53.conv.74'); \ 41 | # gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name='weights/yolov3-spp.weights'); \ 42 | # gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name='weights/yolov3-spp.pt)" 43 | 44 | 45 | # --------------------------------------------------- Extras Below --------------------------------------------------- 46 | 47 | # Build 48 | # rm -rf yolov3 # Warning: remove existing 49 | # git clone https://github.com/ultralytics/yolov3 && cd yolov3 && python3 detect.py 50 | # sudo docker image prune -af && sudo docker build -t ultralytics/yolov3:v0 . 51 | 52 | # Run 53 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 python3 detect.py 54 | 55 | # Run with local directory access 56 | # sudo nvidia-docker run --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v0 python3 train.py 57 | 58 | # Build and Push 59 | # export tag=ultralytics/yolov3:v0 && sudo docker build -t $tag . 
&& docker push $tag 60 | 61 | # Kill all 62 | # sudo docker kill $(sudo docker ps -q) 63 | 64 | # Run bash for loop 65 | # sudo nvidia-docker run --ipc=host ultralytics/yolov3:v0 bash -c "while true; do python3 train.py --evolve; done" 66 | -------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# YOLOv3-training-prune

## Environment

Python 3.6,
PyTorch 1.1 or later,
numpy > 1.16,
tensorboard 1.13 or later

For YOLOv3 training, see this [blog post](https://blog.csdn.net/qq_34795071/article/details/90769094); the code is based on [ultralytics/yolov3](https://github.com/ultralytics/yolov3).

Also worth a look: https://github.com/tanluren/yolov3-channel-and-layer-pruning

## Normal training (baseline)

```bash
python train.py --data data/VHR.data --cfg cfg/yolov3.cfg --weights weights/yolov3.weights --epochs 100 --batch-size 32  # adjust --epochs as needed; loading the pretrained weights directly gives better convergence
```

## Pruning algorithm

The channel-pruning algorithm in this repository is adapted from the paper [Learning Efficient Convolutional Networks Through Network Slimming (ICCV 2017)](http://openaccess.thecvf.com/content_iccv_2017/html/Liu_Learning_Efficient_Convolutional_ICCV_2017_paper.html); a similar implementation is [yolov3-network-slimming](https://github.com/talebolano/yolov3-network-slimming). The algorithm in the original paper targets classification models and prunes channels based on the gamma (scale) coefficients of the BN layers.

What follows is only an outline of the procedure; in practice you still need to experiment with the sparsity factor s and may need to prune iteratively (a minimal sketch of the sparsity update is given after the References section).

## Parameter settings

-sr enables sparsity training,

--s sets the sparsity factor,

--prune selects the sparsity scheme:

--prune 0: sparsity for normal pruning and regular (uniform) pruning

--prune 1: sparsity for extreme pruning

## Sparsity training
#### The baseline run produces a .pt checkpoint. If you do not want to load yolov3.weights for sparsity training, convert the final baseline .pt to darknet .weights and run sparsity training from that.

```bash
python train.py --cfg cfg/yolov3.cfg --data data/VHR.data --weights weights/XX.weights --epochs 100 --batch-size 32 -sr --s 0.001 --prune 0  # the scale factor s defaults to 0.001; reduce it for datasets with a broad distribution or many classes, or when sparsity training costs too much mAP
```
## Visualizing the model during training
```bash
tensorboard --logdir=runs
```
## Model pruning
```bash
python prune.py --cfg cfg/yolov3.cfg --data data/VHR.data --weights weights/last.pt --percent 0.5
```
## shortcut_prune pruning
```bash
python shortcut_prune.py --cfg cfg/yolov3.cfg --data data/VHR.data --weights weights/last.pt --percent 0.5
```
## Fine-tuning the pruned model
```bash
python train.py --cfg cfg/prune_0.5_yolov3_cfg.cfg --data data/VHR.data --weights weights/prune_0.5_last.weights --epochs 100 --batch-size 32
```
## Convert cfg/PyTorch model to darknet weights
```bash
python -c "from models import *; convert('cfg/yolov3.cfg', 'weights/yolov3.pt')"
Success: converted 'weights/yolov3.pt' to 'converted.weights'
```
## References
https://github.com/Lam1360/YOLOv3-model-pruning

https://github.com/ultralytics/yolov3
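Below is a minimal, illustrative sketch of the two ideas described above: the L1 subgradient that `-sr`/`--s` sparsity training applies to every BN scale factor (gamma), and the global gamma threshold implied by a prune ratio such as `--percent 0.5`. The helper names and call sites are assumptions for illustration only, not the actual API in `train.py` or `utils/prune_utils.py`.

```python
# Illustrative sketch only -- not the repository's actual implementation.
import torch
import torch.nn as nn


def add_bn_l1_grad(model, s=0.001):
    # After loss.backward(), push an L1 subgradient s * sign(gamma) onto every
    # BN scale factor; this is the Network Slimming sparsity term (-sr / --s).
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.weight.grad.add_(s * torch.sign(m.weight.detach()))


def global_gamma_threshold(model, percent=0.5):
    # Gather all BN gammas and return the value below which the lowest
    # `percent` fraction of channels would be pruned (cf. --percent).
    gammas = torch.cat([m.weight.detach().abs().flatten()
                        for m in model.modules()
                        if isinstance(m, nn.BatchNorm2d)])
    k = int(gammas.numel() * percent)
    return torch.sort(gammas)[0][k]


# Hypothetical use inside one training step during sparsity training:
#   loss.backward()
#   add_bn_l1_grad(model, s=0.001)   # only when -sr is given
#   optimizer.step()
```
-------------------------------------------------------------------------------- /cfg/yolov3-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 |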
filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | 
[shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | 
size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=18 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=1 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=18 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 
59,119, 116,90, 156,198, 373,326 696 | classes=1 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=18 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=1 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /cfg/yolov3-spp-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=100 20 | max_batches = 5000 21 | policy=steps 22 | steps=4000,4500 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | 
batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 
| [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | 
size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=18 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=1 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=18 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=1 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | 
size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=18 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=1 817 | num=9 818 | jitter=.3 819 | ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-spp.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | 
[convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 
| activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | ### SPP ### 576 | [maxpool] 577 | stride=1 578 | size=5 579 | 580 | [route] 581 | layers=-2 582 | 583 | [maxpool] 584 | stride=1 585 | size=9 586 | 587 | [route] 588 | layers=-4 589 | 590 | [maxpool] 591 | stride=1 592 | size=13 593 | 594 | [route] 595 | layers=-1,-3,-5,-6 596 | 597 | ### End SPP ### 598 | 599 | [convolutional] 600 | batch_normalize=1 601 | filters=512 602 | size=1 603 | stride=1 604 | pad=1 605 | 
activation=leaky 606 | 607 | 608 | [convolutional] 609 | batch_normalize=1 610 | size=3 611 | stride=1 612 | pad=1 613 | filters=1024 614 | activation=leaky 615 | 616 | [convolutional] 617 | batch_normalize=1 618 | filters=512 619 | size=1 620 | stride=1 621 | pad=1 622 | activation=leaky 623 | 624 | [convolutional] 625 | batch_normalize=1 626 | size=3 627 | stride=1 628 | pad=1 629 | filters=1024 630 | activation=leaky 631 | 632 | [convolutional] 633 | size=1 634 | stride=1 635 | pad=1 636 | filters=255 637 | activation=linear 638 | 639 | 640 | [yolo] 641 | mask = 6,7,8 642 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 643 | classes=80 644 | num=9 645 | jitter=.3 646 | ignore_thresh = .7 647 | truth_thresh = 1 648 | random=1 649 | 650 | 651 | [route] 652 | layers = -4 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [upsample] 663 | stride=2 664 | 665 | [route] 666 | layers = -1, 61 667 | 668 | 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=256 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=leaky 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | size=3 697 | stride=1 698 | pad=1 699 | filters=512 700 | activation=leaky 701 | 702 | [convolutional] 703 | batch_normalize=1 704 | filters=256 705 | size=1 706 | stride=1 707 | pad=1 708 | activation=leaky 709 | 710 | [convolutional] 711 | batch_normalize=1 712 | size=3 713 | stride=1 714 | pad=1 715 | filters=512 716 | activation=leaky 717 | 718 | [convolutional] 719 | size=1 720 | stride=1 721 | pad=1 722 | filters=255 723 | activation=linear 724 | 725 | 726 | [yolo] 727 | mask = 3,4,5 728 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 729 | classes=80 730 | num=9 731 | jitter=.3 732 | ignore_thresh = .7 733 | truth_thresh = 1 734 | random=1 735 | 736 | 737 | 738 | [route] 739 | layers = -4 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [upsample] 750 | stride=2 751 | 752 | [route] 753 | layers = -1, 36 754 | 755 | 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | batch_normalize=1 775 | filters=128 776 | size=1 777 | stride=1 778 | pad=1 779 | activation=leaky 780 | 781 | [convolutional] 782 | batch_normalize=1 783 | size=3 784 | stride=1 785 | pad=1 786 | filters=256 787 | activation=leaky 788 | 789 | [convolutional] 790 | batch_normalize=1 791 | filters=128 792 | size=1 793 | stride=1 794 | pad=1 795 | activation=leaky 796 | 797 | [convolutional] 798 | batch_normalize=1 799 | size=3 800 | stride=1 801 | pad=1 802 | filters=256 803 | activation=leaky 804 | 805 | [convolutional] 806 | size=1 807 | stride=1 808 | pad=1 809 | filters=255 810 | activation=linear 811 | 812 | 813 | [yolo] 814 | mask = 0,1,2 815 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 816 | classes=80 817 | num=9 818 | jitter=.3 819 | 
ignore_thresh = .7 820 | truth_thresh = 1 821 | random=1 822 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 
17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 
| activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | 
activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 
491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | 
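# Third detection branch: route back 4 layers, reduce to 128 channels with a 1x1 convolution,
# upsample 2x, and concatenate with backbone layer 36; the [yolo] head below then predicts with
# mask 0,1,2 (the three smallest anchors) on the finest-resolution grid.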
[route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco_1000img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1000val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/coco_1000val.txt 4 | names=data/coco.names 5 | 
backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_16img.txt 3 | valid=./data/coco_16img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_16img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | -------------------------------------------------------------------------------- /data/coco_1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=./data/coco_1cls.txt 3 | valid=./data/coco_1cls.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000013992.jpg 2 | ../coco/images/val2014/COCO_val2014_000000047226.jpg 3 | ../coco/images/val2014/COCO_val2014_000000050324.jpg 4 | ../coco/images/val2014/COCO_val2014_000000121497.jpg 5 | ../coco/images/val2014/COCO_val2014_000000001464.jpg 6 | -------------------------------------------------------------------------------- /data/coco_1img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1img.txt 3 | valid=./data/coco_1img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_1img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/val2014/COCO_val2014_000000581886.jpg 2 | -------------------------------------------------------------------------------- /data/coco_1k5k.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_1000img.txt 3 | valid=./data/5k.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_32img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_32img.txt 3 | valid=./data/coco_32img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | 
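The *.data files above all follow the same key=value layout (classes, train, valid, names, backup, eval); the repository reads them through its parse_data_cfg helper (see its use in detect.py further below). As a rough, stand-alone illustration only, the sketch that follows shows how such a file can be turned into a dict; parse_data_file is a hypothetical name and is not part of the repository.

def parse_data_file(path):
    # Parse a darknet-style *.data file into a dict of strings,
    # e.g. {'classes': '80', 'train': './data/coco_16img.txt', ...}
    options = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            key, _, value = line.partition('=')
            options[key.strip()] = value.strip()
    return options

# Example usage (path assumed relative to the repository root):
# opts = parse_data_file('data/coco_16img.data')
# print(opts['classes'], opts['train'])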
-------------------------------------------------------------------------------- /data/coco_32img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | -------------------------------------------------------------------------------- /data/coco_500val.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_500img.txt 3 | valid=./data/coco_500val.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco_64img.txt 3 | valid=./data/coco_64img.txt 4 | names=data/coco.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/coco_64img.shapes: -------------------------------------------------------------------------------- 1 | 640 480 2 | 640 426 3 | 640 428 4 | 640 425 5 | 481 640 6 | 381 500 7 | 640 488 8 | 480 640 9 | 640 426 10 | 427 640 11 | 500 375 12 | 612 612 13 | 640 425 14 | 512 640 15 | 640 480 16 | 640 427 17 | 640 427 18 | 640 416 19 | 640 480 20 | 416 640 21 | 640 481 22 | 640 573 23 | 480 640 24 | 640 480 25 | 640 428 26 | 480 640 27 | 427 640 28 | 640 536 29 | 640 480 30 | 640 428 31 | 640 424 32 | 500 333 33 | 591 640 34 | 640 480 35 | 640 426 36 | 600 600 37 | 640 427 38 | 640 427 39 | 640 480 40 | 640 481 41 | 640 427 42 | 640 480 43 | 640 
480 44 | 480 640 45 | 480 640 46 | 640 480 47 | 446 640 48 | 640 480 49 | 640 611 50 | 426 640 51 | 640 480 52 | 640 389 53 | 427 640 54 | 640 480 55 | 640 480 56 | 480 640 57 | 640 480 58 | 640 427 59 | 500 495 60 | 500 313 61 | 640 480 62 | 360 640 63 | 427 640 64 | 640 480 65 | -------------------------------------------------------------------------------- /data/coco_64img.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2014/COCO_train2014_000000000009.jpg 2 | ../coco/images/train2014/COCO_train2014_000000000025.jpg 3 | ../coco/images/train2014/COCO_train2014_000000000030.jpg 4 | ../coco/images/train2014/COCO_train2014_000000000034.jpg 5 | ../coco/images/train2014/COCO_train2014_000000000036.jpg 6 | ../coco/images/train2014/COCO_train2014_000000000049.jpg 7 | ../coco/images/train2014/COCO_train2014_000000000061.jpg 8 | ../coco/images/train2014/COCO_train2014_000000000064.jpg 9 | ../coco/images/train2014/COCO_train2014_000000000071.jpg 10 | ../coco/images/train2014/COCO_train2014_000000000072.jpg 11 | ../coco/images/train2014/COCO_train2014_000000000077.jpg 12 | ../coco/images/train2014/COCO_train2014_000000000078.jpg 13 | ../coco/images/train2014/COCO_train2014_000000000081.jpg 14 | ../coco/images/train2014/COCO_train2014_000000000086.jpg 15 | ../coco/images/train2014/COCO_train2014_000000000089.jpg 16 | ../coco/images/train2014/COCO_train2014_000000000092.jpg 17 | ../coco/images/train2014/COCO_train2014_000000000094.jpg 18 | ../coco/images/train2014/COCO_train2014_000000000109.jpg 19 | ../coco/images/train2014/COCO_train2014_000000000110.jpg 20 | ../coco/images/train2014/COCO_train2014_000000000113.jpg 21 | ../coco/images/train2014/COCO_train2014_000000000127.jpg 22 | ../coco/images/train2014/COCO_train2014_000000000138.jpg 23 | ../coco/images/train2014/COCO_train2014_000000000142.jpg 24 | ../coco/images/train2014/COCO_train2014_000000000144.jpg 25 | ../coco/images/train2014/COCO_train2014_000000000149.jpg 26 | ../coco/images/train2014/COCO_train2014_000000000151.jpg 27 | ../coco/images/train2014/COCO_train2014_000000000154.jpg 28 | ../coco/images/train2014/COCO_train2014_000000000165.jpg 29 | ../coco/images/train2014/COCO_train2014_000000000194.jpg 30 | ../coco/images/train2014/COCO_train2014_000000000201.jpg 31 | ../coco/images/train2014/COCO_train2014_000000000247.jpg 32 | ../coco/images/train2014/COCO_train2014_000000000260.jpg 33 | ../coco/images/train2014/COCO_train2014_000000000263.jpg 34 | ../coco/images/train2014/COCO_train2014_000000000307.jpg 35 | ../coco/images/train2014/COCO_train2014_000000000308.jpg 36 | ../coco/images/train2014/COCO_train2014_000000000309.jpg 37 | ../coco/images/train2014/COCO_train2014_000000000312.jpg 38 | ../coco/images/train2014/COCO_train2014_000000000315.jpg 39 | ../coco/images/train2014/COCO_train2014_000000000321.jpg 40 | ../coco/images/train2014/COCO_train2014_000000000322.jpg 41 | ../coco/images/train2014/COCO_train2014_000000000326.jpg 42 | ../coco/images/train2014/COCO_train2014_000000000332.jpg 43 | ../coco/images/train2014/COCO_train2014_000000000349.jpg 44 | ../coco/images/train2014/COCO_train2014_000000000368.jpg 45 | ../coco/images/train2014/COCO_train2014_000000000370.jpg 46 | ../coco/images/train2014/COCO_train2014_000000000382.jpg 47 | ../coco/images/train2014/COCO_train2014_000000000384.jpg 48 | ../coco/images/train2014/COCO_train2014_000000000389.jpg 49 | ../coco/images/train2014/COCO_train2014_000000000394.jpg 50 | 
../coco/images/train2014/COCO_train2014_000000000404.jpg 51 | ../coco/images/train2014/COCO_train2014_000000000419.jpg 52 | ../coco/images/train2014/COCO_train2014_000000000431.jpg 53 | ../coco/images/train2014/COCO_train2014_000000000436.jpg 54 | ../coco/images/train2014/COCO_train2014_000000000438.jpg 55 | ../coco/images/train2014/COCO_train2014_000000000443.jpg 56 | ../coco/images/train2014/COCO_train2014_000000000446.jpg 57 | ../coco/images/train2014/COCO_train2014_000000000450.jpg 58 | ../coco/images/train2014/COCO_train2014_000000000471.jpg 59 | ../coco/images/train2014/COCO_train2014_000000000490.jpg 60 | ../coco/images/train2014/COCO_train2014_000000000491.jpg 61 | ../coco/images/train2014/COCO_train2014_000000000510.jpg 62 | ../coco/images/train2014/COCO_train2014_000000000514.jpg 63 | ../coco/images/train2014/COCO_train2014_000000000529.jpg 64 | ../coco/images/train2014/COCO_train2014_000000000531.jpg 65 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 
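# The 'Set Up Image Lists' step below prefixes each relative image path in the downloaded
# *.part files with the current working directory, producing the absolute-path lists
# 5k.txt and trainvalno5k.txt that the *.data files reference.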
20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # ./coco/images/train2014/COCO_train2014_000000167126.jpg # corrupted image 40 | -------------------------------------------------------------------------------- /data/get_coco_dataset_gdrive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # https://stackoverflow.com/questions/48133080/how-to-download-a-google-drive-url-via-curl-or-wget/48133859 3 | 4 | # Zip coco folder 5 | # zip -r coco.zip coco 6 | # tar -czvf coco.tar.gz coco 7 | 8 | # Set fileid and filename 9 | filename="coco.zip" 10 | fileid="1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO" # coco.zip 11 | 12 | # Download from Google Drive, accepting presented query 13 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 14 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 15 | rm ./cookie 16 | 17 | # Unzip 18 | unzip -q ${filename} # for coco.zip 19 | # tar -xzf ${filename} # for coco.tar.gz 20 | -------------------------------------------------------------------------------- /data/hand.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=D:/dl/YOLOv3-model-pruning/data/train_.txt 3 | valid=D:/dl/YOLOv3-model-pruning/data/valid_.txt 4 | names=D:/dl/YOLOv3-model-pruning/data/oxfordhand.names 5 | backup=backup/ 6 | eval=coco 7 | -------------------------------------------------------------------------------- /data/samples/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zbyuan/pruning_yolov3/25e13596a218ec55375f4781aa3dae621b5961cb/data/samples/bus.jpg -------------------------------------------------------------------------------- /data/samples/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zbyuan/pruning_yolov3/25e13596a218ec55375f4781aa3dae621b5961cb/data/samples/zidane.jpg -------------------------------------------------------------------------------- /data/valid_.shapes: -------------------------------------------------------------------------------- 1 | 500 375 2 | 500 375 3 | 500 375 4 | 500 375 5 | 500 375 6 | 500 375 7 | 375 500 8 | 500 333 9 | 333 500 10 | 
500 375 11 | 500 434 12 | 500 375 13 | 333 500 14 | 500 375 15 | 500 331 16 | 500 375 17 | 500 375 18 | 500 374 19 | 500 375 20 | 500 375 21 | 375 500 22 | 500 333 23 | 500 333 24 | 500 375 25 | 500 332 26 | 500 486 27 | 500 375 28 | 500 375 29 | 375 500 30 | 375 500 31 | 500 375 32 | 500 375 33 | 500 375 34 | 355 500 35 | 375 500 36 | 500 333 37 | 500 375 38 | 500 377 39 | 375 500 40 | 500 375 41 | 500 375 42 | 500 375 43 | 500 375 44 | 333 500 45 | 500 375 46 | 500 333 47 | 500 346 48 | 500 375 49 | 476 500 50 | 500 333 51 | 500 420 52 | 500 333 53 | 500 333 54 | 500 333 55 | 333 500 56 | 333 500 57 | 375 500 58 | 500 379 59 | 500 375 60 | 500 375 61 | 500 357 62 | 375 500 63 | 500 393 64 | 333 500 65 | 500 375 66 | 500 375 67 | 500 333 68 | 333 500 69 | 327 500 70 | 500 375 71 | 500 375 72 | 500 345 73 | 333 500 74 | 375 500 75 | 500 380 76 | 500 375 77 | 487 377 78 | 500 375 79 | 500 333 80 | 500 333 81 | 333 500 82 | 500 375 83 | 375 500 84 | 500 375 85 | 500 375 86 | 500 375 87 | 375 500 88 | 500 375 89 | 500 332 90 | 333 500 91 | 480 360 92 | 500 334 93 | 500 375 94 | 500 375 95 | 333 500 96 | 500 333 97 | 375 500 98 | 500 375 99 | 500 375 100 | 500 375 101 | 500 375 102 | 500 375 103 | 332 500 104 | 500 375 105 | 500 375 106 | 375 500 107 | 500 333 108 | 500 331 109 | 500 375 110 | 333 500 111 | 333 500 112 | 486 500 113 | 500 375 114 | 375 500 115 | 356 500 116 | 500 375 117 | 375 500 118 | 500 375 119 | 500 375 120 | 268 400 121 | 389 500 122 | 333 500 123 | 500 375 124 | 500 375 125 | 500 367 126 | 500 375 127 | 500 375 128 | 500 334 129 | 495 500 130 | 319 480 131 | 500 375 132 | 500 375 133 | 333 500 134 | 500 375 135 | 500 375 136 | 500 375 137 | 326 500 138 | 500 375 139 | 500 375 140 | 500 375 141 | 500 400 142 | 332 500 143 | 500 375 144 | 500 375 145 | 360 331 146 | 333 500 147 | 500 332 148 | 500 374 149 | 500 375 150 | 375 500 151 | 500 375 152 | 500 375 153 | 500 375 154 | 500 367 155 | 500 375 156 | 500 375 157 | 500 375 158 | 375 500 159 | 500 375 160 | 500 375 161 | 500 390 162 | 500 358 163 | 500 397 164 | 500 341 165 | 375 500 166 | 500 333 167 | 500 375 168 | 332 500 169 | 500 375 170 | 500 375 171 | 500 375 172 | 375 500 173 | 240 320 174 | 450 480 175 | 417 500 176 | 500 400 177 | 500 375 178 | 500 411 179 | 338 500 180 | 500 375 181 | 500 375 182 | 379 500 183 | 500 375 184 | 333 500 185 | 500 332 186 | 500 375 187 | 500 375 188 | 500 375 189 | 500 375 190 | 332 500 191 | 469 500 192 | 500 375 193 | 333 500 194 | 500 375 195 | 500 375 196 | 500 376 197 | 500 375 198 | 500 334 199 | 500 375 200 | 500 375 201 | 500 341 202 | 500 333 203 | 500 375 204 | 500 375 205 | 500 334 206 | 500 375 207 | 500 375 208 | 500 357 209 | 375 500 210 | 500 375 211 | 500 375 212 | 375 500 213 | 500 375 214 | 500 497 215 | 375 500 216 | 375 500 217 | 500 334 218 | 500 375 219 | 500 375 220 | 500 375 221 | 500 375 222 | 500 375 223 | 333 500 224 | 500 375 225 | 375 500 226 | 500 375 227 | 500 375 228 | 375 500 229 | 500 375 230 | 334 500 231 | 500 375 232 | 364 500 233 | 375 500 234 | 494 500 235 | 484 500 236 | 500 333 237 | 500 375 238 | 500 443 239 | 375 500 240 | 500 375 241 | 500 334 242 | 500 375 243 | 375 500 244 | 500 375 245 | 500 333 246 | 500 375 247 | 313 500 248 | 500 375 249 | 400 300 250 | 375 500 251 | 375 500 252 | 500 375 253 | 333 500 254 | 500 337 255 | 375 500 256 | 500 290 257 | 500 375 258 | 500 312 259 | 500 333 260 | 500 375 261 | 375 500 262 | 500 333 263 | 500 333 264 | 500 333 265 | 500 375 266 | 500 375 267 | 500 375 268 | 500 333 269 | 500 375 270 | 500 
375 271 | 500 375 272 | 500 375 273 | 500 375 274 | 375 500 275 | 375 500 276 | 500 375 277 | 500 374 278 | 333 500 279 | 375 500 280 | 500 375 281 | 500 375 282 | 500 375 283 | 500 375 284 | 333 500 285 | 500 375 286 | 500 375 287 | 500 375 288 | 500 333 289 | 294 500 290 | 500 375 291 | 500 375 292 | 500 375 293 | 500 334 294 | 375 500 295 | 333 500 296 | 500 375 297 | 333 500 298 | 500 375 299 | 500 221 300 | 500 374 301 | 500 375 302 | 333 500 303 | 500 333 304 | 500 375 305 | 270 360 306 | 500 371 307 | 500 333 308 | 500 335 309 | 358 500 310 | 220 500 311 | 500 375 312 | 500 375 313 | 375 500 314 | 500 375 315 | 500 375 316 | 375 500 317 | 366 500 318 | 500 375 319 | 500 379 320 | 500 375 321 | 500 489 322 | 500 333 323 | 500 375 324 | 500 375 325 | 500 333 326 | 500 375 327 | 500 375 328 | 334 500 329 | 500 395 330 | 333 500 331 | 500 369 332 | 500 375 333 | 375 500 334 | 500 375 335 | 500 375 336 | 375 500 337 | 500 333 338 | 500 332 339 | 500 375 340 | 500 375 341 | 375 500 342 | 375 500 343 | 500 379 344 | 500 395 345 | 500 333 346 | 500 375 347 | 500 375 348 | 378 500 349 | 500 333 350 | 500 335 351 | 500 333 352 | 375 500 353 | 375 500 354 | 281 500 355 | 500 336 356 | 500 333 357 | 500 375 358 | 500 245 359 | 500 375 360 | 500 375 361 | 500 333 362 | 500 375 363 | 500 334 364 | 500 375 365 | 500 419 366 | 500 375 367 | 500 333 368 | 500 375 369 | 500 375 370 | 375 500 371 | 500 375 372 | 500 375 373 | 500 375 374 | 375 500 375 | 500 332 376 | 500 333 377 | 500 277 378 | 500 333 379 | 500 333 380 | 375 500 381 | 500 334 382 | 500 375 383 | 500 375 384 | 500 333 385 | 335 500 386 | 500 375 387 | 500 375 388 | 332 500 389 | 500 375 390 | 500 375 391 | 500 375 392 | 500 375 393 | 500 375 394 | 500 313 395 | 500 375 396 | 500 375 397 | 333 500 398 | 500 375 399 | 500 375 400 | 335 500 401 | 500 375 402 | 500 375 403 | 500 375 404 | 375 500 405 | 500 335 406 | 375 500 407 | 500 375 408 | 375 500 409 | 500 500 410 | 500 375 411 | 500 375 412 | 500 333 413 | 500 375 414 | 500 375 415 | 500 375 416 | 500 375 417 | 333 500 418 | 500 375 419 | 500 375 420 | 500 375 421 | 500 332 422 | 500 375 423 | 334 500 424 | 332 500 425 | 375 500 426 | 500 333 427 | 500 405 428 | 333 500 429 | 500 334 430 | 500 333 431 | 500 375 432 | 500 375 433 | 500 332 434 | 333 500 435 | 368 500 436 | 375 500 437 | 500 375 438 | 500 375 439 | 500 375 440 | 500 375 441 | 500 375 442 | 288 432 443 | 375 500 444 | 500 375 445 | 500 333 446 | 500 375 447 | 500 333 448 | 375 500 449 | 500 375 450 | 500 281 451 | 333 500 452 | 500 333 453 | 500 375 454 | 500 333 455 | 500 375 456 | 500 334 457 | 500 375 458 | 375 500 459 | 375 500 460 | 375 500 461 | 500 333 462 | 500 333 463 | 500 375 464 | 500 375 465 | 500 375 466 | 500 375 467 | 500 375 468 | 500 356 469 | 474 500 470 | 500 375 471 | 500 375 472 | 500 326 473 | 360 480 474 | 500 375 475 | 500 375 476 | 500 488 477 | 500 375 478 | 442 500 479 | 500 333 480 | 450 349 481 | 375 500 482 | 500 375 483 | 375 500 484 | 306 500 485 | 500 338 486 | 500 333 487 | 500 375 488 | 375 500 489 | 500 375 490 | 500 333 491 | 375 500 492 | 500 375 493 | 375 500 494 | 378 500 495 | 500 375 496 | 500 375 497 | 500 375 498 | 500 405 499 | 500 333 500 | 500 375 501 | 500 500 502 | 500 375 503 | 450 300 504 | 500 375 505 | 500 375 506 | 500 375 507 | 333 500 508 | 500 375 509 | 500 375 510 | 500 375 511 | 500 375 512 | 500 377 513 | 500 375 514 | 500 333 515 | 500 375 516 | 375 500 517 | 375 500 518 | 500 375 519 | 500 375 520 | 500 375 521 | 375 500 522 | 500 375 523 | 500 332 524 | 
500 375 525 | 500 375 526 | 500 375 527 | 334 500 528 | 500 375 529 | 500 375 530 | 332 500 531 | 500 333 532 | 500 375 533 | 375 500 534 | 375 500 535 | 333 500 536 | 500 332 537 | 500 375 538 | 500 375 539 | 500 375 540 | 500 364 541 | 333 500 542 | 500 375 543 | 500 333 544 | 500 375 545 | 500 375 546 | 500 333 547 | 375 500 548 | 500 375 549 | 500 333 550 | 500 336 551 | 500 375 552 | 500 359 553 | 500 333 554 | 500 375 555 | 500 375 556 | 500 375 557 | 500 332 558 | 500 375 559 | 500 333 560 | 500 375 561 | 282 500 562 | 500 375 563 | 500 375 564 | 500 375 565 | 500 375 566 | 500 375 567 | 375 500 568 | 500 375 569 | 375 500 570 | 500 375 571 | 500 375 572 | 500 375 573 | 500 390 574 | 334 500 575 | 332 500 576 | 500 375 577 | 500 375 578 | 425 319 579 | 500 333 580 | 500 334 581 | 500 375 582 | 375 500 583 | 500 333 584 | 500 375 585 | 375 500 586 | 500 332 587 | 500 375 588 | 448 336 589 | 500 375 590 | 500 375 591 | 500 375 592 | 500 375 593 | 500 375 594 | 500 375 595 | 500 335 596 | 500 333 597 | 375 500 598 | 375 500 599 | 500 333 600 | 500 375 601 | 333 500 602 | 500 375 603 | 500 375 604 | 500 375 605 | 375 500 606 | 500 375 607 | 500 375 608 | 500 375 609 | 333 500 610 | 375 500 611 | 500 439 612 | 375 500 613 | 500 375 614 | 500 375 615 | 334 500 616 | 374 500 617 | 500 375 618 | 321 500 619 | 500 400 620 | 500 375 621 | 500 375 622 | 500 375 623 | 500 375 624 | 500 410 625 | 500 333 626 | 500 375 627 | 500 375 628 | 500 334 629 | 500 375 630 | 500 375 631 | 500 326 632 | 500 375 633 | 500 357 634 | 500 374 635 | 500 375 636 | 500 374 637 | 500 333 638 | 500 375 639 | 500 375 640 | 500 334 641 | 375 500 642 | 500 375 643 | 500 334 644 | 500 375 645 | 500 333 646 | 500 375 647 | 500 375 648 | 500 375 649 | 480 361 650 | 375 500 651 | 333 500 652 | 500 333 653 | 333 500 654 | 500 333 655 | 500 372 656 | 375 500 657 | 500 375 658 | 375 500 659 | 500 319 660 | 500 333 661 | 500 375 662 | 375 500 663 | 500 377 664 | 400 498 665 | 500 393 666 | 500 334 667 | 333 500 668 | 500 333 669 | 500 375 670 | 500 375 671 | 500 375 672 | 500 375 673 | 375 500 674 | 375 500 675 | 333 500 676 | 500 333 677 | 500 333 678 | 333 500 679 | 306 500 680 | 500 375 681 | 500 334 682 | 320 448 683 | 333 500 684 | 375 500 685 | 500 334 686 | 270 360 687 | 361 500 688 | 500 393 689 | 500 375 690 | 396 500 691 | 500 333 692 | 500 333 693 | 320 480 694 | 500 375 695 | 500 375 696 | 500 375 697 | 500 375 698 | 399 500 699 | 500 375 700 | 500 375 701 | 500 333 702 | 500 332 703 | 500 375 704 | 320 480 705 | 500 375 706 | 375 500 707 | 500 334 708 | 451 500 709 | 500 375 710 | 500 375 711 | 500 368 712 | 500 375 713 | 375 500 714 | 500 375 715 | 500 375 716 | 319 500 717 | 500 375 718 | 450 500 719 | 375 500 720 | 500 375 721 | 375 500 722 | 500 374 723 | 500 375 724 | 500 375 725 | 500 375 726 | 345 500 727 | 500 375 728 | 500 375 729 | 500 325 730 | 500 375 731 | 500 379 732 | 500 333 733 | 500 375 734 | 375 500 735 | 500 375 736 | 500 375 737 | 354 500 738 | 500 375 739 | 375 500 740 | 500 375 741 | 333 500 742 | 375 500 743 | 500 333 744 | 225 417 745 | 333 500 746 | 500 333 747 | 375 500 748 | 500 332 749 | 500 334 750 | 400 500 751 | 500 333 752 | 333 500 753 | 500 375 754 | 375 500 755 | 333 500 756 | 500 333 757 | 340 500 758 | 500 375 759 | 375 500 760 | 333 500 761 | 500 375 762 | 500 375 763 | 500 375 764 | 500 375 765 | 500 402 766 | 375 500 767 | 500 333 768 | 500 333 769 | 374 500 770 | 500 333 771 | 375 500 772 | 500 333 773 | 500 375 774 | 500 375 775 | 500 294 776 | 500 375 777 | 375 500 778 
| 500 375 779 | 500 332 780 | 332 500 781 | 358 500 782 | 500 333 783 | 380 472 784 | 500 375 785 | 375 500 786 | 500 375 787 | 500 375 788 | 500 375 789 | 453 500 790 | 375 500 791 | 500 333 792 | 500 500 793 | 375 500 794 | 500 375 795 | 375 500 796 | 500 375 797 | 375 500 798 | 402 500 799 | 459 288 800 | 273 500 801 | 415 500 802 | 346 336 803 | 320 500 804 | 500 375 805 | 500 333 806 | 500 333 807 | 500 410 808 | 500 375 809 | 500 458 810 | 500 333 811 | 500 375 812 | 443 437 813 | 341 251 814 | 375 500 815 | 500 375 816 | 252 400 817 | 288 432 818 | 500 334 819 | 375 500 820 | 375 500 821 | 377 500 822 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Fuse Conv2d + BatchNorm2d layers 31 | # model.fuse() 32 | 33 | # Eval mode 34 | model.to(device).eval() 35 | 36 | # Export mode 37 | if ONNX_EXPORT: 38 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 39 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 40 | return 41 | 42 | # Half precision 43 | half = half and device.type != 'cpu' # half precision only supported on CUDA 44 | if half: 45 | model.half() 46 | 47 | # Set Dataloader 48 | vid_path, vid_writer = None, None 49 | if webcam: 50 | view_img = True 51 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 52 | dataset = LoadStreams(source, img_size=img_size, half=half) 53 | else: 54 | save_img = True 55 | dataset = LoadImages(source, img_size=img_size, half=half) 56 | 57 | # Get classes and colors 58 | classes = load_classes(parse_data_cfg(opt.data)['names']) 59 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 60 | 61 | # Run inference 62 | t0 = time.time() 63 | for path, img, im0s, vid_cap in dataset: 64 | t = time.time() 65 | 66 | # Get detections 67 | img = torch.from_numpy(img).to(device) 68 | if img.ndimension() == 3: 69 | img = img.unsqueeze(0) 70 | pred, _ = model(img) 71 | 72 | if opt.half: 73 | pred = pred.float() 74 | 75 | for i, det in enumerate(non_max_suppression(pred, opt.conf_thres, opt.nms_thres)): # detections per image 76 | if webcam: # batch_size >= 1 77 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 78 | else: 79 | p, s, im0 = path, '', im0s 80 | 81 | save_path = str(Path(out) / Path(p).name) 82 | s += '%gx%g ' % img.shape[2:] 
# print string 83 | if det is not None and len(det): 84 | # Rescale boxes from img_size to im0 size 85 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 86 | 87 | # Print results 88 | for c in det[:, -1].unique(): 89 | n = (det[:, -1] == c).sum() # detections per class 90 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string 91 | 92 | # Write results 93 | for *xyxy, conf, _, cls in det: 94 | if save_txt: # Write to file 95 | with open(save_path + '.txt', 'a') as file: 96 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 97 | 98 | if save_img or view_img: # Add bbox to image 99 | label = '%s %.2f' % (classes[int(cls)], conf) 100 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 101 | 102 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 103 | 104 | # Stream results 105 | if view_img: 106 | cv2.imshow(p, im0) 107 | 108 | # Save results (image with detections) 109 | if save_img: 110 | if dataset.mode == 'images': 111 | cv2.imwrite(save_path, im0) 112 | else: 113 | if vid_path != save_path: # new video 114 | vid_path = save_path 115 | if isinstance(vid_writer, cv2.VideoWriter): 116 | vid_writer.release() # release previous video writer 117 | 118 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 119 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 120 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 121 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 122 | vid_writer.write(im0) 123 | 124 | if save_txt or save_img: 125 | print('Results saved to %s' % os.getcwd() + os.sep + out) 126 | if platform == 'darwin': # MacOS 127 | os.system('open ' + out + ' ' + save_path) 128 | 129 | print('Done. (%.3fs)' % (time.time() - t0)) 130 | 131 | 132 | if __name__ == '__main__': 133 | parser = argparse.ArgumentParser() 134 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 135 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 136 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 137 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 138 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 139 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 140 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 141 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 142 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 143 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 144 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 145 | parser.add_argument('--view-img', action='store_true', help='display results') 146 | opt = parser.parse_args() 147 | print(opt) 148 | 149 | with torch.no_grad(): 150 | detect() 151 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from utils.google_utils import * 4 | from utils.parse_config import * 5 | from utils.utils import * 6 | 7 | ONNX_EXPORT = False 8 | 9 | 10 | def create_modules(module_defs, img_size, arc): 11 | # Constructs module list of layer blocks from module configuration in module_defs 12 | 13 | hyperparams = module_defs.pop(0) 14 | output_filters = [int(hyperparams['channels'])] 15 | module_list = nn.ModuleList() 16 | routs = [] # list of layers which rout to deeper layes 17 | yolo_index = -1 18 | 19 | for i, mdef in enumerate(module_defs): 20 | modules = nn.Sequential() 21 | 22 | if mdef['type'] == 'convolutional': 23 | bn = int(mdef['batch_normalize']) 24 | filters = int(mdef['filters']) 25 | kernel_size = int(mdef['size']) 26 | pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0 27 | modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], 28 | out_channels=filters, 29 | kernel_size=kernel_size, 30 | stride=int(mdef['stride']), 31 | padding=pad, 32 | bias=not bn)) 33 | if bn: 34 | modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) 35 | if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 36 | modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) 37 | # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10)) 38 | # modules.add_module('activation', Swish()) 39 | 40 | elif mdef['type'] == 'maxpool': 41 | kernel_size = int(mdef['size']) 42 | stride = int(mdef['stride']) 43 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 44 | if kernel_size == 2 and stride == 1: # yolov3-tiny 45 | modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) 46 | modules.add_module('MaxPool2d', maxpool) 47 | else: 48 | modules = maxpool 49 | 50 | elif mdef['type'] == 'upsample': 51 | modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest') 52 | 53 | elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer 54 | layers = [int(x) for x in mdef['layers'].split(',')] 55 | filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers]) 56 | routs.extend([l if l > 0 else l + i for l in layers]) 57 | # if mdef[i+1]['type'] == 'reorg3d': 58 | # modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest') # reorg3d 59 | 60 | elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer 61 | filters = output_filters[int(mdef['from'])] 62 | layer = int(mdef['from']) 63 | routs.extend([i + layer if layer < 0 else layer]) 64 | 65 | elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale 66 | # torch.Size([16, 128, 104, 104]) 67 | # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer 68 | pass 69 | 70 | elif mdef['type'] == 'yolo': 71 | yolo_index += 1 72 | mask = [int(x) for x in mdef['mask'].split(',')] # anchor mask 73 | modules = YOLOLayer(anchors=mdef['anchors'][mask], # anchor list 74 | nc=int(mdef['classes']), # number of classes 75 | img_size=img_size, # (416, 416) 76 | yolo_index=yolo_index, # 0, 1 or 2 
77 | arc=arc) # yolo architecture 78 | 79 | # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) 80 | try: 81 | if arc == 'defaultpw' or arc == 'Fdefaultpw': # default with positive weights 82 | b = [-4, -3.6] # obj, cls 83 | elif arc == 'default': # default no pw (40 cls, 80 obj) 84 | b = [-5.5, -4.0] 85 | elif arc == 'uBCE': # unified BCE (80 classes) 86 | b = [0, -8.5] 87 | elif arc == 'uCE': # unified CE (1 background + 80 classes) 88 | b = [10, -0.1] 89 | elif arc == 'Fdefault': # Focal default no pw (28 cls, 21 obj, no pw) 90 | b = [-2.1, -1.8] 91 | elif arc == 'uFBCE' or arc == 'uFBCEpw': # unified FocalBCE (5120 obj, 80 classes) 92 | b = [0, -6.5] 93 | elif arc == 'uFCE': # unified FocalCE (64 cls, 1 background + 80 classes) 94 | b = [7.7, -1.1] 95 | 96 | bias = module_list[-1][0].bias.view(len(mask), -1) # 255 to 3x85 97 | bias[:, 4] += b[0] - bias[:, 4].mean() # obj 98 | bias[:, 5:] += b[1] - bias[:, 5:].mean() # cls 99 | # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index] # list of tensors [3x85, 3x85, 3x85] 100 | module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1)) 101 | # utils.print_model_biases(model) 102 | except: 103 | print('WARNING: smart bias initialization failure.') 104 | 105 | else: 106 | print('Warning: Unrecognized Layer Type: ' + mdef['type']) 107 | 108 | # Register module list and number of output filters 109 | module_list.append(modules) 110 | output_filters.append(filters) 111 | 112 | return module_list, routs, hyperparams 113 | 114 | 115 | class Swish(nn.Module): 116 | def __init__(self): 117 | super(Swish, self).__init__() 118 | 119 | def forward(self, x): 120 | return x * torch.sigmoid(x) 121 | 122 | 123 | class YOLOLayer(nn.Module): 124 | def __init__(self, anchors, nc, img_size, yolo_index, arc): 125 | super(YOLOLayer, self).__init__() 126 | 127 | self.anchors = torch.Tensor(anchors) 128 | self.na = len(anchors) # number of anchors (3) 129 | self.nc = nc # number of classes (80) 130 | self.nx = 0 # initialize number of x gridpoints 131 | self.ny = 0 # initialize number of y gridpoints 132 | self.arc = arc 133 | 134 | if ONNX_EXPORT: # grids must be computed in __init__ 135 | stride = [32, 16, 8][yolo_index] # stride of this layer 136 | nx = int(img_size[1] / stride) # number x grid points 137 | ny = int(img_size[0] / stride) # number y grid points 138 | create_grids(self, img_size, (nx, ny)) 139 | 140 | def forward(self, p, img_size, var=None): 141 | if ONNX_EXPORT: 142 | bs = 1 # batch size 143 | else: 144 | bs, ny, nx = p.shape[0], p.shape[-2], p.shape[-1] 145 | if (self.nx, self.ny) != (nx, ny): 146 | create_grids(self, img_size, (nx, ny), p.device, p.dtype) 147 | 148 | # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) 149 | p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction 150 | 151 | if self.training: 152 | return p 153 | 154 | elif ONNX_EXPORT: 155 | # Constants CAN NOT BE BROADCAST, ensure correct shape! 
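# (grid offsets and anchor sizes are pre-tiled with repeat()/view() to the full
# na*nx*ny prediction length, so the exported graph avoids runtime broadcasting)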
156 | ngu = self.ng.repeat((1, self.na * self.nx * self.ny, 1)) 157 | grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2)) 158 | anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / ngu 159 | 160 | p = p.view(-1, 5 + self.nc) 161 | xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0] # x, y 162 | wh = torch.exp(p[..., 2:4]) * anchor_wh[0] # width, height 163 | p_conf = torch.sigmoid(p[:, 4:5]) # Conf 164 | p_cls = F.softmax(p[:, 5:85], 1) * p_conf # SSD-like conf 165 | return torch.cat((xy / ngu[0], wh, p_conf, p_cls), 1).t() 166 | 167 | # p = p.view(1, -1, 5 + self.nc) 168 | # xy = torch.sigmoid(p[..., 0:2]) + grid_xy # x, y 169 | # wh = torch.exp(p[..., 2:4]) * anchor_wh # width, height 170 | # p_conf = torch.sigmoid(p[..., 4:5]) # Conf 171 | # p_cls = p[..., 5:5 + self.nc] 172 | # # Broadcasting only supported on first dimension in CoreML. See onnx-coreml/_operators.py 173 | # # p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf 174 | # p_cls = torch.exp(p_cls).permute((2, 1, 0)) 175 | # p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent 176 | # p_cls = p_cls.permute(2, 1, 0) 177 | # return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() 178 | 179 | else: # inference 180 | # s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2) 181 | io = p.clone() # inference output 182 | io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy 183 | io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method 184 | # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh # wh power method 185 | io[..., :4] *= self.stride 186 | 187 | if 'default' in self.arc: # seperate obj and cls 188 | torch.sigmoid_(io[..., 4:]) 189 | elif 'BCE' in self.arc: # unified BCE (80 classes) 190 | torch.sigmoid_(io[..., 5:]) 191 | io[..., 4] = 1 192 | elif 'CE' in self.arc: # unified CE (1 background + 80 classes) 193 | io[..., 4:] = F.softmax(io[..., 4:], dim=4) 194 | io[..., 4] = 1 195 | 196 | if self.nc == 1: 197 | io[..., 5] = 1 # single-class model https://github.com/ultralytics/yolov3/issues/235 198 | 199 | # reshape from [1, 3, 13, 13, 85] to [1, 507, 85] 200 | return io.view(bs, -1, 5 + self.nc), p 201 | 202 | 203 | class Darknet(nn.Module): 204 | # YOLOv3 object detection model 205 | 206 | def __init__(self, cfg, img_size=(416, 416), arc='default'): 207 | super(Darknet, self).__init__() 208 | 209 | if isinstance(cfg, str): 210 | self.module_defs = parse_model_cfg(cfg) 211 | elif isinstance(cfg, list): 212 | self.module_defs = cfg 213 | 214 | self.module_list, self.routs, self.hyperparams = create_modules(self.module_defs, img_size, arc) 215 | self.yolo_layers = get_yolo_layers(self) 216 | 217 | # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 218 | self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision 219 | self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training 220 | 221 | def forward(self, x, var=None): 222 | img_size = x.shape[-2:] 223 | layer_outputs = [] 224 | output = [] 225 | 226 | for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)): 227 | mtype = mdef['type'] 228 | if mtype in ['convolutional', 'upsample', 'maxpool']: 229 | x = module(x) 230 | elif mtype == 'route': 231 | layers = [int(x) for x in mdef['layers'].split(',')] 232 | if len(layers) == 1: 233 | x = layer_outputs[layers[0]] 234 | else: 235 | try: 236 | x = torch.cat([layer_outputs[i] 
for i in layers], 1) 237 | except: # apply stride 2 for darknet reorg layer 238 | layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5]) 239 | x = torch.cat([layer_outputs[i] for i in layers], 1) 240 | # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape) 241 | elif mtype == 'shortcut': 242 | x = x + layer_outputs[int(mdef['from'])] 243 | elif mtype == 'yolo': 244 | x = module(x, img_size) 245 | output.append(x) 246 | layer_outputs.append(x if i in self.routs else []) 247 | 248 | if self.training: 249 | return output 250 | elif ONNX_EXPORT: 251 | output = torch.cat(output, 1) # cat 3 layers 85 x (507, 2028, 8112) to 85 x 10647 252 | nc = self.module_list[self.yolo_layers[0]].nc # number of classes 253 | return output[5:5 + nc].t(), output[:4].t() # ONNX scores, boxes 254 | else: 255 | io, p = list(zip(*output)) # inference output, training output 256 | return torch.cat(io, 1), p 257 | 258 | def fuse(self): 259 | # Fuse Conv2d + BatchNorm2d layers throughout model 260 | fused_list = nn.ModuleList() 261 | for a in list(self.children())[0]: 262 | if isinstance(a, nn.Sequential): 263 | for i, b in enumerate(a): 264 | if isinstance(b, nn.modules.batchnorm.BatchNorm2d): 265 | # fuse this bn layer with the previous conv2d layer 266 | conv = a[i - 1] 267 | fused = torch_utils.fuse_conv_and_bn(conv, b) 268 | a = nn.Sequential(fused, *list(a.children())[i + 1:]) 269 | break 270 | fused_list.append(a) 271 | self.module_list = fused_list 272 | # model_info(self) # yolov3-spp reduced from 225 to 152 layers 273 | 274 | 275 | def get_yolo_layers(model): 276 | return [i for i, x in enumerate(model.module_defs) if x['type'] == 'yolo'] # [82, 94, 106] for yolov3 277 | 278 | 279 | def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32): 280 | nx, ny = ng # x and y grid size 281 | self.img_size = max(img_size) 282 | self.stride = self.img_size / max(ng) 283 | 284 | # build xy offsets 285 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 286 | self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2)) 287 | 288 | # build wh gains 289 | self.anchor_vec = self.anchors.to(device) / self.stride 290 | self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type) 291 | self.ng = torch.Tensor(ng).to(device) 292 | self.nx = nx 293 | self.ny = ny 294 | 295 | 296 | def load_darknet_weights(self, weights, cutoff=-1): 297 | # Parses and loads the weights stored in 'weights' 298 | 299 | # Establish cutoffs (load layers between 0 and cutoff. 
if cutoff = -1 all are loaded) 300 | file = Path(weights).name 301 | if file == 'darknet53.conv.74': 302 | cutoff = 75 303 | elif file == 'yolov3-tiny.conv.15': 304 | cutoff = 15 305 | 306 | # Read weights file 307 | with open(weights, 'rb') as f: 308 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 309 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 310 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 311 | 312 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 313 | 314 | ptr = 0 315 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 316 | if mdef['type'] == 'convolutional': 317 | conv_layer = module[0] 318 | if mdef['batch_normalize']: 319 | # Load BN bias, weights, running mean and running variance 320 | bn_layer = module[1] 321 | num_b = bn_layer.bias.numel() # Number of biases 322 | # Bias 323 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 324 | bn_layer.bias.data.copy_(bn_b) 325 | ptr += num_b 326 | # Weight 327 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 328 | bn_layer.weight.data.copy_(bn_w) 329 | ptr += num_b 330 | # Running Mean 331 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 332 | bn_layer.running_mean.data.copy_(bn_rm) 333 | ptr += num_b 334 | # Running Var 335 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 336 | bn_layer.running_var.data.copy_(bn_rv) 337 | ptr += num_b 338 | # Load conv. weights 339 | num_w = conv_layer.weight.numel() 340 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 341 | conv_layer.weight.data.copy_(conv_w) 342 | ptr += num_w 343 | else: 344 | if "yolov3.weights" in file: 345 | num_b = 255 346 | ptr += num_b 347 | num_w = int(self.module_defs[i-1]["filters"]) * 255 348 | ptr += num_w 349 | else: 350 | # Load conv. bias 351 | num_b = conv_layer.bias.numel() 352 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 353 | conv_layer.bias.data.copy_(conv_b) 354 | ptr += num_b 355 | # Load conv. 
weights 356 | num_w = conv_layer.weight.numel() 357 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 358 | conv_layer.weight.data.copy_(conv_w) 359 | ptr += num_w 360 | assert ptr == len(weights) 361 | return cutoff 362 | 363 | 364 | def save_weights(self, path='model.weights', cutoff=-1): 365 | # Converts a PyTorch model to Darket format (*.pt to *.weights) 366 | # Note: Does not work if model.fuse() is applied 367 | with open(path, 'wb') as f: 368 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 369 | self.version.tofile(f) # (int32) version info: major, minor, revision 370 | self.seen.tofile(f) # (int64) number of images seen during training 371 | 372 | # Iterate through layers 373 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 374 | if mdef['type'] == 'convolutional': 375 | conv_layer = module[0] 376 | # If batch norm, load bn first 377 | if mdef['batch_normalize']: 378 | bn_layer = module[1] 379 | bn_layer.bias.data.cpu().numpy().tofile(f) 380 | bn_layer.weight.data.cpu().numpy().tofile(f) 381 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 382 | bn_layer.running_var.data.cpu().numpy().tofile(f) 383 | # Load conv bias 384 | else: 385 | conv_layer.bias.data.cpu().numpy().tofile(f) 386 | # Load conv weights 387 | conv_layer.weight.data.cpu().numpy().tofile(f) 388 | 389 | 390 | def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): 391 | # Converts between PyTorch and Darknet format per extension (i.e. *.weights convert to *.pt and vice versa) 392 | # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') 393 | 394 | # Initialize model 395 | model = Darknet(cfg) 396 | 397 | # Load weights and save 398 | if weights.endswith('.pt'): # if PyTorch format 399 | model.load_state_dict(torch.load(weights, map_location='cpu')['model']) 400 | save_weights(model, path='converted.weights', cutoff=-1) 401 | print("Success: converted '%s' to 'converted.weights'" % weights) 402 | 403 | elif weights.endswith('.weights'): # darknet format 404 | _ = load_darknet_weights(model, weights) 405 | 406 | chkpt = {'epoch': -1, 407 | 'best_fitness': None, 408 | 'training_results': None, 409 | 'model': model.state_dict(), 410 | 'optimizer': None} 411 | 412 | torch.save(chkpt, 'converted.pt') 413 | print("Success: converted '%s' to 'converted.pt'" % weights) 414 | 415 | else: 416 | print('Error: extension not supported.') 417 | 418 | 419 | def attempt_download(weights): 420 | # Attempt to download pretrained weights if not found locally 421 | 422 | msg = weights + ' missing, download from https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI' 423 | if weights and not os.path.isfile(weights): 424 | file = Path(weights).name 425 | 426 | if file == 'yolov3-spp.weights': 427 | gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name=weights) 428 | elif file == 'yolov3-spp.pt': 429 | gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name=weights) 430 | elif file == 'yolov3.pt': 431 | gdrive_download(id='11uy0ybbOXA2hc-NJkJbbbkDwNX1QZDlz', name=weights) 432 | elif file == 'yolov3-tiny.pt': 433 | gdrive_download(id='1qKSgejNeNczgNNiCn9ZF_o55GFk1DjY_', name=weights) 434 | elif file == 'darknet53.conv.74': 435 | gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name=weights) 436 | elif file == 'yolov3-tiny.conv.15': 437 | gdrive_download(id='140PnSedCsGGgu3rOD6Ez4oI6cdDzerLC', name=weights) 438 | 439 | else: 440 | try: 
# download from pjreddie.com 441 | url = 'https://pjreddie.com/media/files/' + file 442 | print('Downloading ' + url) 443 | os.system('curl -f ' + url + ' -o ' + weights) 444 | except IOError: 445 | print(msg) 446 | os.system('rm ' + weights) # remove partial downloads 447 | 448 | assert os.path.exists(weights), msg # download missing weights from Google Drive 449 | -------------------------------------------------------------------------------- /prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') 16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 18 | parser.add_argument('--percent', type=float, default=0.8, help='channel prune percent') 19 | opt = parser.parse_args() 20 | print(opt) 21 | 22 | 23 | #%% 24 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 25 | model = Darknet(opt.cfg).to(device) 26 | if opt.weights.endswith('.pt'): 27 | model.load_state_dict(torch.load(opt.weights)['model']) 28 | else: 29 | load_darknet_weights(model, opt.weights) 30 | print('\nloaded weights from ',opt.weights) 31 | 32 | eval_model = lambda model:test(opt.cfg, opt.data, 33 | weights=opt.weights, 34 | batch_size=16, 35 | img_size=416, 36 | iou_thres=0.5, 37 | conf_thres=0.001, 38 | nms_thres=0.5, 39 | save_json=False, 40 | model=model) 41 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 42 | 43 | print("\nlet's test the original model first:") 44 | origin_model_metric = eval_model(model) 45 | 46 | origin_nparameters = obtain_num_parameters(model) 47 | 48 | CBL_idx, Conv_idx, prune_idx= parse_module_defs(model.module_defs) 49 | 50 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 51 | 52 | sorted_bn = torch.sort(bn_weights)[0] 53 | 54 | # Highest threshold that avoids pruning all channels of any layer (the minimum over layers of each BN layer's max gamma is the upper bound) 55 | highest_thre = [] 56 | for idx in prune_idx: 57 | highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 58 | highest_thre = min(highest_thre) 59 | 60 | # Find the prune percentile corresponding to the index of highest_thre 61 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 62 | 63 | print(f'Gamma threshold should be less than {highest_thre:.4f}.') 64 | print(f'The corresponding prune ratio is {percent_limit:.3f}.') 65 | 66 | #%% 67 | def prune_and_eval(model, sorted_bn, percent=.0): 68 | model_copy = deepcopy(model) 69 | thre_index = int(len(sorted_bn) * percent) 70 | thre = sorted_bn[thre_index] 71 | 72 | print(f'Gamma values less than {thre:.4f} are set to zero!') 73 | 74 | remain_num = 0 75 | for idx in prune_idx: 76 | 77 | bn_module = model_copy.module_list[idx][1] 78 | 79 | mask = obtain_bn_mask(bn_module, thre) 80 | 81 | remain_num += int(mask.sum()) 82 | bn_module.weight.data.mul_(mask) 83 | print("let's test the current model!") 84 | mAP = eval_model(model_copy)[0][2] 85 | 86 | 87 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 88 | print(f'Prune ratio:
{1-remain_num/len(sorted_bn):.3f}') 89 | print(f"mAP of the 'pruned' model is {mAP:.4f}") 90 | 91 | return thre 92 | 93 | percent = opt.percent 94 | print('the required prune percent is', percent) 95 | threshold = prune_and_eval(model, sorted_bn, percent) 96 | #%% 97 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 98 | 99 | pruned = 0 100 | total = 0 101 | num_filters = [] 102 | filters_mask = [] 103 | for idx in CBL_idx: 104 | bn_module = model.module_list[idx][1] 105 | if idx in prune_idx: 106 | 107 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 108 | remain = int(mask.sum()) 109 | pruned = pruned + mask.shape[0] - remain 110 | 111 | if remain == 0: 112 | print("Channels would be all pruned!") 113 | raise Exception 114 | 115 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 116 | f'remaining channel: {remain:>4d}') 117 | else: 118 | mask = np.ones(bn_module.weight.data.shape) 119 | remain = mask.shape[0] 120 | 121 | total += mask.shape[0] 122 | num_filters.append(remain) 123 | filters_mask.append(mask.copy()) 124 | 125 | prune_ratio = pruned / total 126 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 127 | 128 | return num_filters, filters_mask 129 | 130 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx, prune_idx) 131 | 132 | #%% 133 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 134 | 135 | pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask) 136 | 137 | print("\nnow prune the model but keep its size (the BN beta offsets are actually added to the next layer); let's see how the mAP goes") 138 | eval_model(pruned_model) 139 | 140 | 141 | #%% 142 | compact_module_defs = deepcopy(model.module_defs) 143 | for idx, num in zip(CBL_idx, num_filters): 144 | assert compact_module_defs[idx]['type'] == 'convolutional' 145 | compact_module_defs[idx]['filters'] = str(num) 146 | 147 | #%% 148 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device) 149 | compact_nparameters = obtain_num_parameters(compact_model) 150 | 151 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 152 | 153 | #%% 154 | random_input = torch.rand((1, 3, 416, 416)).to(device) 155 | 156 | def obtain_avg_forward_time(input, model, repeat=200): 157 | 158 | model.eval() 159 | start = time.time() 160 | with torch.no_grad(): 161 | for i in range(repeat): 162 | output = model(input)[0] 163 | avg_infer_time = (time.time() - start) / repeat 164 | 165 | return avg_infer_time, output 166 | 167 | print('\ntesting avg forward time...') 168 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 169 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 170 | 171 | diff = (pruned_output-compact_output).abs().gt(0.001).sum().item() 172 | if diff > 0: 173 | print('Something wrong with the pruned model!') 174 | 175 | #%% 176 | # Evaluate the pruned model on the test set and count its parameters 177 | print('testing the mAP of final pruned model') 178 | compact_model_metric = eval_model(compact_model) 179 | 180 | 181 | #%% 182 | # Compare parameter counts and metric performance before and after pruning 183 | metric_table = [ 184 | ["Metric", "Before", "After"], 185 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 186 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 187 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 188 | ] 189 | print(AsciiTable(metric_table).table) 190 | 191 | #%% 192
| # Generate the pruned cfg file and save the model 193 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_') 194 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 195 | print(f'Config file has been saved: {pruned_cfg_file}') 196 | 197 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_') 198 | if compact_model_name.endswith('.pt'): 199 | compact_model_name = compact_model_name.replace('.pt', '.weights') 200 | save_weights(compact_model, compact_model_name) 201 | print(f'Compact model has been saved: {compact_model_name}') 202 | 203 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | numpy 3 | opencv-python 4 | torch >= 1.2 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | tensorboard >=1.13 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools 16 | # conda install -yc spyder-ide spyder-line-profiler 17 | # conda install -yc pytorch pytorch torchvision 18 | -------------------------------------------------------------------------------- /shortcut_prune.py: -------------------------------------------------------------------------------- 1 | from models import * 2 | from utils.utils import * 3 | import numpy as np 4 | from copy import deepcopy 5 | from test import test 6 | from terminaltables import AsciiTable 7 | import time 8 | from utils.prune_utils import * 9 | import argparse 10 | 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path') 16 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 17 | parser.add_argument('--weights', type=str, default='weights/last.pt', help='sparse model weights') 18 | parser.add_argument('--percent', type=float, default=0.8, help='channel prune percent') 19 | opt = parser.parse_args() 20 | print(opt) 21 | 22 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 23 | model = Darknet(opt.cfg).to(device) 24 | 25 | if opt.weights.endswith(".pt"): 26 | model.load_state_dict(torch.load(opt.weights, map_location=device)['model']) 27 | else: 28 | _ = load_darknet_weights(model, opt.weights) 29 | print('\nloaded weights from ',opt.weights) 30 | 31 | 32 | eval_model = lambda model:test(model=model,cfg=opt.cfg, data=opt.data) 33 | obtain_num_parameters = lambda model:sum([param.nelement() for param in model.parameters()]) 34 | 35 | print("\nlet's test the original model first:") 36 | origin_model_metric = eval_model(model) 37 | origin_nparameters = obtain_num_parameters(model) 38 | 39 | CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all= parse_module_defs2(model.module_defs) 40 | 41 | 42 | sort_prune_idx=[idx for idx in prune_idx if idx not in shortcut_idx] 43 | 44 | # Copy the gamma parameters of all BN layers to be pruned into the bn_weights list 45 | bn_weights = gather_bn_weights(model.module_list, sort_prune_idx) 46 | 47 | # torch.sort returns a pair: the first element is the sorted values, the second is their original indices 48 | sorted_bn = torch.sort(bn_weights)[0] 49 | 50 | 51 | # Highest threshold that avoids pruning all channels of any layer (the minimum over layers of each BN layer's max gamma is the upper bound) 52 | highest_thre = [] 53 | for idx in sort_prune_idx: 54 | # .item() extracts the scalar value from a tensor 55 |
highest_thre.append(model.module_list[idx][1].weight.data.abs().max().item()) 56 | highest_thre = min(highest_thre) 57 | 58 | # Find the prune percentile corresponding to the index of highest_thre 59 | percent_limit = (sorted_bn==highest_thre).nonzero().item()/len(bn_weights) 60 | 61 | print(f'Threshold should be less than {highest_thre:.4f}.') 62 | print(f'The corresponding prune ratio is {percent_limit:.3f}.') 63 | 64 | 65 | def prune_and_eval(model, sorted_bn, percent=.0): 66 | model_copy = deepcopy(model) 67 | thre_index = int(len(sorted_bn) * percent) 68 | # Get the gamma threshold; channels whose gamma falls below it are pruned 69 | thre1 = sorted_bn[thre_index] 70 | 71 | print(f'Channels with Gamma value less than {thre1:.6f} are pruned!') 72 | 73 | remain_num = 0 74 | idx_new=dict() 75 | for idx in prune_idx: 76 | 77 | if idx not in shortcut_idx: 78 | 79 | bn_module = model_copy.module_list[idx][1] 80 | 81 | mask = obtain_bn_mask(bn_module, thre1) 82 | # Record the post-pruning mask of each convolutional layer 83 | # idx_new[idx]=mask.cpu().numpy() 84 | idx_new[idx]=mask 85 | remain_num += int(mask.sum()) 86 | bn_module.weight.data.mul_(mask) 87 | #bn_module.bias.data.mul_(mask*0.0001) 88 | else: 89 | 90 | bn_module = model_copy.module_list[idx][1] 91 | 92 | 93 | mask=idx_new[shortcut_idx[idx]] 94 | idx_new[idx]=mask 95 | 96 | 97 | remain_num += int(mask.sum()) 98 | bn_module.weight.data.mul_(mask) 99 | 100 | #print(int(mask.sum())) 101 | 102 | with torch.no_grad(): 103 | mAP = eval_model(model_copy)[0][2] 104 | 105 | print(f'Number of channels has been reduced from {len(sorted_bn)} to {remain_num}') 106 | print(f'Prune ratio: {1-remain_num/len(sorted_bn):.3f}') 107 | print(f'mAP of the pruned model is {mAP:.4f}') 108 | 109 | return thre1 110 | 111 | percent = opt.percent 112 | threshold = prune_and_eval(model, sorted_bn, percent) 113 | 114 | 115 | 116 | #**************************************************************** 117 | # The pruning effect is already visible above, but the pruned network structure has not been built yet; the code below builds the new structure and copies the old model's parameters into it 118 | 119 | 120 | 121 | #%% 122 | def obtain_filters_mask(model, thre, CBL_idx, prune_idx): 123 | 124 | pruned = 0 125 | total = 0 126 | num_filters = [] 127 | filters_mask = [] 128 | idx_new=dict() 129 | # CBL_idx stores all conv layers that have BN (the conv layer right before each YOLO layer has no BN) 130 | for idx in CBL_idx: 131 | bn_module = model.module_list[idx][1] 132 | if idx in prune_idx: 133 | if idx not in shortcut_idx: 134 | 135 | mask = obtain_bn_mask(bn_module, thre).cpu().numpy() 136 | idx_new[idx]=mask 137 | remain = int(mask.sum()) 138 | pruned = pruned + mask.shape[0] - remain 139 | 140 | # if remain == 0: 141 | # print("Channels would be all pruned!") 142 | # raise Exception 143 | 144 | # print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 145 | # f'remaining channel: {remain:>4d}') 146 | else: 147 | mask=idx_new[shortcut_idx[idx]] 148 | idx_new[idx]=mask 149 | remain= int(mask.sum()) 150 | pruned = pruned + mask.shape[0] - remain 151 | 152 | if remain == 0: 153 | print("Channels would be all pruned!") 154 | raise Exception 155 | 156 | print(f'layer index: {idx:>3d} \t total channel: {mask.shape[0]:>4d} \t ' 157 | f'remaining channel: {remain:>4d}') 158 | else: 159 | mask = np.ones(bn_module.weight.data.shape) 160 | remain = mask.shape[0] 161 | 162 | total += mask.shape[0] 163 | num_filters.append(remain) 164 | filters_mask.append(mask.copy()) 165 | 166 | # The prune_ratio computed here is therefore: pruned gamma parameters / all gamma parameters in CBL_idx 167 | prune_ratio = pruned / total 168 | print(f'Prune channels: {pruned}\tPrune ratio: {prune_ratio:.3f}') 169 | 170 | return num_filters, filters_mask 171 | 172 | num_filters, filters_mask = obtain_filters_mask(model, threshold, CBL_idx,
prune_idx) 173 | 174 | 175 | # CBLidx2mask stores the mask of each BN layer in CBL_idx 176 | CBLidx2mask = {idx: mask for idx, mask in zip(CBL_idx, filters_mask)} 177 | 178 | 179 | pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask) 180 | print("\nnow prune the model but keep its size (the BN beta offsets are actually added to the next layer); let's see how the mAP goes") 181 | 182 | 183 | eval_model(pruned_model) 184 | 185 | 186 | 187 | # Get the original model's module_defs and update the filter counts in those defs 188 | compact_module_defs = deepcopy(model.module_defs) 189 | for idx, num in zip(CBL_idx, num_filters): 190 | assert compact_module_defs[idx]['type'] == 'convolutional' 191 | compact_module_defs[idx]['filters'] = str(num) 192 | 193 | 194 | compact_model = Darknet([model.hyperparams.copy()] + compact_module_defs).to(device) 195 | compact_nparameters = obtain_num_parameters(compact_model) 196 | 197 | init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask) 198 | 199 | 200 | random_input = torch.rand((1, 3, 416, 416)).to(device) 201 | 202 | def obtain_avg_forward_time(input, model, repeat=200): 203 | 204 | model.eval() 205 | start = time.time() 206 | with torch.no_grad(): 207 | for i in range(repeat): 208 | output = model(input) 209 | avg_infer_time = (time.time() - start) / repeat 210 | 211 | return avg_infer_time, output 212 | 213 | pruned_forward_time, pruned_output = obtain_avg_forward_time(random_input, pruned_model) 214 | compact_forward_time, compact_output = obtain_avg_forward_time(random_input, compact_model) 215 | 216 | 217 | # Evaluate the pruned model on the test set and count its parameters 218 | compact_model_metric = eval_model(compact_model) 219 | 220 | 221 | # Compare parameter counts and metric performance before and after pruning 222 | metric_table = [ 223 | ["Metric", "Before", "After"], 224 | ["mAP", f'{origin_model_metric[0][2]:.6f}', f'{compact_model_metric[0][2]:.6f}'], 225 | ["Parameters", f"{origin_nparameters}", f"{compact_nparameters}"], 226 | ["Inference", f'{pruned_forward_time:.4f}', f'{compact_forward_time:.4f}'] 227 | ] 228 | print(AsciiTable(metric_table).table) 229 | 230 | 231 | # Generate the pruned cfg file and save the model 232 | pruned_cfg_name = opt.cfg.replace('/', f'/prune_{percent}_') 233 | pruned_cfg_file = write_cfg(pruned_cfg_name, [model.hyperparams.copy()] + compact_module_defs) 234 | print(f'Config file has been saved: {pruned_cfg_file}') 235 | 236 | compact_model_name = opt.weights.replace('/', f'/prune_{percent}_') 237 | if compact_model_name.endswith('.pt'): 238 | compact_model_name = compact_model_name.replace('.pt', '.weights') 239 | save_weights(compact_model, path=compact_model_name) 240 | print(f'Compact model has been saved: {compact_model_name}') 241 | 242 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | def test(cfg, 12 | data, 13 | weights=None, 14 | batch_size=16, 15 | img_size=416, 16 | iou_thres=0.5, 17 | conf_thres=0.001, 18 | nms_thres=0.5, 19 | save_json=False, 20 | model=None): 21 | with torch.no_grad(): 22 | # Initialize/load model and set device 23 | if model is None: 24 | device = torch_utils.select_device(opt.device) 25 | verbose = True 26 | 27 | # Initialize model 28 | model = Darknet(cfg, img_size).to(device) 29 | 30 | # Load weights 31 | attempt_download(weights) 32 | if weights.endswith('.pt'): # pytorch format 33 |
model.load_state_dict(torch.load(weights, map_location=device)['model']) 34 | else: # darknet format 35 | _ = load_darknet_weights(model, weights) 36 | 37 | if torch.cuda.device_count() > 1: 38 | model = nn.DataParallel(model) 39 | else: 40 | device = next(model.parameters()).device # get model device 41 | verbose = False 42 | 43 | # Configure run 44 | data = parse_data_cfg(data) 45 | nc = int(data['classes']) # number of classes 46 | test_path = data['valid'] # path to test images 47 | names = load_classes(data['names']) # class names 48 | 49 | # Dataloader 50 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 51 | dataloader = DataLoader(dataset, 52 | batch_size=batch_size, 53 | num_workers=min([os.cpu_count(), batch_size, 16]), 54 | pin_memory=True, 55 | collate_fn=dataset.collate_fn) 56 | 57 | seen = 0 58 | model.eval() 59 | coco91class = coco80_to_coco91_class() 60 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 61 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 62 | loss = torch.zeros(3) 63 | jdict, stats, ap, ap_class = [], [], [], [] 64 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 65 | targets = targets.to(device) 66 | imgs = imgs.to(device) 67 | _, _, height, width = imgs.shape # batch size, channels, height, width 68 | 69 | # Plot images with bounding boxes 70 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 71 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 72 | 73 | # Run model 74 | inf_out, train_out = model(imgs) # inference and training outputs 75 | 76 | # Compute loss 77 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 78 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 79 | 80 | # Run NMS 81 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 82 | 83 | # Statistics per image 84 | for si, pred in enumerate(output): 85 | labels = targets[targets[:, 0] == si, 1:] 86 | nl = len(labels) 87 | tcls = labels[:, 0].tolist() if nl else [] # target class 88 | seen += 1 89 | 90 | if pred is None: 91 | if nl: 92 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 93 | continue 94 | 95 | # Append to text file 96 | # with open('test.txt', 'a') as file: 97 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 98 | 99 | # Append to pycocotools JSON dictionary 100 | if save_json: 101 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
102 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 103 | box = pred[:, :4].clone() # xyxy 104 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 105 | box = xyxy2xywh(box) # xywh 106 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 107 | for di, d in enumerate(pred): 108 | jdict.append({'image_id': image_id, 109 | 'category_id': coco91class[int(d[6])], 110 | 'bbox': [floatn(x, 3) for x in box[di]], 111 | 'score': floatn(d[4], 5)}) 112 | 113 | # Clip boxes to image bounds 114 | clip_coords(pred, (height, width)) 115 | 116 | # Assign all predictions as incorrect 117 | correct = [0] * len(pred) 118 | if nl: 119 | detected = [] 120 | tcls_tensor = labels[:, 0] 121 | 122 | # target boxes 123 | tbox = xywh2xyxy(labels[:, 1:5]) 124 | tbox[:, [0, 2]] *= width 125 | tbox[:, [1, 3]] *= height 126 | 127 | # Search for correct predictions 128 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 129 | 130 | # Break if all targets already located in image 131 | if len(detected) == nl: 132 | break 133 | 134 | # Continue if predicted class not among image classes 135 | if pcls.item() not in tcls: 136 | continue 137 | 138 | # Best iou, index between pred and targets 139 | m = (pcls == tcls_tensor).nonzero().view(-1) 140 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 141 | 142 | # If iou > threshold and class is correct mark as correct 143 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 144 | correct[i] = 1 145 | detected.append(m[bi]) 146 | 147 | # Append statistics (correct, conf, pcls, tcls) 148 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 149 | 150 | # Compute statistics 151 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 152 | if len(stats): 153 | p, r, ap, f1, ap_class = ap_per_class(*stats) 154 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 155 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 156 | else: 157 | nt = torch.zeros(1) 158 | 159 | # Print results 160 | pf = '%20s' + '%10.3g' * 6 # print format 161 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 162 | 163 | # Print results per class 164 | if verbose and nc > 1 and len(stats): 165 | for i, c in enumerate(ap_class): 166 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 167 | 168 | # Save JSON 169 | if save_json and map and len(jdict): 170 | try: 171 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 172 | with open('results.json', 'w') as file: 173 | json.dump(jdict, file) 174 | 175 | from pycocotools.coco import COCO 176 | from pycocotools.cocoeval import COCOeval 177 | 178 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 179 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 180 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 181 | 182 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 183 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 184 | cocoEval.evaluate() 185 | cocoEval.accumulate() 186 | cocoEval.summarize() 187 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 188 | except: 189 | print('WARNING: missing dependency pycocotools from requirements.txt. 
Can not compute official COCO mAP.') 190 | 191 | # Return results 192 | maps = np.zeros(nc) + map 193 | for i, c in enumerate(ap_class): 194 | maps[c] = ap[i] 195 | return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps 196 | 197 | 198 | if __name__ == '__main__': 199 | parser = argparse.ArgumentParser(prog='test.py') 200 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 201 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 202 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 203 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 204 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 205 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 206 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 207 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 208 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 209 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 210 | opt = parser.parse_args() 211 | print(opt) 212 | 213 | test(opt.cfg, 214 | opt.data, 215 | opt.weights, 216 | opt.batch_size, 217 | opt.img_size, 218 | opt.iou_thres, 219 | opt.conf_thres, 220 | opt.nms_thres, 221 | opt.save_json) 222 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch.distributed as dist 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_scheduler 6 | 7 | import test # import test.py to get mAP after each epoch 8 | from models import * 9 | from utils.datasets import * 10 | from utils.utils import * 11 | from utils.prune_utils import * 12 | 13 | 14 | mixed_precision = True 15 | try: # Mixed precision training https://github.com/NVIDIA/apex 16 | from apex import amp 17 | except: 18 | mixed_precision = False # not installed 19 | 20 | wdir = 'weights' + os.sep # weights dir 21 | last = wdir + 'last.pt' 22 | best = wdir + 'best.pt' 23 | results_file = 'results.txt' 24 | 25 | # Hyperparameters (j-series, 50.5 mAP yolov3-320) evolved by @ktian08 https://github.com/ultralytics/yolov3/issues/310 26 | hyp = {'giou': 1.582, # giou loss gain 27 | 'cls': 27.76, # cls loss gain (CE=~1.0, uCE=~20) 28 | 'cls_pw': 1.446, # cls BCELoss positive_weight 29 | 'obj': 21.35, # obj loss gain (*=80 for uBCE with 80 classes) 30 | 'obj_pw': 3.941, # obj BCELoss positive_weight 31 | 'iou_t': 0.2635, # iou training threshold 32 | 'lr0': 0.002324, # initial learning rate (SGD=1E-3, Adam=9E-5) 33 | 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) 34 | 'momentum': 0.97, # SGD momentum 35 | 'weight_decay': 0.0004569, # optimizer weight decay 36 | 'fl_gamma': 0.5, # focal loss gamma 37 | 'hsv_h': 0.10, # image HSV-Hue augmentation (fraction) 38 | 'hsv_s': 0.5703, # image HSV-Saturation augmentation (fraction) 39 | 'hsv_v': 0.3174, # image HSV-Value augmentation (fraction) 40 | 'degrees': 1.113, # image rotation (+/- deg) 41 | 'translate': 0.06797, # image translation (+/- fraction) 42 | 'scale': 0.1059, # image scale (+/- gain) 43 | 'shear': 
0.5768} # image shear (+/- deg) 44 | 45 | # Overwrite hyp with hyp*.txt (optional) 46 | f = glob.glob('hyp*.txt') 47 | if f: 48 | for k, v in zip(hyp.keys(), np.loadtxt(f[0])): 49 | hyp[k] = v 50 | 51 | 52 | def train(): 53 | cfg = opt.cfg 54 | data = opt.data 55 | img_size = opt.img_size 56 | epochs = 1 if opt.prebias else opt.epochs # 500200 batches at bs 64, 117263 images = 273 epochs 57 | batch_size = opt.batch_size 58 | accumulate = opt.accumulate # effective bs = batch_size * accumulate = 16 * 4 = 64 59 | weights = opt.weights # initial training weights 60 | 61 | if 'pw' not in opt.arc: # remove BCELoss positive weights 62 | hyp['cls_pw'] = 1. 63 | hyp['obj_pw'] = 1. 64 | 65 | # Initialize 66 | init_seeds() 67 | multi_scale = opt.multi_scale 68 | 69 | if multi_scale: 70 | img_sz_min = round(img_size / 32 / 1.5) + 1 71 | img_sz_max = round(img_size / 32 * 1.5) - 1 72 | img_size = img_sz_max * 32 # initiate with maximum multi_scale size 73 | print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size)) 74 | 75 | # Configure run 76 | data_dict = parse_data_cfg(data) 77 | train_path = data_dict['train'] 78 | nc = int(data_dict['classes']) # number of classes 79 | 80 | # Remove previous results 81 | for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): 82 | os.remove(f) 83 | 84 | # Initialize model 85 | model = Darknet(cfg, arc=opt.arc).to(device) 86 | 87 | # Optimizer 88 | pg0, pg1 = [], [] # optimizer parameter groups 89 | for k, v in dict(model.named_parameters()).items(): 90 | if 'Conv2d.weight' in k: 91 | pg1 += [v] # parameter group 1 (apply weight_decay) 92 | else: 93 | pg0 += [v] # parameter group 0 94 | 95 | if opt.adam: 96 | optimizer = optim.Adam(pg0, lr=hyp['lr0']) 97 | # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1) 98 | else: 99 | optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) 100 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay 101 | del pg0, pg1 102 | 103 | cutoff = -1 # backbone reaches to cutoff layer 104 | start_epoch = 0 105 | best_fitness = 0. 106 | attempt_download(weights) 107 | if weights.endswith('.pt'): # pytorch format 108 | # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. 109 | if opt.bucket: 110 | os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last)) # download from bucket 111 | chkpt = torch.load(weights, map_location=device) 112 | 113 | # load model 114 | # if opt.transfer: 115 | chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()} 116 | model.load_state_dict(chkpt['model'], strict=False) 117 | print('loaded weights from', weights) 118 | # else: 119 | # model.load_state_dict(chkpt['model']) 120 | 121 | # load optimizer 122 | if chkpt['optimizer'] is not None: 123 | optimizer.load_state_dict(chkpt['optimizer']) 124 | best_fitness = chkpt['best_fitness'] 125 | 126 | # load results 127 | if chkpt.get('training_results') is not None: 128 | with open(results_file, 'w') as file: 129 | file.write(chkpt['training_results']) # write results.txt 130 | 131 | start_epoch = chkpt['epoch'] + 1 132 | del chkpt 133 | 134 | # elif weights.endswith('.pth'): 135 | # model.load_state_dict(torch.load(weights)) 136 | 137 | elif len(weights) > 0: # darknet format 138 | # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc. 
139 | cutoff = load_darknet_weights(model, weights) 140 | print('loaded weights from', weights) 141 | 142 | if opt.prune==1: 143 | print('shortcut sparse training') 144 | _, _, prune_idx, shortcut_idx, _=parse_module_defs2(model.module_defs) 145 | prune_idx = [idx for idx in prune_idx if idx not in shortcut_idx] 146 | elif opt.prune==0: 147 | print('normal sparse training ') 148 | _, _, prune_idx= parse_module_defs(model.module_defs) 149 | 150 | 151 | if opt.transfer or opt.prebias: # transfer learning edge (yolo) layers 152 | nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255) 153 | 154 | if opt.prebias: 155 | for p in optimizer.param_groups: 156 | # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum 157 | p['lr'] *= 100 # lr gain 158 | if p.get('momentum') is not None: # for SGD but not Adam 159 | p['momentum'] *= 0.9 160 | 161 | for p in model.parameters(): 162 | if opt.prebias and p.numel() == nf: # train (yolo biases) 163 | p.requires_grad = True 164 | elif opt.transfer and p.shape[0] == nf: # train (yolo biases+weights) 165 | p.requires_grad = True 166 | else: # freeze layer 167 | p.requires_grad = False 168 | 169 | # Scheduler https://github.com/ultralytics/yolov3/issues/238 170 | # lf = lambda x: 1 - x / epochs # linear ramp to zero 171 | # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp 172 | # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp 173 | # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 174 | # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=range(59, 70, 1), gamma=0.8) # gradual fall to 0.1*lr0 175 | if opt.sr: 176 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.5, 0.6]], gamma=0.1) 177 | else: 178 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.8, 0.9]], gamma=0.1) 179 | scheduler.last_epoch = start_epoch - 1 180 | 181 | # # Plot lr schedule 182 | # y = [] 183 | # for _ in range(epochs): 184 | # scheduler.step() 185 | # y.append(optimizer.param_groups[0]['lr']) 186 | # plt.plot(y, label='LambdaLR') 187 | # plt.xlabel('epoch') 188 | # plt.ylabel('LR') 189 | # plt.tight_layout() 190 | # plt.savefig('LR.png', dpi=300) 191 | 192 | # Mixed precision training https://github.com/NVIDIA/apex 193 | if mixed_precision: 194 | model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0) 195 | 196 | # Initialize distributed training 197 | if torch.cuda.device_count() > 1: 198 | dist.init_process_group(backend='nccl', # 'distributed backend' 199 | init_method='tcp://127.0.0.1:9999', # distributed training init method 200 | world_size=1, # number of nodes for distributed training 201 | rank=0) # distributed training node rank 202 | model = torch.nn.parallel.DistributedDataParallel(model) 203 | model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level 204 | 205 | # Dataset 206 | dataset = LoadImagesAndLabels(train_path, 207 | img_size, 208 | batch_size, 209 | augment=True, 210 | hyp=hyp, # augmentation hyperparameters 211 | rect=opt.rect, # rectangular training 212 | image_weights=opt.img_weights, 213 | cache_labels=True if epochs > 10 else False, 214 | cache_images=False if opt.prebias else opt.cache_images) 215 | 216 | # Dataloader 217 | dataloader = torch.utils.data.DataLoader(dataset, 218 | batch_size=batch_size, 219 | num_workers=min([os.cpu_count(), batch_size, 16]), 220 | shuffle=not opt.rect, # 
Shuffle=True unless rectangular training is used 221 | pin_memory=True, 222 | collate_fn=dataset.collate_fn) 223 | 224 | # Start training 225 | model.nc = nc # attach number of classes to model 226 | model.arc = opt.arc # attach yolo architecture 227 | model.hyp = hyp # attach hyperparameters to model 228 | # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 229 | torch_utils.model_info(model, report='summary') # 'full' or 'summary' 230 | nb = len(dataloader) 231 | maps = np.zeros(nc) # mAP per class 232 | results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' 233 | t0 = time.time() 234 | print('Starting %s for %g epochs...' % ('prebias' if opt.prebias else 'training', epochs)) 235 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ 236 | model.train() 237 | print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) 238 | 239 | # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional) 240 | freeze_backbone = False 241 | if freeze_backbone and epoch < 2: 242 | for name, p in model.named_parameters(): 243 | if int(name.split('.')[1]) < cutoff: # if layer < 75 244 | p.requires_grad = False if epoch == 0 else True 245 | 246 | # Update image weights (optional) 247 | if dataset.image_weights: 248 | w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights 249 | image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) 250 | dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx 251 | 252 | mloss = torch.zeros(4).to(device) # mean losses 253 | pbar = tqdm(enumerate(dataloader), total=nb) # progress bar 254 | sr_flag = get_sr_flag(epoch, opt.sr) 255 | for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- 256 | ni = i + nb * epoch # number integrated batches (since train start) 257 | imgs = imgs.to(device) 258 | targets = targets.to(device) 259 | 260 | # Multi-Scale training 261 | if multi_scale: 262 | if ni / accumulate % 10 == 0: #  adjust (67% - 150%) every 10 batches 263 | img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32 264 | sf = img_size / max(imgs.shape[2:]) # scale factor 265 | if sf != 1: 266 | ns = [math.ceil(x * sf / 32.) 
* 32 for x in imgs.shape[2:]] # new shape (stretched to 32-multiple) 267 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) 268 | 269 | # Plot images with bounding boxes 270 | if ni == 0: 271 | fname = 'train_batch%g.jpg' % i 272 | plot_images(imgs=imgs, targets=targets, paths=paths, fname=fname) 273 | if tb_writer: 274 | tb_writer.add_image(fname, cv2.imread(fname)[:, :, ::-1], dataformats='HWC') 275 | 276 | # Hyperparameter burn-in 277 | # n_burn = nb - 1 # min(nb // 5 + 1, 1000) # number of burn-in batches 278 | # if ni <= n_burn: 279 | # for m in model.named_modules(): 280 | # if m[0].endswith('BatchNorm2d'): 281 | # m[1].momentum = 1 - i / n_burn * 0.99 # BatchNorm2d momentum falls from 1 - 0.01 282 | # g = (i / n_burn) ** 4 # gain rises from 0 - 1 283 | # for x in optimizer.param_groups: 284 | # x['lr'] = hyp['lr0'] * g 285 | # x['weight_decay'] = hyp['weight_decay'] * g 286 | 287 | # Run model 288 | pred = model(imgs) 289 | 290 | # Compute loss 291 | loss, loss_items = compute_loss(pred, targets, model) 292 | if not torch.isfinite(loss): 293 | print('WARNING: non-finite loss, ending training ', loss_items) 294 | return results 295 | 296 | # Scale loss by nominal batch_size of 64 297 | loss *= batch_size / 64 298 | 299 | # Compute gradient 300 | if mixed_precision: 301 | with amp.scale_loss(loss, optimizer) as scaled_loss: 302 | scaled_loss.backward() 303 | else: 304 | loss.backward() 305 | 306 | BNOptimizer.updateBN(sr_flag, model.module_list, opt.s, prune_idx) 307 | 308 | # Accumulate gradient for x batches before optimizing 309 | if ni % accumulate == 0: 310 | optimizer.step() 311 | optimizer.zero_grad() 312 | 313 | # Print batch results 314 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 315 | mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0 # (GB) 316 | s = ('%10s' * 2 + '%10.3g' * 6) % ( 317 | '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss, len(targets), img_size) 318 | pbar.set_description(s) 319 | 320 | # end batch ------------------------------------------------------------------------------------------------ 321 | 322 | # Update scheduler 323 | scheduler.step() 324 | print('learning rate:',optimizer.param_groups[0]['lr']) 325 | 326 | 327 | # Process epoch results 328 | final_epoch = epoch + 1 == epochs 329 | if opt.prebias: 330 | print_model_biases(model) 331 | else: 332 | # Calculate mAP (always test final epoch, skip first 10 if opt.nosave) 333 | if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch: 334 | with torch.no_grad(): 335 | results, maps = test.test(cfg, 336 | data, 337 | batch_size=batch_size, 338 | img_size=opt.img_size, 339 | model=model, 340 | conf_thres=0.001 if final_epoch and epoch > 0 else 0.1, # 0.1 for speed 341 | save_json=final_epoch and epoch > 0 and 'coco.data' in data) 342 | 343 | # Write epoch results 344 | with open(results_file, 'a') as f: 345 | f.write(s + '%10.3g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) 346 | 347 | # Write Tensorboard results 348 | if tb_writer: 349 | x = list(mloss) + list(results) 350 | titles = ['GIoU', 'Objectness', 'Classification', 'Train loss', 351 | 'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'] 352 | for xi, title in zip(x, titles): 353 | tb_writer.add_scalar(title, xi, epoch) 354 | bn_weights = gather_bn_weights(model.module_list, prune_idx) 355 | tb_writer.add_histogram('bn_weights/hist', bn_weights.numpy(), epoch, bins='doane') 356 | 357 | # Update best mAP 358 
| fitness = results[2] # mAP 359 | if fitness > best_fitness: 360 | best_fitness = fitness 361 | 362 | # Save training results 363 | save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias 364 | if save: 365 | with open(results_file, 'r') as f: 366 | # Create checkpoint 367 | chkpt = {'epoch': epoch, 368 | 'best_fitness': best_fitness, 369 | 'training_results': f.read(), 370 | 'model': model.module.state_dict() if type( 371 | model) is nn.parallel.DistributedDataParallel else model.state_dict(), 372 | 'optimizer': None if final_epoch else optimizer.state_dict()} 373 | 374 | # Save last checkpoint 375 | torch.save(chkpt, last) 376 | if opt.bucket and not opt.prebias: 377 | os.system('gsutil cp %s gs://%s' % (last, opt.bucket)) # upload to bucket 378 | 379 | # Save best checkpoint 380 | if best_fitness == fitness: 381 | torch.save(chkpt, best) 382 | 383 | # Save backup every 10 epochs (optional) 384 | if epoch > 0 and epoch % 5 == 0: 385 | torch.save(chkpt, wdir + 'backup%g.pt' % epoch) 386 | 387 | # Delete checkpoint 388 | del chkpt 389 | 390 | 391 | # end epoch ---------------------------------------------------------------------------------------------------- 392 | 393 | # end training 394 | if len(opt.name): 395 | os.rename('results.txt', 'results_%s.txt' % opt.name) 396 | plot_results() # save as results.png 397 | print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) 398 | dist.destroy_process_group() if torch.cuda.device_count() > 1 else None 399 | torch.cuda.empty_cache() 400 | return results 401 | 402 | 403 | def prebias(): 404 | # trains output bias layers for 1 epoch and creates new backbone 405 | if opt.prebias: 406 | train() # transfer-learn yolo biases for 1 epoch 407 | create_backbone(last) # saved results as backbone.pt 408 | opt.weights = wdir + 'backbone.pt' # assign backbone 409 | opt.prebias = False # disable prebias 410 | 411 | 412 | if __name__ == '__main__': 413 | parser = argparse.ArgumentParser() 414 | parser.add_argument('--epochs', type=int, default=273) # 500200 batches at bs 16, 117263 images = 273 epochs 415 | parser.add_argument('--batch-size', type=int, default=32) # effective bs = batch_size * accumulate = 16 * 4 = 64 416 | parser.add_argument('--accumulate', type=int, default=2, help='batches to accumulate before optimizing') 417 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 418 | parser.add_argument('--data', type=str, default='data/coco.data', help='*.data file path') 419 | parser.add_argument('--multi-scale', action='store_true', help='adjust (67% - 150%) img_size every 10 batches') 420 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 421 | parser.add_argument('--rect', action='store_true', help='rectangular training') 422 | parser.add_argument('--resume', action='store_true', help='resume training from last.pt') 423 | parser.add_argument('--transfer', action='store_true', help='transfer learning') 424 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 425 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 426 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') 427 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 428 | parser.add_argument('--img-weights', action='store_true', help='select training images by weight') 429 | parser.add_argument('--cache-images', 
action='store_true', help='cache images for faster training') 430 | parser.add_argument('--weights', type=str, default='', help='initial weights') # i.e. weights/darknet53.conv.74 431 | parser.add_argument('--arc', type=str, default='defaultpw', help='yolo architecture') # defaultpw, uCE, uBCE 432 | parser.add_argument('--prebias', action='store_true', help='transfer-learn yolo biases prior to training') 433 | parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') 434 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 435 | parser.add_argument('--adam', action='store_true', help='use adam optimizer') 436 | parser.add_argument('--var', type=float, help='debug variable') 437 | parser.add_argument('--sparsity-regularization', '-sr', dest='sr', action='store_true', 438 | help='train with channel sparsity regularization') 439 | parser.add_argument('--s', type=float, default=0.001, help='scale sparse rate') 440 | parser.add_argument('--prune', type=int, default=0, help='0: normal prune, 1: shortcut prune') 441 | opt = parser.parse_args() 442 | opt.weights = last if opt.resume else opt.weights 443 | print(opt) 444 | device = torch_utils.select_device(opt.device, apex=mixed_precision) 445 | 446 | tb_writer = None 447 | if not opt.evolve: # Train normally 448 | # try: 449 | # Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ 450 | from torch.utils.tensorboard import SummaryWriter 451 | 452 | tb_writer = SummaryWriter() 453 | # except: 454 | # pass 455 | 456 | prebias() # optional 457 | train() # train normally 458 | 459 | else: # Evolve hyperparameters (optional) 460 | opt.notest = True # only test final epoch 461 | opt.nosave = True # only save final checkpoint 462 | if opt.bucket: 463 | os.system('gsutil cp gs://%s/evolve.txt .'
% opt.bucket) # download evolve.txt if exists 464 | 465 | for _ in range(1): # generations to evolve 466 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 467 | # Select parent(s) 468 | x = np.loadtxt('evolve.txt', ndmin=2) 469 | parent = 'weighted' # parent selection method: 'single' or 'weighted' 470 | if parent == 'single' or len(x) == 1: 471 | x = x[fitness(x).argmax()] 472 | elif parent == 'weighted': # weighted combination 473 | n = min(10, x.shape[0]) # number to merge 474 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 475 | w = fitness(x) - fitness(x).min() # weights 476 | x = (x[:n] * w.reshape(n, 1)).sum(0) / w.sum() # new parent 477 | for i, k in enumerate(hyp.keys()): 478 | hyp[k] = x[i + 7] 479 | 480 | # Mutate 481 | np.random.seed(int(time.time())) 482 | s = [.2, .2, .2, .2, .2, .2, .2, .0, .02, .2, .2, .2, .2, .2, .2, .2, .2] # sigmas 483 | for i, k in enumerate(hyp.keys()): 484 | x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300) 485 | hyp[k] *= float(x) # vary by sigmas 486 | 487 | # Clip to limits 488 | keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma'] 489 | limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)] 490 | for k, v in zip(keys, limits): 491 | hyp[k] = np.clip(hyp[k], v[0], v[1]) 492 | 493 | # Train mutation 494 | prebias() 495 | results = train() 496 | 497 | # Write mutation results 498 | print_mutation(hyp, results, opt.bucket) 499 | 500 | # Plot results 501 | # plot_evolution_results(hyp) 502 | -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. 
of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 
171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... ', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Downloads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # strip leading/trailing whitespace 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | 44 | -------------------------------------------------------------------------------- /utils/prune_utils.py:
-------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def get_sr_flag(epoch, sr): 9 | # return epoch >= 5 and sr 10 | return sr 11 | 12 | 13 | def parse_module_defs(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | for i, module_def in enumerate(module_defs): 26 | if module_def['type'] == 'shortcut': 27 | ignore_idx.add(i-1) 28 | identity_idx = (i + int(module_def['from'])) 29 | if module_defs[identity_idx]['type'] == 'convolutional': 30 | ignore_idx.add(identity_idx) 31 | elif module_defs[identity_idx]['type'] == 'shortcut': 32 | ignore_idx.add(identity_idx - 1) 33 | 34 | ignore_idx.add(84) 35 | ignore_idx.add(96) 36 | 37 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 38 | 39 | return CBL_idx, Conv_idx, prune_idx 40 | 41 | 42 | def parse_module_defs2(module_defs): 43 | 44 | CBL_idx = [] 45 | Conv_idx = [] 46 | shortcut_idx=dict() 47 | shortcut_all=set() 48 | for i, module_def in enumerate(module_defs): 49 | if module_def['type'] == 'convolutional': 50 | if module_def['batch_normalize'] == '1': 51 | CBL_idx.append(i) 52 | else: 53 | Conv_idx.append(i) 54 | 55 | ignore_idx = set() 56 | for i, module_def in enumerate(module_defs): 57 | if module_def['type'] == 'shortcut': 58 | identity_idx = (i + int(module_def['from'])) 59 | if module_defs[identity_idx]['type'] == 'convolutional': 60 | 61 | #ignore_idx.add(identity_idx) 62 | shortcut_idx[i-1]=identity_idx 63 | shortcut_all.add(identity_idx) 64 | elif module_defs[identity_idx]['type'] == 'shortcut': 65 | 66 | #ignore_idx.add(identity_idx - 1) 67 | shortcut_idx[i-1]=identity_idx-1 68 | shortcut_all.add(identity_idx-1) 69 | shortcut_all.add(i-1) 70 | # do not prune the convolutional layers before the upsample layers 71 | ignore_idx.add(84) 72 | ignore_idx.add(96) 73 | 74 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 75 | 76 | return CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all 77 | 78 | 79 | 80 | def gather_bn_weights(module_list, prune_idx): 81 | 82 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 83 | 84 | bn_weights = torch.zeros(sum(size_list)) 85 | index = 0 86 | for idx, size in zip(prune_idx, size_list): 87 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 88 | index += size 89 | 90 | return bn_weights 91 | 92 | 93 | def write_cfg(cfg_file, module_defs): 94 | 95 | with open(cfg_file, 'w') as f: 96 | for module_def in module_defs: 97 | f.write(f"[{module_def['type']}]\n") 98 | for key, value in module_def.items(): 99 | if key == 'batch_normalize' and value == 0: 100 | continue 101 | 102 | if key != 'type': 103 | if key == 'anchors': 104 | value = ', '.join(','.join(str(int(i)) for i in j) for j in value) 105 | f.write(f"{key}={value}\n") 106 | f.write("\n") 107 | return cfg_file 108 | 109 | 110 | class BNOptimizer(): 111 | 112 | @staticmethod 113 | def updateBN(sr_flag, module_list, s, prune_idx): 114 | if sr_flag: 115 | for idx in prune_idx: 116 | # Sequential(Conv, BN, LeakyReLU) 117 | bn_module = module_list[idx][1] 118 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 119 | 120 | 121 | def obtain_quantiles(bn_weights, num_quantile=5): 122
| 123 | sorted_bn_weights, i = torch.sort(bn_weights) 124 | total = sorted_bn_weights.shape[0] 125 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 126 | print("\nBN weights quantile:") 127 | quantile_table = [ 128 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 129 | ["%.3f" % quantile for quantile in quantiles] 130 | ] 131 | print(AsciiTable(quantile_table).table) 132 | 133 | return quantiles 134 | 135 | 136 | def get_input_mask(module_defs, idx, CBLidx2mask): 137 | 138 | if idx == 0: 139 | return np.ones(3) 140 | 141 | if module_defs[idx - 1]['type'] == 'convolutional': 142 | return CBLidx2mask[idx - 1] 143 | elif module_defs[idx - 1]['type'] == 'shortcut': 144 | return CBLidx2mask[idx - 2] 145 | elif module_defs[idx - 1]['type'] == 'route': 146 | route_in_idxs = [] 147 | for layer_i in module_defs[idx - 1]['layers'].split(","): 148 | if int(layer_i) < 0: 149 | route_in_idxs.append(idx - 1 + int(layer_i)) 150 | else: 151 | route_in_idxs.append(int(layer_i)) 152 | if len(route_in_idxs) == 1: 153 | return CBLidx2mask[route_in_idxs[0]] 154 | elif len(route_in_idxs) == 2: 155 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs]) 156 | else: 157 | print("Something wrong with route module!") 158 | raise Exception 159 | 160 | 161 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 162 | 163 | for idx in CBL_idx: 164 | compact_CBL = compact_model.module_list[idx] 165 | loose_CBL = loose_model.module_list[idx] 166 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 167 | 168 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 169 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 170 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 171 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 172 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 173 | 174 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 175 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 176 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 177 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 178 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 179 | 180 | for idx in Conv_idx: 181 | compact_conv = compact_model.module_list[idx][0] 182 | loose_conv = loose_model.module_list[idx][0] 183 | 184 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 185 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 186 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 187 | compact_conv.bias.data = loose_conv.bias.data.clone() 188 | 189 | 190 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 191 | 192 | pruned_model = deepcopy(model) 193 | for idx in prune_idx: 194 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 195 | bn_module = pruned_model.module_list[idx][1] 196 | 197 | bn_module.weight.data.mul_(mask) 198 | 199 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 200 | 201 | # the convolutional layers before the two upsample layers 202 | next_idx_list = [idx + 1] 203 | if idx == 79: 204 | next_idx_list.append(84) 205 | elif idx == 91: 206 | next_idx_list.append(96) 207 | 208 | for next_idx in next_idx_list: 209 | next_conv = pruned_model.module_list[next_idx][0] 210 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 211 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 212 | if
next_idx in CBL_idx: 213 | next_bn = pruned_model.module_list[next_idx][1] 214 | next_bn.running_mean.data.sub_(offset) 215 | else: 216 | next_conv.bias.data.add_(offset) 217 | 218 | bn_module.bias.data.mul_(mask) 219 | 220 | return pruned_model 221 | 222 | 223 | def obtain_bn_mask(bn_module, thre): 224 | 225 | thre = thre.cuda() 226 | mask = bn_module.weight.data.abs().ge(thre).float() 227 | 228 | return mask 229 | 230 | 231 | 232 | def update_activation(i, pruned_model, activation, CBL_idx): 233 | next_idx = i + 1 234 | if pruned_model.module_defs[next_idx]['type'] == 'convolutional': 235 | next_conv = pruned_model.module_list[next_idx][0] 236 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 237 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 238 | if next_idx in CBL_idx: 239 | next_bn = pruned_model.module_list[next_idx][1] 240 | next_bn.running_mean.data.sub_(offset) 241 | else: 242 | next_conv.bias.data.add_(offset) 243 | 244 | 245 | 246 | def prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask): 247 | 248 | pruned_model = deepcopy(model) 249 | activations = [] 250 | for i, model_def in enumerate(model.module_defs): 251 | 252 | if model_def['type'] == 'convolutional': 253 | activation = None 254 | if i in prune_idx: 255 | mask = torch.from_numpy(CBLidx2mask[i]).cuda() 256 | bn_module = pruned_model.module_list[i][1] 257 | bn_module.weight.data.mul_(mask) 258 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 259 | update_activation(i, pruned_model, activation, CBL_idx) 260 | bn_module.bias.data.mul_(mask) 261 | activations.append(activation) 262 | 263 | if model_def['type'] == 'shortcut': 264 | actv1 = activations[i - 1] 265 | from_layer = int(model_def['from']) 266 | actv2 = activations[i + from_layer] 267 | activation = actv1 + actv2 268 | update_activation(i, pruned_model, activation, CBL_idx) 269 | activations.append(activation) 270 | 271 | 272 | 273 | if model_def['type'] == 'route': 274 | from_layers = [int(s) for s in model_def['layers'].split(',')] 275 | if len(from_layers) == 1: 276 | activation = activations[i + from_layers[0]] 277 | update_activation(i, pruned_model, activation, CBL_idx) 278 | else: 279 | actv1 = activations[i + from_layers[0]] 280 | actv2 = activations[from_layers[1]] 281 | activation = torch.cat((actv1, actv2)) 282 | update_activation(i, pruned_model, activation, CBL_idx) 283 | activations.append(activation) 284 | 285 | if model_def['type'] == 'upsample': 286 | activation = torch.zeros(int(model.module_defs[i - 1]['filters'])).cuda() 287 | activations.append(activation) 288 | 289 | if model_def['type'] == 'yolo': 290 | activations.append(None) 291 | 292 | return pruned_model 293 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device 
# set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availability 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 30 | for i in range(0, ng): 31 | if i == 1: 32 | cuda_str = ' ' * len(cuda_str) 33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 34 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 35 | else: 36 | print('Using CPU') 37 | 38 | print('') # skip a line 39 | return torch.device('cuda:0' if cuda else 'cpu') 40 | 41 | 42 | def fuse_conv_and_bn(conv, bn): 43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 44 | with torch.no_grad(): 45 | # init 46 | fusedconv = torch.nn.Conv2d(conv.in_channels, 47 | conv.out_channels, 48 | kernel_size=conv.kernel_size, 49 | stride=conv.stride, 50 | padding=conv.padding, 51 | bias=True) 52 | 53 | # prepare filters 54 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 57 | 58 | # prepare spatial bias 59 | if conv.bias is not None: 60 | b_conv = conv.bias 61 | else: 62 | b_conv = torch.zeros(conv.weight.size(0)) 63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 64 | fusedconv.bias.copy_(b_conv + b_bn) 65 | 66 | return fusedconv 67 | 68 | 69 | def model_info(model, report='summary'): 70 | # Prints a line-by-line description of a PyTorch model 71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 73 | if report == 'full': 74 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 75 | for i, (name, p) in enumerate(model.named_parameters()): 76 | name = name.replace('module_list.', '') 77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 80 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # download darknet weight files, continue '-c' if partially downloaded 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 |
--------------------------------------------------------------------------------
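
Editor's note: the prune_utils.py helpers above are the building blocks of BN-gamma ("network slimming") channel pruning, but the dump does not show them wired together. The sketch below illustrates one plausible end-to-end flow under explicit assumptions: it assumes a Darknet class exported by models.py with module_defs/module_list matching parse_config.py, a CUDA device (prune_model_keep_size2 moves masks to .cuda() internally), and a placeholder prune_ratio and weight-loading step. It is not the repository's prune.py or shortcut_prune.py, only a minimal illustration of how these functions compose.

import torch

from models import Darknet  # assumed model class from models.py (not shown in this dump)
from utils.prune_utils import (parse_module_defs, gather_bn_weights,
                               obtain_bn_mask, prune_model_keep_size2)

# Build the model and load sparsity-trained weights (placeholder step)
model = Darknet('cfg/yolov3.cfg').cuda()
# ... load a checkpoint trained with BNOptimizer.updateBN sparsity here ...

# 1. Identify Conv+BN+LeakyReLU blocks that are safe to prune
CBL_idx, Conv_idx, prune_idx = parse_module_defs(model.module_defs)

# 2. Collect the BN scale factors (gammas) of all prunable layers and pick a
#    global threshold at the desired prune ratio (0.5 is an arbitrary example)
bn_weights = gather_bn_weights(model.module_list, prune_idx)
prune_ratio = 0.5
sorted_bn = torch.sort(bn_weights)[0]
thre = sorted_bn[int(len(sorted_bn) * prune_ratio)]

# 3. Turn the threshold into a per-layer channel mask (numpy, as expected by
#    prune_model_keep_size2). Note: this sketch has no guard against masking
#    every channel of a layer.
CBLidx2mask = {}
for idx in prune_idx:
    bn_module = model.module_list[idx][1]
    CBLidx2mask[idx] = obtain_bn_mask(bn_module, thre).cpu().numpy()

# 4. Zero the masked channels while compensating the downstream layers, so the
#    network keeps its original size for a quick mAP sanity check
pruned_model = prune_model_keep_size2(model, prune_idx, CBL_idx, CBLidx2mask)

Because prune_model_keep_size2 folds the bias contribution of each removed channel into the next convolution's bias (or the next BN running mean, via update_activation), the zero-masked model should behave close to the original even before a compact model is built and its weights copied over with init_weights_from_loose_model and write_cfg.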