├── .gitignore ├── README.md ├── cfg ├── yolov3-hand.cfg ├── yolov3-repvggB0-hand.cfg ├── yolov3-repvggB1-hand.cfg └── yolov3.cfg ├── convert_repyolo.py ├── data ├── converter.py ├── oxfordhand.data └── oxfordhand.names ├── detect.py ├── models.py ├── requirements.txt ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── parse_config.py ├── prune_utils.py ├── tiny_prune_utils.py ├── torch_utils.py └── utils.py └── weights └── download_yolov3_weights.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3-RepVGG-backbone 2 | 3 | This is a naive implementation of RepVGG as the backbone of yolov3. The whole project is based on [YOLOv3-complete-pruning](https://github.com/coldlarry/YOLOv3-complete-pruning.git) 4 | 5 | You can download my trained RepVGG-B0 and RepVGG-B1 models for testing. 6 | [BaiduDisk](https://pan.baidu.com/s/162_jQ3rr5s_w5W8YqfURBw) code: xypk -------------------------------------------------------------------------------- /cfg/yolov3-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=32 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # Downsample 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=64 39 | size=3 40 | stride=2 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [shortcut] 61 | from=-3 62 | activation=linear 63 | 64 | # Downsample 65 | 66 | [convolutional] 67 | batch_normalize=1 68 | filters=128 69 | size=3 70 | stride=2 71 | pad=1 72 | activation=leaky 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=64 77 | size=1 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [shortcut] 91 | from=-3 92 | activation=linear 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=64 97 | size=1 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [shortcut] 111 | from=-3 112 | activation=linear 113 | 114 | # Downsample 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=256 119 | size=3 120 | stride=2 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=128 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=256 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [shortcut] 
141 | from=-3 142 | activation=linear 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=128 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=256 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [shortcut] 161 | from=-3 162 | activation=linear 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=256 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [shortcut] 181 | from=-3 182 | activation=linear 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=128 187 | size=1 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=3 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [shortcut] 201 | from=-3 202 | activation=linear 203 | 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=1 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | filters=256 216 | size=3 217 | stride=1 218 | pad=1 219 | activation=leaky 220 | 221 | [shortcut] 222 | from=-3 223 | activation=linear 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=256 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=leaky 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=256 256 | size=3 257 | stride=1 258 | pad=1 259 | activation=leaky 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=256 276 | size=3 277 | stride=1 278 | pad=1 279 | activation=leaky 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | # Downsample 286 | 287 | [convolutional] 288 | batch_normalize=1 289 | filters=512 290 | size=3 291 | stride=2 292 | pad=1 293 | activation=leaky 294 | 295 | [convolutional] 296 | batch_normalize=1 297 | filters=256 298 | size=1 299 | stride=1 300 | pad=1 301 | activation=leaky 302 | 303 | [convolutional] 304 | batch_normalize=1 305 | filters=512 306 | size=3 307 | stride=1 308 | pad=1 309 | activation=leaky 310 | 311 | [shortcut] 312 | from=-3 313 | activation=linear 314 | 315 | 316 | [convolutional] 317 | batch_normalize=1 318 | filters=256 319 | size=1 320 | stride=1 321 | pad=1 322 | activation=leaky 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=512 327 | size=3 328 | stride=1 329 | pad=1 330 | activation=leaky 331 | 332 | [shortcut] 333 | from=-3 334 | activation=linear 335 | 336 | 337 | [convolutional] 338 | batch_normalize=1 339 | filters=256 340 | size=1 341 | stride=1 342 | pad=1 343 | activation=leaky 344 | 345 | [convolutional] 346 | batch_normalize=1 347 | filters=512 348 | size=3 349 | stride=1 350 | pad=1 351 | activation=leaky 352 | 353 | [shortcut] 354 | from=-3 355 | activation=linear 356 | 357 | 358 | [convolutional] 359 | batch_normalize=1 360 | 
filters=256 361 | size=1 362 | stride=1 363 | pad=1 364 | activation=leaky 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=512 369 | size=3 370 | stride=1 371 | pad=1 372 | activation=leaky 373 | 374 | [shortcut] 375 | from=-3 376 | activation=linear 377 | 378 | [convolutional] 379 | batch_normalize=1 380 | filters=256 381 | size=1 382 | stride=1 383 | pad=1 384 | activation=leaky 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=512 389 | size=3 390 | stride=1 391 | pad=1 392 | activation=leaky 393 | 394 | [shortcut] 395 | from=-3 396 | activation=linear 397 | 398 | 399 | [convolutional] 400 | batch_normalize=1 401 | filters=256 402 | size=1 403 | stride=1 404 | pad=1 405 | activation=leaky 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=1 412 | pad=1 413 | activation=leaky 414 | 415 | [shortcut] 416 | from=-3 417 | activation=linear 418 | 419 | 420 | [convolutional] 421 | batch_normalize=1 422 | filters=256 423 | size=1 424 | stride=1 425 | pad=1 426 | activation=leaky 427 | 428 | [convolutional] 429 | batch_normalize=1 430 | filters=512 431 | size=3 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 | [shortcut] 437 | from=-3 438 | activation=linear 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=1 444 | stride=1 445 | pad=1 446 | activation=leaky 447 | 448 | [convolutional] 449 | batch_normalize=1 450 | filters=512 451 | size=3 452 | stride=1 453 | pad=1 454 | activation=leaky 455 | 456 | [shortcut] 457 | from=-3 458 | activation=linear 459 | 460 | # Downsample 461 | 462 | [convolutional] 463 | batch_normalize=1 464 | filters=1024 465 | size=3 466 | stride=2 467 | pad=1 468 | activation=leaky 469 | 470 | [convolutional] 471 | batch_normalize=1 472 | filters=512 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | [convolutional] 479 | batch_normalize=1 480 | filters=1024 481 | size=3 482 | stride=1 483 | pad=1 484 | activation=leaky 485 | 486 | [shortcut] 487 | from=-3 488 | activation=linear 489 | 490 | [convolutional] 491 | batch_normalize=1 492 | filters=512 493 | size=1 494 | stride=1 495 | pad=1 496 | activation=leaky 497 | 498 | [convolutional] 499 | batch_normalize=1 500 | filters=1024 501 | size=3 502 | stride=1 503 | pad=1 504 | activation=leaky 505 | 506 | [shortcut] 507 | from=-3 508 | activation=linear 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=512 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=leaky 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=1024 521 | size=3 522 | stride=1 523 | pad=1 524 | activation=leaky 525 | 526 | [shortcut] 527 | from=-3 528 | activation=linear 529 | 530 | [convolutional] 531 | batch_normalize=1 532 | filters=512 533 | size=1 534 | stride=1 535 | pad=1 536 | activation=leaky 537 | 538 | [convolutional] 539 | batch_normalize=1 540 | filters=1024 541 | size=3 542 | stride=1 543 | pad=1 544 | activation=leaky 545 | 546 | [shortcut] 547 | from=-3 548 | activation=linear 549 | 550 | ###################### 551 | 552 | [convolutional] 553 | batch_normalize=1 554 | filters=512 555 | size=1 556 | stride=1 557 | pad=1 558 | activation=leaky 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | size=3 563 | stride=1 564 | pad=1 565 | filters=1024 566 | activation=leaky 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=512 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=leaky 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | 
size=3 579 | stride=1 580 | pad=1 581 | filters=1024 582 | activation=leaky 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=512 587 | size=1 588 | stride=1 589 | pad=1 590 | activation=leaky 591 | 592 | [convolutional] 593 | batch_normalize=1 594 | size=3 595 | stride=1 596 | pad=1 597 | filters=1024 598 | activation=leaky 599 | 600 | [convolutional] 601 | size=1 602 | stride=1 603 | pad=1 604 | filters=18 605 | activation=linear 606 | 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=1 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .7 615 | truth_thresh = 1 616 | random=1 617 | 618 | 619 | [route] 620 | layers = -4 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=256 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [upsample] 631 | stride=2 632 | 633 | [route] 634 | layers = -1, 61 635 | 636 | 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=256 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | size=3 649 | stride=1 650 | pad=1 651 | filters=512 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | size=3 665 | stride=1 666 | pad=1 667 | filters=512 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | size=1 688 | stride=1 689 | pad=1 690 | filters=18 691 | activation=linear 692 | 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .7 701 | truth_thresh = 1 702 | random=1 703 | 704 | 705 | 706 | [route] 707 | layers = -4 708 | 709 | [convolutional] 710 | batch_normalize=1 711 | filters=128 712 | size=1 713 | stride=1 714 | pad=1 715 | activation=leaky 716 | 717 | [upsample] 718 | stride=2 719 | 720 | [route] 721 | layers = -1, 36 722 | 723 | 724 | 725 | [convolutional] 726 | batch_normalize=1 727 | filters=128 728 | size=1 729 | stride=1 730 | pad=1 731 | activation=leaky 732 | 733 | [convolutional] 734 | batch_normalize=1 735 | size=3 736 | stride=1 737 | pad=1 738 | filters=256 739 | activation=leaky 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | size=3 752 | stride=1 753 | pad=1 754 | filters=256 755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | size=1 775 | stride=1 776 | pad=1 777 | filters=18 778 | activation=linear 779 | 780 | 781 | [yolo] 782 | mask = 0,1,2 783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 784 | classes=1 785 | num=9 786 | jitter=.3 787 | ignore_thresh = .7 788 | truth_thresh = 1 789 | random=1 790 | 791 | 
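# Head-sizing note for the cfgs in this repo: the 1x1 convolution feeding each
# [yolo] layer must have filters = (classes + 5) * anchors_per_scale. With
# classes=1 (the single "hand" class) and 3 anchor masks per scale this gives
# (1 + 5) * 3 = 18, which is why the three convolutions directly before the
# [yolo] blocks above set filters=18; the stock yolov3.cfg further below keeps
# classes=80 and therefore filters = (80 + 5) * 3 = 255. To retarget these
# cfgs to another class count, update classes= in all three [yolo] blocks and
# filters= in the convolution immediately preceding each of them.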
-------------------------------------------------------------------------------- /cfg/yolov3-repvggB0-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | # 0 先保留 27 | # [convolutional0] 28 | # batch_normalize=1 29 | # filters=32 30 | # size=3 31 | # stride=1 32 | # pad=1 33 | # activation=leaky 34 | 35 | # 1:64 36 | # 1 37 | [RepvggBlock] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=relu 44 | 45 | # 4:64 46 | # 2 --- 5 47 | [RepvggBlock] 48 | batch_normalize=1 49 | filters=64 50 | size=3 51 | stride=2 52 | pad=1 53 | activation=relu 54 | 55 | # 3 --- 9 56 | [RepvggBlock] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=relu 63 | 64 | # 4 --- 13 65 | [RepvggBlock] 66 | batch_normalize=1 67 | filters=64 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=relu 72 | 73 | # 5 74 | [RepvggBlock] 75 | batch_normalize=1 76 | filters=64 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=relu 81 | 82 | # 6:128 83 | # 6 84 | [RepvggBlock] 85 | batch_normalize=1 86 | filters=128 87 | size=3 88 | stride=2 89 | pad=1 90 | activation=relu 91 | 92 | # 7 93 | [RepvggBlock] 94 | batch_normalize=1 95 | filters=128 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | # 8 102 | [RepvggBlock] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=relu 109 | 110 | # 9 111 | [RepvggBlock] 112 | batch_normalize=1 113 | filters=128 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | # 10 120 | [RepvggBlock] 121 | batch_normalize=1 122 | filters=128 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=relu 127 | 128 | # 11 129 | [RepvggBlock] 130 | batch_normalize=1 131 | filters=128 132 | size=3 133 | stride=1 134 | pad=1 135 | activation=relu 136 | 137 | # 16:256 138 | # 12 139 | [RepvggBlock] 140 | batch_normalize=1 141 | filters=256 142 | size=3 143 | stride=2 144 | pad=1 145 | activation=relu 146 | 147 | # 13 148 | [RepvggBlock] 149 | batch_normalize=1 150 | filters=256 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=relu 155 | 156 | # 14 157 | [RepvggBlock] 158 | batch_normalize=1 159 | filters=256 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=relu 164 | 165 | # 15 166 | [RepvggBlock] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=relu 173 | 174 | # 16 175 | [RepvggBlock] 176 | batch_normalize=1 177 | filters=256 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=relu 182 | 183 | # 17 184 | [RepvggBlock] 185 | batch_normalize=1 186 | filters=256 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=relu 191 | 192 | # 18 193 | [RepvggBlock] 194 | batch_normalize=1 195 | filters=256 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=relu 200 | 201 | # 19 202 | [RepvggBlock] 203 | batch_normalize=1 204 | filters=256 205 | size=3 206 | stride=1 207 | pad=1 208 | activation=relu 209 | 210 | # 20 211 | [RepvggBlock] 212 | batch_normalize=1 213 | filters=256 214 | size=3 215 | stride=1 216 | pad=1 217 | activation=relu 218 
| 219 | # 21 220 | [RepvggBlock] 221 | batch_normalize=1 222 | filters=256 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=relu 227 | 228 | # 22 229 | [RepvggBlock] 230 | batch_normalize=1 231 | filters=256 232 | size=3 233 | stride=1 234 | pad=1 235 | activation=relu 236 | 237 | # 23 238 | [RepvggBlock] 239 | batch_normalize=1 240 | filters=256 241 | size=3 242 | stride=1 243 | pad=1 244 | activation=relu 245 | 246 | # 24 247 | [RepvggBlock] 248 | batch_normalize=1 249 | filters=256 250 | size=3 251 | stride=1 252 | pad=1 253 | activation=relu 254 | 255 | # 25 256 | [RepvggBlock] 257 | batch_normalize=1 258 | filters=256 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=relu 263 | 264 | # 26 265 | [RepvggBlock] 266 | batch_normalize=1 267 | filters=256 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=relu 272 | 273 | # 26 --- 105 274 | [RepvggBlock] 275 | batch_normalize=1 276 | filters=256 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=relu 281 | 282 | # 1:512*2.5 283 | # 27 --- 109 284 | [RepvggBlock] 285 | batch_normalize=1 286 | filters=1280 287 | size=3 288 | stride=2 289 | pad=1 290 | activation=relu 291 | 292 | 293 | ###################### 294 | # 75 295 | # 28 --- 113 296 | [convolutional] 297 | batch_normalize=1 298 | filters=512 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | # 29 --- 114 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=1024 311 | activation=leaky 312 | 313 | # 30 ---115 314 | [convolutional] 315 | batch_normalize=1 316 | filters=512 317 | size=1 318 | stride=1 319 | pad=1 320 | activation=leaky 321 | 322 | # 31 --- 116 323 | [convolutional] 324 | batch_normalize=1 325 | size=3 326 | stride=1 327 | pad=1 328 | filters=1024 329 | activation=leaky 330 | 331 | # 32 --- 117 332 | [convolutional] 333 | batch_normalize=1 334 | filters=512 335 | size=1 336 | stride=1 337 | pad=1 338 | activation=leaky 339 | 340 | # ---- 341 | # 32 --- 118 342 | [convolutional] 343 | batch_normalize=1 344 | size=3 345 | stride=1 346 | pad=1 347 | filters=1024 348 | activation=leaky 349 | 350 | # 34 --- 119 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=18 356 | activation=linear 357 | 358 | # 35 --- 120 359 | [yolo] 360 | mask = 6,7,8 361 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 362 | classes=1 363 | num=9 364 | jitter=.3 365 | ignore_thresh = .7 366 | truth_thresh = 1 367 | random=1 368 | 369 | # 36 --- 121 370 | [route] 371 | layers = -4 372 | 373 | # 37 --- 122 374 | [convolutional] 375 | batch_normalize=1 376 | filters=256 377 | size=1 378 | stride=1 379 | pad=1 380 | activation=leaky 381 | 382 | # 38 383 | [upsample] 384 | stride=2 385 | 386 | # 39 387 | [route] 388 | layers = -1, 26 389 | 390 | 391 | # 40 392 | [convolutional] 393 | batch_normalize=1 394 | filters=256 395 | size=1 396 | stride=1 397 | pad=1 398 | activation=leaky 399 | 400 | # 41 401 | [convolutional] 402 | batch_normalize=1 403 | size=3 404 | stride=1 405 | pad=1 406 | filters=512 407 | activation=leaky 408 | 409 | # 42 410 | [convolutional] 411 | batch_normalize=1 412 | filters=256 413 | size=1 414 | stride=1 415 | pad=1 416 | activation=leaky 417 | 418 | # 43 419 | [convolutional] 420 | batch_normalize=1 421 | size=3 422 | stride=1 423 | pad=1 424 | filters=512 425 | activation=leaky 426 | 427 | # 44 428 | [convolutional] 429 | batch_normalize=1 430 | filters=256 431 | size=1 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 
| # 45 437 | [convolutional] 438 | batch_normalize=1 439 | size=3 440 | stride=1 441 | pad=1 442 | filters=512 443 | activation=leaky 444 | 445 | # 46 446 | [convolutional] 447 | size=1 448 | stride=1 449 | pad=1 450 | filters=18 451 | activation=linear 452 | 453 | # 47 454 | [yolo] 455 | mask = 3,4,5 456 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 457 | classes=1 458 | num=9 459 | jitter=.3 460 | ignore_thresh = .7 461 | truth_thresh = 1 462 | random=1 463 | 464 | 465 | # 48 466 | [route] 467 | layers = -4 468 | 469 | # 49 470 | [convolutional] 471 | batch_normalize=1 472 | filters=128 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | # 50 479 | [upsample] 480 | stride=2 481 | 482 | # 51 483 | [route] 484 | layers = -1, 10 485 | 486 | 487 | # 52 488 | [convolutional] 489 | batch_normalize=1 490 | filters=128 491 | size=1 492 | stride=1 493 | pad=1 494 | activation=leaky 495 | 496 | # 53 497 | [convolutional] 498 | batch_normalize=1 499 | size=3 500 | stride=1 501 | pad=1 502 | filters=256 503 | activation=leaky 504 | 505 | # 54 506 | [convolutional] 507 | batch_normalize=1 508 | filters=128 509 | size=1 510 | stride=1 511 | pad=1 512 | activation=leaky 513 | 514 | # 55 515 | [convolutional] 516 | batch_normalize=1 517 | size=3 518 | stride=1 519 | pad=1 520 | filters=256 521 | activation=leaky 522 | 523 | # 56 524 | [convolutional] 525 | batch_normalize=1 526 | filters=128 527 | size=1 528 | stride=1 529 | pad=1 530 | activation=leaky 531 | 532 | [convolutional] 533 | batch_normalize=1 534 | size=3 535 | stride=1 536 | pad=1 537 | filters=256 538 | activation=leaky 539 | 540 | [convolutional] 541 | size=1 542 | stride=1 543 | pad=1 544 | filters=18 545 | activation=linear 546 | 547 | 548 | [yolo] 549 | mask = 0,1,2 550 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 551 | classes=1 552 | num=9 553 | jitter=.3 554 | ignore_thresh = .7 555 | truth_thresh = 1 556 | random=1 557 | 558 | -------------------------------------------------------------------------------- /cfg/yolov3-repvggB1-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | # 0 先保留 27 | # [convolutional] 28 | # batch_normalize=1 29 | # filters=32 30 | # size=3 31 | # stride=1 32 | # pad=1 33 | # activation=leaky 34 | 35 | # 1:64 36 | # 1 37 | [RepvggBlock] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=relu 44 | 45 | # 4:64x2 46 | # 2 --- 5(i) 47 | [RepvggBlock] 48 | batch_normalize=1 49 | filters=128 50 | size=3 51 | stride=2 52 | pad=1 53 | activation=relu 54 | 55 | # 3 --- 9 56 | [RepvggBlock] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=relu 63 | 64 | # 4 --- 13 65 | [RepvggBlock] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=relu 72 | 73 | # 5 74 | [RepvggBlock] 75 | batch_normalize=1 76 | filters=128 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=relu 81 | 82 | # 6:128x2 83 | # 6 84 | [RepvggBlock] 85 | batch_normalize=1 86 | filters=256 87 | size=3 88 | 
stride=2 89 | pad=1 90 | activation=relu 91 | 92 | # 7 93 | [RepvggBlock] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | # 8 102 | [RepvggBlock] 103 | batch_normalize=1 104 | filters=256 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=relu 109 | 110 | # 9 111 | [RepvggBlock] 112 | batch_normalize=1 113 | filters=256 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | # 10 120 | [RepvggBlock] 121 | batch_normalize=1 122 | filters=256 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=relu 127 | 128 | # 11 129 | [RepvggBlock] 130 | batch_normalize=1 131 | filters=256 132 | size=3 133 | stride=1 134 | pad=1 135 | activation=relu 136 | 137 | # 16:256x2 138 | # 12 139 | [RepvggBlock] 140 | batch_normalize=1 141 | filters=512 142 | size=3 143 | stride=2 144 | pad=1 145 | activation=relu 146 | 147 | # 13 148 | [RepvggBlock] 149 | batch_normalize=1 150 | filters=512 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=relu 155 | 156 | # 14 157 | [RepvggBlock] 158 | batch_normalize=1 159 | filters=512 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=relu 164 | 165 | # 15 166 | [RepvggBlock] 167 | batch_normalize=1 168 | filters=512 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=relu 173 | 174 | # 16 175 | [RepvggBlock] 176 | batch_normalize=1 177 | filters=512 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=relu 182 | 183 | # 17 184 | [RepvggBlock] 185 | batch_normalize=1 186 | filters=512 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=relu 191 | 192 | # 18 193 | [RepvggBlock] 194 | batch_normalize=1 195 | filters=512 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=relu 200 | 201 | # 19 202 | [RepvggBlock] 203 | batch_normalize=1 204 | filters=512 205 | size=3 206 | stride=1 207 | pad=1 208 | activation=relu 209 | 210 | # 20 211 | [RepvggBlock] 212 | batch_normalize=1 213 | filters=512 214 | size=3 215 | stride=1 216 | pad=1 217 | activation=relu 218 | 219 | # 21 220 | [RepvggBlock] 221 | batch_normalize=1 222 | filters=512 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=relu 227 | 228 | # 22 229 | [RepvggBlock] 230 | batch_normalize=1 231 | filters=512 232 | size=3 233 | stride=1 234 | pad=1 235 | activation=relu 236 | 237 | # 23 238 | [RepvggBlock] 239 | batch_normalize=1 240 | filters=512 241 | size=3 242 | stride=1 243 | pad=1 244 | activation=relu 245 | 246 | # 24 247 | [RepvggBlock] 248 | batch_normalize=1 249 | filters=512 250 | size=3 251 | stride=1 252 | pad=1 253 | activation=relu 254 | 255 | # 25 256 | [RepvggBlock] 257 | batch_normalize=1 258 | filters=512 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=relu 263 | 264 | # 26 265 | [RepvggBlock] 266 | batch_normalize=1 267 | filters=512 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=relu 272 | 273 | # 27 --- 105 274 | [RepvggBlock] 275 | batch_normalize=1 276 | filters=512 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=relu 281 | 282 | # 1:512x4 283 | # 28 --- 109 284 | [RepvggBlock] 285 | batch_normalize=1 286 | filters=2048 287 | size=3 288 | stride=2 289 | pad=1 290 | activation=relu 291 | 292 | 293 | ###################### 294 | # 75 295 | # 29 --- 113 296 | [convolutional] 297 | batch_normalize=1 298 | filters=512 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | # 30 --- 114 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=1024 311 | activation=leaky 312 | 313 | # 31 
---115 314 | [convolutional] 315 | batch_normalize=1 316 | filters=512 317 | size=1 318 | stride=1 319 | pad=1 320 | activation=leaky 321 | 322 | # 32 --- 116 323 | [convolutional] 324 | batch_normalize=1 325 | size=3 326 | stride=1 327 | pad=1 328 | filters=1024 329 | activation=leaky 330 | 331 | # 33 --- 117 332 | [convolutional] 333 | batch_normalize=1 334 | filters=512 335 | size=1 336 | stride=1 337 | pad=1 338 | activation=leaky 339 | 340 | # ---- 341 | # 34 --- 118 342 | [convolutional] 343 | batch_normalize=1 344 | size=3 345 | stride=1 346 | pad=1 347 | filters=1024 348 | activation=leaky 349 | 350 | # 35 --- 119 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=18 356 | activation=linear 357 | 358 | # 36 --- 120 359 | [yolo] 360 | mask = 6,7,8 361 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 362 | classes=1 363 | num=9 364 | jitter=.3 365 | ignore_thresh = .7 366 | truth_thresh = 1 367 | random=1 368 | 369 | # 37 --- 121 370 | [route] 371 | layers = -4 372 | 373 | # 38 --- 122 374 | [convolutional] 375 | batch_normalize=1 376 | filters=256 377 | size=1 378 | stride=1 379 | pad=1 380 | activation=leaky 381 | 382 | [upsample] 383 | stride=2 384 | 385 | [route] 386 | layers = -1, 26 387 | 388 | 389 | 390 | [convolutional] 391 | batch_normalize=1 392 | filters=256 393 | size=1 394 | stride=1 395 | pad=1 396 | activation=leaky 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | size=3 401 | stride=1 402 | pad=1 403 | filters=512 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=256 409 | size=1 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [convolutional] 415 | batch_normalize=1 416 | size=3 417 | stride=1 418 | pad=1 419 | filters=512 420 | activation=leaky 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=256 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=leaky 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | size=3 433 | stride=1 434 | pad=1 435 | filters=512 436 | activation=leaky 437 | 438 | [convolutional] 439 | size=1 440 | stride=1 441 | pad=1 442 | filters=18 443 | activation=linear 444 | 445 | 446 | [yolo] 447 | mask = 3,4,5 448 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 449 | classes=1 450 | num=9 451 | jitter=.3 452 | ignore_thresh = .7 453 | truth_thresh = 1 454 | random=1 455 | 456 | 457 | 458 | [route] 459 | layers = -4 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=128 464 | size=1 465 | stride=1 466 | pad=1 467 | activation=leaky 468 | 469 | [upsample] 470 | stride=2 471 | 472 | [route] 473 | layers = -1, 10 474 | 475 | 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=128 480 | size=1 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [convolutional] 486 | batch_normalize=1 487 | size=3 488 | stride=1 489 | pad=1 490 | filters=256 491 | activation=leaky 492 | 493 | [convolutional] 494 | batch_normalize=1 495 | filters=128 496 | size=1 497 | stride=1 498 | pad=1 499 | activation=leaky 500 | 501 | [convolutional] 502 | batch_normalize=1 503 | size=3 504 | stride=1 505 | pad=1 506 | filters=256 507 | activation=leaky 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=128 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | size=3 520 | stride=1 521 | pad=1 522 | filters=256 523 | activation=leaky 524 | 525 | [convolutional] 526 | size=1 527 | stride=1 
528 | pad=1 529 | filters=18 530 | activation=linear 531 | 532 | 533 | [yolo] 534 | mask = 0,1,2 535 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 536 | classes=1 537 | num=9 538 | jitter=.3 539 | ignore_thresh = .7 540 | truth_thresh = 1 541 | random=1 542 | 543 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | 
filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | 
[shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | 
[upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /convert_repyolo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | import argparse 6 | import json 7 | 8 | from torch.utils.data import DataLoader 9 | 10 | from models import * 11 | from utils.datasets import * 12 | from utils.utils import * 13 | 14 | use_dict_old = {'stage0.rbr_dense':'module_list.1','stage0.rbr_1x1':'module_list.2', 15 | # --------- 16 | 'stage1.0.rbr_dense':'module_list.5','stage1.0.rbr_1x1':'module_list.6', 17 | 'stage1.1.rbr_dense':'module_list.9','stage1.1.rbr_1x1':'module_list.10','stage1.1.rbr_identity':'module_list.11', 18 | 'stage1.2.rbr_dense':'module_list.13','stage1.2.rbr_1x1':'module_list.14','stage1.2.rbr_identity':'module_list.15', 19 | 
'stage1.3.rbr_dense':'module_list.17','stage1.3.rbr_1x1':'module_list.18','stage1.3.rbr_identity':'module_list.19', 20 | # --------- 21 | 'stage2.0.rbr_dense':'module_list.21','stage2.0.rbr_1x1':'module_list.22', 22 | 'stage2.1.rbr_dense':'module_list.25','stage2.1.rbr_1x1':'module_list.26','stage2.1.rbr_identity':'module_list.27', 23 | 'stage2.2.rbr_dense':'module_list.29','stage2.2.rbr_1x1':'module_list.30','stage2.2.rbr_identity':'module_list.31', 24 | 'stage2.3.rbr_dense':'module_list.33','stage2.3.rbr_1x1':'module_list.34','stage2.3.rbr_identity':'module_list.35', 25 | 'stage2.4.rbr_dense':'module_list.37','stage2.4.rbr_1x1':'module_list.38','stage2.4.rbr_identity':'module_list.39', 26 | 'stage2.5.rbr_dense':'module_list.41','stage2.5.rbr_1x1':'module_list.42','stage2.5.rbr_identity':'module_list.43', 27 | # --------- 28 | 'stage3.0.rbr_dense':'module_list.45','stage3.0.rbr_1x1':'module_list.46', 29 | 'stage3.1.rbr_dense':'module_list.49','stage3.1.rbr_1x1':'module_list.50','stage3.1.rbr_identity':'module_list.51', 30 | 'stage3.2.rbr_dense':'module_list.53','stage3.2.rbr_1x1':'module_list.54','stage3.2.rbr_identity':'module_list.55', 31 | 'stage3.3.rbr_dense':'module_list.57','stage3.3.rbr_1x1':'module_list.58','stage3.3.rbr_identity':'module_list.59', 32 | 'stage3.4.rbr_dense':'module_list.61','stage3.4.rbr_1x1':'module_list.62','stage3.4.rbr_identity':'module_list.63', 33 | 'stage3.5.rbr_dense':'module_list.65','stage3.5.rbr_1x1':'module_list.66','stage3.5.rbr_identity':'module_list.67', 34 | 'stage3.6.rbr_dense':'module_list.69','stage3.6.rbr_1x1':'module_list.70','stage3.6.rbr_identity':'module_list.71', 35 | 'stage3.7.rbr_dense':'module_list.73','stage3.7.rbr_1x1':'module_list.74','stage3.7.rbr_identity':'module_list.75', 36 | 'stage3.8.rbr_dense':'module_list.77','stage3.8.rbr_1x1':'module_list.78','stage3.8.rbr_identity':'module_list.79', 37 | 'stage3.9.rbr_dense':'module_list.81','stage3.9.rbr_1x1':'module_list.82','stage3.9.rbr_identity':'module_list.83', 38 | 'stage3.10.rbr_dense':'module_list.85','stage3.10.rbr_1x1':'module_list.86','stage3.10.rbr_identity':'module_list.87', 39 | 'stage3.11.rbr_dense':'module_list.89','stage3.11.rbr_1x1':'module_list.90','stage3.11.rbr_identity':'module_list.91', 40 | 'stage3.12.rbr_dense':'module_list.93','stage3.12.rbr_1x1':'module_list.94','stage3.12.rbr_identity':'module_list.95', 41 | 'stage3.13.rbr_dense':'module_list.97','stage3.13.rbr_1x1':'module_list.98','stage3.13.rbr_identity':'module_list.99', 42 | 'stage3.14.rbr_dense':'module_list.101','stage3.14.rbr_1x1':'module_list.102','stage3.14.rbr_identity':'module_list.103', 43 | 'stage3.15.rbr_dense':'module_list.105','stage3.15.rbr_1x1':'module_list.106','stage3.15.rbr_identity':'module_list.107', 44 | # ---------- 45 | 'stage4.0.rbr_dense':'module_list.109','stage4.0.rbr_1x1':'module_list.110' 46 | } 47 | 48 | use_dict = {'stage0.rbr_dense': 'module_list.0', 'stage0.rbr_1x1': 'module_list.1', 49 | 'stage1.0.rbr_dense': 'module_list.4', 'stage1.0.rbr_1x1': 'module_list.5', 50 | 'stage1.1.rbr_dense': 'module_list.8', 'stage1.1.rbr_1x1': 'module_list.9', 'stage1.1.rbr_identity': 'module_list.10', 51 | 'stage1.2.rbr_dense': 'module_list.12', 'stage1.2.rbr_1x1': 'module_list.13', 'stage1.2.rbr_identity': 'module_list.14', 52 | 'stage1.3.rbr_dense': 'module_list.16', 'stage1.3.rbr_1x1': 'module_list.17', 'stage1.3.rbr_identity': 'module_list.18', 53 | 'stage2.0.rbr_dense': 'module_list.20', 'stage2.0.rbr_1x1': 'module_list.21', 54 | 'stage2.1.rbr_dense': 'module_list.24', 
'stage2.1.rbr_1x1': 'module_list.25', 'stage2.1.rbr_identity': 'module_list.26', 55 | 'stage2.2.rbr_dense': 'module_list.28', 'stage2.2.rbr_1x1': 'module_list.29', 'stage2.2.rbr_identity': 'module_list.30', 56 | 'stage2.3.rbr_dense': 'module_list.32', 'stage2.3.rbr_1x1': 'module_list.33', 'stage2.3.rbr_identity': 'module_list.34', 57 | 'stage2.4.rbr_dense': 'module_list.36', 'stage2.4.rbr_1x1': 'module_list.37', 'stage2.4.rbr_identity': 'module_list.38', 58 | 'stage2.5.rbr_dense': 'module_list.40', 'stage2.5.rbr_1x1': 'module_list.41', 'stage2.5.rbr_identity': 'module_list.42', 59 | 'stage3.0.rbr_dense': 'module_list.44', 'stage3.0.rbr_1x1': 'module_list.45', 60 | 'stage3.1.rbr_dense': 'module_list.48', 'stage3.1.rbr_1x1': 'module_list.49', 'stage3.1.rbr_identity': 'module_list.50', 61 | 'stage3.2.rbr_dense': 'module_list.52', 'stage3.2.rbr_1x1': 'module_list.53', 'stage3.2.rbr_identity': 'module_list.54', 62 | 'stage3.3.rbr_dense': 'module_list.56', 'stage3.3.rbr_1x1': 'module_list.57', 'stage3.3.rbr_identity': 'module_list.58', 63 | 'stage3.4.rbr_dense': 'module_list.60', 'stage3.4.rbr_1x1': 'module_list.61', 'stage3.4.rbr_identity': 'module_list.62', 64 | 'stage3.5.rbr_dense': 'module_list.64', 'stage3.5.rbr_1x1': 'module_list.65', 'stage3.5.rbr_identity': 'module_list.66', 65 | 'stage3.6.rbr_dense': 'module_list.68', 'stage3.6.rbr_1x1': 'module_list.69', 'stage3.6.rbr_identity': 'module_list.70', 66 | 'stage3.7.rbr_dense': 'module_list.72', 'stage3.7.rbr_1x1': 'module_list.73', 'stage3.7.rbr_identity': 'module_list.74', 67 | 'stage3.8.rbr_dense': 'module_list.76', 'stage3.8.rbr_1x1': 'module_list.77', 'stage3.8.rbr_identity': 'module_list.78', 68 | 'stage3.9.rbr_dense': 'module_list.80', 'stage3.9.rbr_1x1': 'module_list.81', 'stage3.9.rbr_identity': 'module_list.82', 69 | 'stage3.10.rbr_dense': 'module_list.84', 'stage3.10.rbr_1x1': 'module_list.85', 'stage3.10.rbr_identity': 'module_list.86', 70 | 'stage3.11.rbr_dense': 'module_list.88', 'stage3.11.rbr_1x1': 'module_list.89', 'stage3.11.rbr_identity': 'module_list.90', 71 | 'stage3.12.rbr_dense': 'module_list.92', 'stage3.12.rbr_1x1': 'module_list.93', 'stage3.12.rbr_identity': 'module_list.94', 72 | 'stage3.13.rbr_dense': 'module_list.96', 'stage3.13.rbr_1x1': 'module_list.97', 'stage3.13.rbr_identity': 'module_list.98', 73 | 'stage3.14.rbr_dense': 'module_list.100', 'stage3.14.rbr_1x1': 'module_list.101', 'stage3.14.rbr_identity': 'module_list.102', 74 | 'stage3.15.rbr_dense': 'module_list.104', 'stage3.15.rbr_1x1': 'module_list.105', 'stage3.15.rbr_identity': 'module_list.106', 75 | 'stage4.0.rbr_dense': 'module_list.108', 'stage4.0.rbr_1x1': 'module_list.109'} 76 | 77 | # for rep_name in use_dict_old: 78 | # # yolo_name = k.replace(rep_name,use_dict[rep_name]) 79 | # yolo_name = use_dict_old[rep_name].split('.')[0]+'.'+str(int(use_dict_old[rep_name].split('.')[-1])-1) 80 | # use_dict[rep_name]=yolo_name 81 | 82 | # print(use_dict) 83 | 84 | 85 | def get_equivalent_kernel_bias2(weight): 86 | kernel3x3, bias3x3 = fuse_bn_tensor(weight[0:6]) 87 | kernel1x1, bias1x1 = fuse_bn_tensor(weight[6:]) 88 | return [kernel3x3 + pad_1x1_to_3x3_tensor(kernel1x1), bias3x3 + bias1x1] 89 | 90 | def get_equivalent_kernel_bias3(weight): 91 | kernel3x3, bias3x3 = fuse_bn_tensor(weight[0:6]) 92 | kernel1x1, bias1x1 = fuse_bn_tensor(weight[6:12]) 93 | kernelid, biasid = fuse_bn_tensor_bn(weight[12:]) 94 | return [kernel3x3 + pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid] 95 | 96 | def pad_1x1_to_3x3_tensor(kernel1x1): 97 | if 
kernel1x1 is None: 98 | return 0 99 | else: 100 | return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) 101 | 102 | def fuse_bn_tensor(branch): 103 | kernel = branch[0] 104 | gamma = branch[1] 105 | beta = branch[2] 106 | running_mean = branch[3] 107 | running_var = branch[4] 108 | eps = 1e-05 109 | 110 | std = (running_var + eps).sqrt() 111 | t = (gamma / std).reshape(-1, 1, 1, 1) 112 | return kernel * t, beta - running_mean * gamma / std 113 | 114 | def fuse_bn_tensor_bn(branch): 115 | input_dim = list(branch[0].size())[0] 116 | kernel_value = np.zeros((input_dim, input_dim, 3, 3), dtype=np.float32) 117 | for i in range(input_dim): 118 | kernel_value[i, i % input_dim, 1, 1] = 1 119 | id_tensor = torch.from_numpy(kernel_value).to(branch[0].device) 120 | kernel = id_tensor 121 | gamma = branch[0] 122 | beta = branch[1] 123 | running_mean = branch[2] 124 | running_var = branch[3] 125 | eps = 1e-05 126 | 127 | std = (running_var + eps).sqrt() 128 | t = (gamma / std).reshape(-1, 1, 1, 1) 129 | return kernel * t, beta - running_mean * gamma / std 130 | 131 | # def repvgg_convert(): 132 | # kernel, bias = self.get_equivalent_kernel_bias() 133 | # return kernel.detach().cpu().numpy(), bias.detach().cpu().numpy() 134 | 135 | def main(): 136 | device = torch_utils.select_device('2') 137 | # cfg = 'cfg/yolov3-repvggB0-hand.cfg' 138 | cfg = 'cfg/yolov3-repvggB1-hand.cfg' 139 | img_size=416 140 | # weights = 'weights_repvgg/B0/best.pt' 141 | # weights = 'weights_repvgg/B0/last.pt' 142 | weights = 'weights_repvgg/B1/last.pt' 143 | model = Darknet(cfg, img_size).to(device) 144 | ck = torch.load(weights, map_location=device) 145 | if 'model' in ck: 146 | model_ = ck['model'] 147 | else: 148 | model_ = ck 149 | # print(type(list(model_.items())[0][1])) 150 | convert_dict={} 151 | tmp2=[] 152 | tmp3=[] 153 | for k,v in model_.items(): 154 | ik = int(k.split('.')[1]) 155 | i = 2 * (int(k.split('.')[1]) // 4) 156 | 157 | if i==0 or i==2 or i==10 or i==22 or i==54: 158 | tmp2.append(v) 159 | if len(tmp2)==2*6: 160 | w,b = get_equivalent_kernel_bias2(tmp2) 161 | convert_dict['module_list.'+str(i)+'.conv.weight']=w 162 | convert_dict['module_list.'+str(i)+'.conv.bias']=b 163 | tmp2=[] 164 | elif i<=55: 165 | tmp3.append(v) 166 | if len(tmp3)==3*6-1: 167 | w,b = get_equivalent_kernel_bias3(tmp3) 168 | convert_dict['module_list.'+str(i)+'.conv.weight']=w 169 | convert_dict['module_list.'+str(i)+'.conv.bias']=b 170 | tmp3=[] 171 | elif i>55: 172 | convert_dict[k.replace(str(ik),str(ik-56))]=v 173 | 174 | # torch.save(convert_dict,'repB0_convert_last.pt') 175 | torch.save(convert_dict,'repB1_convert_last.pt') 176 | 177 | main() -------------------------------------------------------------------------------- /data/converter.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from PIL import Image 3 | import os, glob 4 | import datetime 5 | import shutil 6 | 7 | running_from_path = os.getcwd() 8 | created_images_dir = 'images' 9 | created_labels_dir = 'labels' 10 | data_dir = 'data' # data_dir为脚本所在的文件夹 11 | 12 | def hms_string(sec_elapsed): # 格式化显示已消耗时间 13 | h = int(sec_elapsed / (60 * 60)) 14 | m = int((sec_elapsed % (60 * 60)) / 60) 15 | s = sec_elapsed % 60. 
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s) 17 | 18 | def generate_dir(set_name, root_path): # create the matching sub-directories under images/ and labels/ 19 | images_dir = os.path.join(root_path, 'images') 20 | annotation_dir = os.path.join(root_path, 'annotations') 21 | 22 | new_images_dir = os.path.join(created_images_dir, set_name) # images are copied from the original folder into this directory 23 | new_annotation_dir = os.path.join(created_labels_dir, set_name) 24 | 25 | if not os.path.exists(new_images_dir): 26 | os.makedirs(new_images_dir) 27 | 28 | if not os.path.exists(new_annotation_dir): 29 | os.makedirs(new_annotation_dir) 30 | 31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # copy the images from the original folder into the new folder 32 | shutil.copy(img, new_images_dir) 33 | 34 | os.chdir(annotation_dir) # switch into the annotation directory 35 | matlab_annotations = glob.glob("*.mat") # file names only, without paths 36 | os.chdir(running_from_path) # switch back to the original working directory 37 | 38 | for matfile in matlab_annotations: 39 | filename = matfile.split(".")[0] 40 | 41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg")) 42 | 43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False) 44 | 45 | boxes = content["boxes"] 46 | 47 | width, height = pil_image.size 48 | 49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs: 50 | for box_idx, box in enumerate(boxes.T): 51 | a = box[0][0][0][0] 52 | b = box[0][0][0][1] 53 | c = box[0][0][0][2] 54 | d = box[0][0][0][3] 55 | 56 | aXY = (a[0][1], a[0][0]) 57 | bXY = (b[0][1], b[0][0]) 58 | cXY = (c[0][1], c[0][0]) 59 | dXY = (d[0][1], d[0][0]) 60 | 61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0]) 62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0]) 63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1]) 64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1]) 65 | 66 | # clip to keep the box inside the image 67 | maxX = min(maxX, width-1) 68 | minX = max(minX, 0) 69 | maxY = min(maxY, height-1) 70 | minY = max(minY, 0) 71 | 72 | # (box width / image width) 73 | norm_width = (maxX - minX) / width 74 | 75 | # (box height / image height) 76 | norm_height = (maxY - minY) / height 77 | 78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2 79 | 80 | norm_center_x = center_x / width 81 | norm_center_y = center_y / height 82 | 83 | if box_idx != 0: 84 | hs.write("\n") 85 | 86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # the leading 0 is the class id 87 | 88 | def create_txt(dirlist, filename): 89 | with open(filename, "w") as txtfile: # write the txt file under the data folder 90 | imglist = [] 91 | 92 | for dir in dirlist: # dir='images/test' 93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg' 94 | 95 | for idx, img in enumerate(imglist): 96 | if idx != 0: 97 | txtfile.write("\n") 98 | txtfile.write(os.path.join(data_dir, img)) # prepend the data/ prefix 99 | 100 | if __name__ == '__main__': 101 | start_time = datetime.datetime.now() 102 | 103 | generate_dir("train", "hand_dataset/training_dataset/training_data") # the first argument is the name of the generated sub-directory 104 | generate_dir("test", "hand_dataset/test_dataset/test_data") 105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data") 106 | 107 | create_txt((os.path.join(created_images_dir, 'train'), # merge the images under train and validation into the train split 108 | os.path.join(created_images_dir, 'validation')), 109 | 'train.txt') 110 | create_txt((os.path.join(created_images_dir, 'test'), ), 111 | 'valid.txt') 112 | 113 | end_time = datetime.datetime.now() 114 | seconds_elapsed = (end_time - start_time).total_seconds() 115 | print("It took {} to execute this".format(hms_string(seconds_elapsed)))
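# --- Illustrative helper (not part of the original repo) --------------------
# The labels written above follow the YOLO text format: one line per box,
# "class x_center y_center width height", with every value normalised to
# [0, 1] by the image width/height. A minimal sketch of the inverse mapping,
# handy for spot-checking the generated label files; the function name and
# the sample numbers in the usage comment are made up for illustration.
def yolo_line_to_corners(line, img_w, img_h):
    # Decode "0 cx cy w h" (normalised) back to pixel (x_min, y_min, x_max, y_max).
    cls_id, cx, cy, w, h = line.split()
    cx, cy = float(cx) * img_w, float(cy) * img_h
    w, h = float(w) * img_w, float(h) * img_h
    return int(cls_id), cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2

# Example: yolo_line_to_corners("0 0.5 0.5 0.25 0.1", 416, 416)
#          -> (0, 156.0, 187.2, 260.0, 228.8)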
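# --- Illustrative sanity check (not part of the original repo) --------------
# convert_repyolo.py earlier in this repo folds each RepvggBlock's 3x3+BN,
# 1x1+BN and identity+BN branches into a single 3x3 convolution
# (fuse_bn_tensor / pad_1x1_to_3x3_tensor / get_equivalent_kernel_bias*).
# The self-contained sketch below numerically checks the core conv+BN folding
# identity on random weights; the function name is illustrative and nothing
# here loads this repo's checkpoints.
def check_conv_bn_fold():
    import torch
    import torch.nn as nn
    torch.manual_seed(0)
    conv = nn.Conv2d(4, 8, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8).eval()  # conversion-time behaviour: use running statistics
    bn.running_mean.uniform_(-1, 1)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.data.uniform_(0.5, 1.5)
    bn.bias.data.uniform_(-1, 1)
    # Fold BN into the conv: w' = w * gamma / std, b' = beta - mean * gamma / std
    std = (bn.running_var + bn.eps).sqrt()
    t = (bn.weight.data / std).reshape(-1, 1, 1, 1)
    fused = nn.Conv2d(4, 8, 3, padding=1, bias=True)
    fused.weight.data = conv.weight.data * t
    fused.bias.data = bn.bias.data - bn.running_mean * bn.weight.data / std
    x = torch.randn(1, 4, 16, 16)
    with torch.no_grad():
        return torch.allclose(bn(conv(x)), fused(x), atol=1e-5)

# Example: check_conv_bn_fold() is expected to return True.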
-------------------------------------------------------------------------------- /data/oxfordhand.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=data/oxfordhand.names 5 | -------------------------------------------------------------------------------- /data/oxfordhand.names: -------------------------------------------------------------------------------- 1 | hand 2 | 3 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 47 | return 48 | 49 | # Half precision 50 | half = half and device.type != 'cpu' # half precision only supported on CUDA 51 | if half: 52 | model.half() 53 | 54 | # Set Dataloader 55 | vid_path, vid_writer = None, None 56 | if webcam: 57 | view_img = True 58 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 59 | dataset = LoadStreams(source, img_size=img_size, half=half) 60 | else: 61 | save_img = True 62 | dataset = LoadImages(source, img_size=img_size, half=half) 63 | 64 | # Get classes and colors 65 | classes = load_classes(parse_data_cfg(opt.data)['names']) 66 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 67 | 68 | # Run inference 69 | t0 = time.time() 70 | for path, img, im0s, vid_cap in dataset: 71 | t = time.time() 72 | 73 | # Get detections 74 | img = torch.from_numpy(img).to(device) 75 | if img.ndimension() == 3: 76 | img = img.unsqueeze(0) 77 | pred = model(img)[0] 78 | 79 | if opt.half: 80 | pred = pred.float() 81 | 82 | # Apply NMS 83 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres) 84 | 85 | # Apply 86 | if classify: 87 | pred = apply_classifier(pred, 
modelc, img, im0s) 88 | 89 | # Process detections 90 | for i, det in enumerate(pred): # detections per image 91 | if webcam: # batch_size >= 1 92 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 93 | else: 94 | p, s, im0 = path, '', im0s 95 | 96 | save_path = str(Path(out) / Path(p).name) 97 | s += '%gx%g ' % img.shape[2:] # print string 98 | if det is not None and len(det): 99 | # Rescale boxes from img_size to im0 size 100 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 101 | 102 | # Print results 103 | for c in det[:, -1].unique(): 104 | n = (det[:, -1] == c).sum() # detections per class 105 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string 106 | 107 | # Write results 108 | for *xyxy, conf, _, cls in det: 109 | if save_txt: # Write to file 110 | with open(save_path + '.txt', 'a') as file: 111 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 112 | 113 | if save_img or view_img: # Add bbox to image 114 | label = '%s %.2f' % (classes[int(cls)], conf) 115 | #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 116 | plot_one_box(xyxy, im0, label=None, color=colors[int(cls)]) 117 | 118 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 119 | 120 | # Stream results 121 | if view_img: 122 | cv2.imshow(p, im0) 123 | 124 | # Save results (image with detections) 125 | if save_img: 126 | if dataset.mode == 'images': 127 | cv2.imwrite(save_path, im0) 128 | else: 129 | if vid_path != save_path: # new video 130 | vid_path = save_path 131 | if isinstance(vid_writer, cv2.VideoWriter): 132 | vid_writer.release() # release previous video writer 133 | 134 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 135 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 136 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 137 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 138 | vid_writer.write(im0) 139 | 140 | if save_txt or save_img: 141 | print('Results saved to %s' % os.getcwd() + os.sep + out) 142 | if platform == 'darwin': # MacOS 143 | os.system('open ' + out + ' ' + save_path) 144 | 145 | print('Done. (%.3fs)' % (time.time() - t0)) 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser() 150 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 151 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 152 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 153 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 154 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 155 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 156 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 157 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 158 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 159 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 160 | parser.add_argument('--device', default='', help='device id (i.e. 
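Before boxes are drawn or written to disk, the detection loop above maps them from the resized network input back to the original frame with scale_coords from utils/utils.py, which is not shown in this dump. The sketch below is an assumption about what that helper does under the usual letterbox convention (undo the padding, then undo the resize gain), not a copy of it:

```python
def scale_coords_sketch(net_shape, boxes, orig_shape):
    # net_shape / orig_shape are (height, width); boxes is an (N, 4) xyxy tensor in network-input pixels.
    gain = max(net_shape) / max(orig_shape)              # resize factor applied by the letterbox
    pad_x = (net_shape[1] - orig_shape[1] * gain) / 2    # horizontal padding added on each side
    pad_y = (net_shape[0] - orig_shape[0] * gain) / 2    # vertical padding added on each side
    boxes[:, [0, 2]] -= pad_x
    boxes[:, [1, 3]] -= pad_y
    boxes[:, :4] /= gain
    boxes[:, :4] = boxes[:, :4].clamp(min=0)             # the real helper also clips to the image size
    return boxes
```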
0 or 0,1) or cpu') 161 | parser.add_argument('--view-img', action='store_true', help='display results') 162 | opt = parser.parse_args() 163 | print(opt) 164 | 165 | with torch.no_grad(): 166 | detect() 167 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from utils.google_utils import * 4 | from utils.parse_config import * 5 | from utils.utils import * 6 | import copy 7 | import os 8 | ONNX_EXPORT = False 9 | 10 | from quant_dorefa import QuanConv as Conv_q 11 | 12 | 13 | #权重量化为W_bit位 14 | W_bit=16 15 | #激活量化为A_bit位 16 | A_bit=16 17 | 18 | def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): 19 | result = nn.Sequential() 20 | result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 21 | kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) 22 | result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) 23 | return result 24 | 25 | def create_modules(module_defs, img_size, arc, deploy): 26 | # Constructs module list of layer blocks from module configuration in module_defs 27 | 28 | hyperparams = module_defs.pop(0) 29 | output_filters = [int(hyperparams['channels'])] 30 | module_list = nn.ModuleList() 31 | routs = [] # list of layers which rout to deeper layes 32 | yolo_index = -1 33 | 34 | for i, mdef in enumerate(module_defs): 35 | modules = nn.Sequential() 36 | 37 | if mdef['type'] == 'quantize_convolutional': 38 | bn = int(mdef['batch_normalize']) 39 | filters = int(mdef['filters']) 40 | kernel_size = int(mdef['size']) 41 | pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0 42 | modules.add_module('Conv2d', Conv_q(in_channels=output_filters[-1], 43 | out_channels=filters, 44 | kernel_size=kernel_size, 45 | stride=int(mdef['stride']), 46 | padding=pad, 47 | bias=not bn, 48 | nbit_w=W_bit, 49 | nbit_a=A_bit)) 50 | 51 | 52 | if bn: 53 | modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) 54 | if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 55 | modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) 56 | # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10)) 57 | # modules.add_module('activation', Swish()) 58 | 59 | elif mdef['type'] == 'convolutional': 60 | bn = int(mdef['batch_normalize']) 61 | filters = int(mdef['filters']) 62 | kernel_size = int(mdef['size']) 63 | pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0 64 | modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], 65 | out_channels=filters, 66 | kernel_size=kernel_size, 67 | stride=int(mdef['stride']), 68 | padding=pad, 69 | bias=not bn)) 70 | 71 | 72 | if bn: 73 | modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) 74 | if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 75 | modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) 76 | 77 | elif mdef['type'] == 'RepvggBlock': 78 | if deploy==False: 79 | modules_repvgg = nn.ModuleList() 80 | bn = int(mdef['batch_normalize']) 81 | filters = int(mdef['filters']) 82 | kernel_size = int(mdef['size']) 83 | stride=int(mdef['stride']) 84 | pad = int(mdef['pad']) 85 | dense = conv_bn(in_channels=output_filters[-1],out_channels=filters,kernel_size=kernel_size,stride=stride,padding=pad) 86 | identy = 
nn.BatchNorm2d(num_features=filters) if output_filters[-1]==filters and stride==1 else None 87 | conv1x1 = conv_bn(in_channels=output_filters[-1],out_channels=filters,kernel_size=1,stride=stride,padding=0) 88 | modules_repvgg.append(dense) 89 | modules_repvgg.append(conv1x1) 90 | modules_repvgg.append(identy) 91 | if mdef['activation'] == 'relu': 92 | modules_repvgg.append(nn.ReLU()) 93 | module_list.extend(modules_repvgg) 94 | output_filters.append(filters) 95 | continue 96 | else: 97 | bn = int(mdef['batch_normalize']) 98 | filters = int(mdef['filters']) 99 | kernel_size = int(mdef['size']) 100 | stride=int(mdef['stride']) 101 | pad = int(mdef['pad']) 102 | modules.add_module('conv', nn.Conv2d(in_channels=output_filters[-1], 103 | out_channels=filters, 104 | kernel_size=kernel_size, 105 | stride=stride, 106 | padding=pad, 107 | bias=True)) 108 | module_list.append(modules) 109 | if mdef['activation'] == 'relu': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 110 | module_list.extend(nn.ModuleList().append(nn.ReLU())) 111 | output_filters.append(filters) 112 | continue 113 | 114 | elif mdef['type'] == 'maxpool': 115 | kernel_size = int(mdef['size']) 116 | stride = int(mdef['stride']) 117 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 118 | if kernel_size == 2 and stride == 1: # yolov3-tiny 119 | modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) 120 | modules.add_module('MaxPool2d', maxpool) 121 | else: 122 | modules = maxpool 123 | 124 | elif mdef['type'] == 'upsample': 125 | modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest') 126 | 127 | elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer 128 | layers = [int(x) for x in mdef['layers'].split(',')] 129 | filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers]) 130 | routs.extend([l if l > 0 else l + i for l in layers]) 131 | # if mdef[i+1]['type'] == 'reorg3d': 132 | # modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest') # reorg3d 133 | 134 | elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer 135 | filters = output_filters[int(mdef['from'])] 136 | layer = int(mdef['from']) 137 | routs.extend([i + layer if layer < 0 else layer]) 138 | 139 | elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale 140 | # torch.Size([16, 128, 104, 104]) 141 | # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer 142 | pass 143 | 144 | elif mdef['type'] == 'yolo': 145 | yolo_index += 1 146 | mask = [int(x) for x in mdef['mask'].split(',')] # anchor mask 147 | modules = YOLOLayer(anchors=mdef['anchors'][mask], # anchor list 148 | nc=int(mdef['classes']), # number of classes 149 | img_size=img_size, # (416, 416) 150 | yolo_index=yolo_index, # 0, 1 or 2 151 | arc=arc) # yolo architecture 152 | 153 | # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) 154 | try: 155 | if arc == 'defaultpw' or arc == 'Fdefaultpw': # default with positive weights 156 | b = [-4, -3.6] # obj, cls 157 | elif arc == 'default': # default no pw (40 cls, 80 obj) 158 | b = [-5.5, -4.0] 159 | elif arc == 'uBCE': # unified BCE (80 classes) 160 | b = [0, -8.5] 161 | elif arc == 'uCE': # unified CE (1 background + 80 classes) 162 | b = [10, -0.1] 163 | elif arc == 'Fdefault': # Focal default no pw (28 cls, 21 obj, no pw) 164 | b = [-2.1, -1.8] 165 | elif arc == 'uFBCE' or arc == 'uFBCEpw': # unified 
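The 'RepvggBlock' branch above assembles the training-time block as three parallel branches (3x3 conv+BN, 1x1 conv+BN, and a BN-only identity when input and output shapes match at stride 1) and, in deploy mode, a single biased 3x3 conv. A compact standalone sketch of the same block, written as a hypothetical module for illustration rather than the repo's exact class; conv_bn mirrors the helper at the top of models.py:

```python
import torch.nn as nn

def conv_bn(c_in, c_out, k, stride, pad):
    # Mirrors models.py's conv_bn: bias-free conv followed by BatchNorm.
    return nn.Sequential(nn.Conv2d(c_in, c_out, k, stride, pad, bias=False),
                         nn.BatchNorm2d(c_out))

class RepVGGBlockSketch(nn.Module):
    def __init__(self, c_in, c_out, stride=1, deploy=False):
        super().__init__()
        self.deploy = deploy
        if deploy:
            # After conversion a single biased 3x3 conv carries all three branches.
            self.reparam = nn.Conv2d(c_in, c_out, 3, stride, 1, bias=True)
        else:
            self.dense = conv_bn(c_in, c_out, 3, stride, 1)
            self.conv1x1 = conv_bn(c_in, c_out, 1, stride, 0)
            self.identity = nn.BatchNorm2d(c_out) if c_in == c_out and stride == 1 else None
        self.act = nn.ReLU()

    def forward(self, x):
        if self.deploy:
            return self.act(self.reparam(x))
        id_out = 0 if self.identity is None else self.identity(x)
        return self.act(self.dense(x) + self.conv1x1(x) + id_out)
```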
FocalBCE (5120 obj, 80 classes) 166 | b = [0, -6.5] 167 | elif arc == 'uFCE': # unified FocalCE (64 cls, 1 background + 80 classes) 168 | b = [7.7, -1.1] 169 | 170 | bias = module_list[-1][0].bias.view(len(mask), -1) # 255 to 3x85 171 | bias[:, 4] += b[0] - bias[:, 4].mean() # obj 172 | bias[:, 5:] += b[1] - bias[:, 5:].mean() # cls 173 | # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index] # list of tensors [3x85, 3x85, 3x85] 174 | module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1)) 175 | # utils.print_model_biases(model) 176 | except: 177 | print('WARNING: smart bias initialization failure.') 178 | 179 | else: 180 | print('Warning: Unrecognized Layer Type: ' + mdef['type']) 181 | 182 | # Register module list and number of output filters 183 | module_list.append(modules) 184 | output_filters.append(filters) 185 | 186 | return module_list, routs 187 | 188 | 189 | class Swish(nn.Module): 190 | def __init__(self): 191 | super(Swish, self).__init__() 192 | 193 | def forward(self, x): 194 | return x * torch.sigmoid(x) 195 | 196 | 197 | class YOLOLayer(nn.Module): 198 | def __init__(self, anchors, nc, img_size, yolo_index, arc): 199 | super(YOLOLayer, self).__init__() 200 | 201 | self.anchors = torch.Tensor(anchors) 202 | self.na = len(anchors) # number of anchors (3) 203 | self.nc = nc # number of classes (80) 204 | self.nx = 0 # initialize number of x gridpoints 205 | self.ny = 0 # initialize number of y gridpoints 206 | self.arc = arc 207 | 208 | if ONNX_EXPORT: # grids must be computed in __init__ 209 | stride = [32, 16, 8][yolo_index] # stride of this layer 210 | nx = int(img_size[1] / stride) # number x grid points 211 | ny = int(img_size[0] / stride) # number y grid points 212 | create_grids(self, img_size, (nx, ny)) 213 | 214 | def forward(self, p, img_size, var=None): 215 | if ONNX_EXPORT: 216 | bs = 1 # batch size 217 | else: 218 | bs, ny, nx = p.shape[0], p.shape[-2], p.shape[-1] 219 | if (self.nx, self.ny) != (nx, ny): 220 | create_grids(self, img_size, (nx, ny), p.device, p.dtype) 221 | 222 | # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) 223 | p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction 224 | 225 | if self.training: 226 | return p 227 | 228 | elif ONNX_EXPORT: 229 | # Constants CAN NOT BE BROADCAST, ensure correct shape! 230 | ngu = self.ng.repeat((1, self.na * self.nx * self.ny, 1)) 231 | grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2)) 232 | anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / ngu 233 | 234 | p = p.view(-1, 5 + self.nc) 235 | xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0] # x, y 236 | wh = torch.exp(p[..., 2:4]) * anchor_wh[0] # width, height 237 | p_conf = torch.sigmoid(p[:, 4:5]) # Conf 238 | p_cls = F.softmax(p[:, 5:85], 1) * p_conf # SSD-like conf 239 | return torch.cat((xy / ngu[0], wh, p_conf, p_cls), 1).t() 240 | 241 | # p = p.view(1, -1, 5 + self.nc) 242 | # xy = torch.sigmoid(p[..., 0:2]) + grid_xy # x, y 243 | # wh = torch.exp(p[..., 2:4]) * anchor_wh # width, height 244 | # p_conf = torch.sigmoid(p[..., 4:5]) # Conf 245 | # p_cls = p[..., 5:5 + self.nc] 246 | # # Broadcasting only supported on first dimension in CoreML. 
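At inference time each YOLO head decodes its raw outputs into boxes: a sigmoid offset added to the grid cell gives the centre, an exponential applied to the anchor priors gives width and height, and everything is scaled back to pixels by the layer stride. The snippet below is a condensed paraphrase of the inference branch for the 'default' arc, with grid_xy and anchor_wh shaped as create_grids builds them:

```python
import torch

def decode_yolo(p, grid_xy, anchor_wh, stride):
    # p: (bs, na, ny, nx, 5 + nc) raw head output; grid_xy: (1, 1, ny, nx, 2); anchor_wh: (1, na, 1, 1, 2).
    io = p.clone()
    io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + grid_xy   # box centre, in grid units
    io[..., 2:4] = torch.exp(io[..., 2:4]) * anchor_wh     # box size, in grid units
    io[..., :4] *= stride                                  # back to network-input pixels
    torch.sigmoid_(io[..., 4:])                            # objectness and class scores ('default' arc)
    return io.reshape(io.shape[0], -1, io.shape[-1])       # (bs, na*ny*nx, 5 + nc)
```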
See onnx-coreml/_operators.py 247 | # # p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf 248 | # p_cls = torch.exp(p_cls).permute((2, 1, 0)) 249 | # p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent 250 | # p_cls = p_cls.permute(2, 1, 0) 251 | # return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() 252 | 253 | else: # inference 254 | # s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2) 255 | io = p.clone() # inference output 256 | io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy 257 | io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method 258 | # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh # wh power method 259 | io[..., :4] *= self.stride 260 | 261 | if 'default' in self.arc: # seperate obj and cls 262 | torch.sigmoid_(io[..., 4:]) 263 | elif 'BCE' in self.arc: # unified BCE (80 classes) 264 | torch.sigmoid_(io[..., 5:]) 265 | io[..., 4] = 1 266 | elif 'CE' in self.arc: # unified CE (1 background + 80 classes) 267 | io[..., 4:] = F.softmax(io[..., 4:], dim=4) 268 | io[..., 4] = 1 269 | 270 | if self.nc == 1: 271 | io[..., 5] = 1 # single-class model https://github.com/ultralytics/yolov3/issues/235 272 | 273 | # reshape from [1, 3, 13, 13, 85] to [1, 507, 85] 274 | return io.view(bs, -1, 5 + self.nc), p 275 | 276 | 277 | class Darknet(nn.Module): 278 | # YOLOv3 object detection model 279 | 280 | def __init__(self, cfg, img_size=(416, 416), arc='default', deploy=False): 281 | #我的添加 282 | super(Darknet, self).__init__() 283 | if isinstance(cfg, str): 284 | self.module_defs = parse_model_cfg(cfg) 285 | elif isinstance(cfg, list): 286 | self.module_defs = cfg 287 | 288 | self.hyperparams=copy.deepcopy(self.module_defs[0]) 289 | self.deploy = deploy 290 | 291 | self.module_list, self.routs = create_modules(self.module_defs, img_size, arc, deploy=self.deploy) 292 | # print(self.module_list) 293 | # print(self.routs) 294 | self.yolo_layers = get_yolo_layers(self) 295 | 296 | # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 297 | self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision 298 | self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training 299 | 300 | def forward(self, x, var=None): 301 | img_size = x.shape[-2:] 302 | layer_outputs = [] 303 | output = [] 304 | 305 | # print(self.module_defs) 306 | # print(self.module_list) 307 | # for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)): 308 | i = 0 309 | for mdef in self.module_defs: 310 | # module = self.module_list[i] 311 | mtype = mdef['type'] 312 | if mtype in ['convolutional', 'quantize_convolutional','upsample', 'maxpool']: 313 | module = self.module_list[i] 314 | # print(module) 315 | x = module(x) 316 | elif mtype == 'RepvggBlock': 317 | if self.deploy==False: 318 | # print(i) 319 | module=[self.module_list[i],self.module_list[i+1],self.module_list[i+2],self.module_list[i+3]] 320 | if module[2] is None: 321 | id_out=0 322 | else: 323 | id_out=module[2](x) 324 | x = module[3](module[0](x) + module[1](x) + id_out) 325 | # layer_outputs.append(x if int((i-1)/4+1) in self.routs else []) ### 326 | layer_outputs.append(x if int(i/4) in self.routs else []) 327 | i = i + 4 328 | continue 329 | else: 330 | module = self.module_list[i] 331 | x = module(x) 332 | module = self.module_list[i+1] 333 | x = module(x) 334 | layer_outputs.append(x if int(i/2) in self.routs else []) 335 | i = i + 2 336 
| continue 337 | elif mtype == 'route': 338 | module = self.module_list[i] 339 | # print(module) 340 | # print(i) 341 | layers = [int(x) for x in mdef['layers'].split(',')] 342 | if len(layers) == 1: 343 | x = layer_outputs[layers[0]] 344 | else: 345 | try: 346 | x = torch.cat([layer_outputs[i] for i in layers], 1) 347 | except: # apply stride 2 for darknet reorg layer 348 | layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5]) 349 | x = torch.cat([layer_outputs[i] for i in layers], 1) 350 | # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape) 351 | elif mtype == 'shortcut': 352 | module = self.module_list[i] 353 | x = x + layer_outputs[int(mdef['from'])] 354 | elif mtype == 'yolo': 355 | module = self.module_list[i] 356 | x = module(x, img_size) 357 | output.append(x) 358 | 359 | if self.deploy==False: 360 | layer_outputs.append(x if i+1-113+28 in self.routs else []) ### 361 | else: 362 | layer_outputs.append(x if i-28 in self.routs else []) 363 | i = i + 1 364 | 365 | if self.training: 366 | return output 367 | elif ONNX_EXPORT: 368 | output = torch.cat(output, 1) # cat 3 layers 85 x (507, 2028, 8112) to 85 x 10647 369 | nc = self.module_list[self.yolo_layers[0]].nc # number of classes 370 | return output[5:5 + nc].t(), output[:4].t() # ONNX scores, boxes 371 | else: 372 | io, p = list(zip(*output)) # inference output, training output 373 | return torch.cat(io, 1), p 374 | 375 | def fuse(self): 376 | # Fuse Conv2d + BatchNorm2d layers throughout model 377 | fused_list = nn.ModuleList() 378 | for a in list(self.children())[0]: 379 | if isinstance(a, nn.Sequential): 380 | for i, b in enumerate(a): 381 | if isinstance(b, nn.modules.batchnorm.BatchNorm2d): 382 | # fuse this bn layer with the previous conv2d layer 383 | conv = a[i - 1] 384 | fused = torch_utils.fuse_conv_and_bn(conv, b) 385 | a = nn.Sequential(fused, *list(a.children())[i + 1:]) 386 | break 387 | fused_list.append(a) 388 | self.module_list = fused_list 389 | # model_info(self) # yolov3-spp reduced from 225 to 152 layers 390 | 391 | 392 | def get_yolo_layers(model): 393 | return [i for i, x in enumerate(model.module_defs) if x['type'] == 'yolo'] # [82, 94, 106] for yolov3 394 | 395 | 396 | def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32): 397 | nx, ny = ng # x and y grid size 398 | self.img_size = max(img_size) 399 | self.stride = self.img_size / max(ng) 400 | 401 | # build xy offsets 402 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 403 | self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2)) 404 | 405 | # build wh gains 406 | self.anchor_vec = self.anchors.to(device) / self.stride 407 | self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type) 408 | self.ng = torch.Tensor(ng).to(device) 409 | self.nx = nx 410 | self.ny = ny 411 | 412 | 413 | def load_darknet_weights(self, weights, cutoff=-1): 414 | # Parses and loads the weights stored in 'weights' 415 | 416 | # Establish cutoffs (load layers between 0 and cutoff. 
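For 'route' and 'shortcut' entries, Darknet.forward above reuses the tensors cached in layer_outputs (only indices listed in self.routs are actually kept): route concatenates feature maps along the channel dimension, shortcut adds two maps of the same shape. A tiny illustration:

```python
import torch

a = torch.randn(1, 128, 26, 26)    # cached output of one earlier layer
b = torch.randn(1, 256, 26, 26)    # cached output of another layer with the same spatial size

routed = torch.cat([a, b], dim=1)  # 'route' with two layers: channel concat -> (1, 384, 26, 26)
short = a + a                      # 'shortcut': elementwise add of same-shaped feature maps
print(routed.shape, short.shape)
```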
if cutoff = -1 all are loaded) 417 | file = Path(weights).name 418 | if file == 'darknet53.conv.74': 419 | cutoff = 75 420 | elif file == 'yolov3-tiny.conv.15': 421 | cutoff = 15 422 | 423 | # Read weights file 424 | with open(weights, 'rb') as f: 425 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 426 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 427 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 428 | 429 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 430 | 431 | ptr = 0 432 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 433 | if mdef['type'] == 'convolutional' or mdef['type'] == 'quantize_convolutional': 434 | conv_layer = module[0] 435 | if mdef['batch_normalize']=='1': 436 | # Load BN bias, weights, running mean and running variance 437 | bn_layer = module[1] 438 | num_b = bn_layer.bias.numel() # Number of biases 439 | # Bias 440 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 441 | bn_layer.bias.data.copy_(bn_b) 442 | ptr += num_b 443 | # Weight 444 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 445 | bn_layer.weight.data.copy_(bn_w) 446 | ptr += num_b 447 | # Running Mean 448 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 449 | bn_layer.running_mean.data.copy_(bn_rm) 450 | ptr += num_b 451 | # Running Var 452 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 453 | bn_layer.running_var.data.copy_(bn_rv) 454 | ptr += num_b 455 | #自己加的 456 | num_w = conv_layer.weight.numel() 457 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 458 | conv_layer.weight.data.copy_(conv_w) 459 | ptr += num_w 460 | else: 461 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 462 | num_b=255 463 | ptr += num_b 464 | num_w = int(self.module_defs[i-1]["filters"]) * 255 465 | ptr += num_w 466 | else: 467 | # Load conv. bias 468 | 469 | num_b = conv_layer.bias.numel() 470 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 471 | conv_layer.bias.data.copy_(conv_b) 472 | ptr += num_b 473 | # Load conv. weights 474 | num_w = conv_layer.weight.numel() 475 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 476 | conv_layer.weight.data.copy_(conv_w) 477 | ptr += num_w 478 | # 确保指针到达权重的最后一个位置 479 | assert ptr == len(weights) 480 | 481 | return cutoff 482 | 483 | def load_darknet_weights2(self, weights, cutoff=-1): 484 | # Parses and loads the weights stored in 'weights' 485 | 486 | # Establish cutoffs (load layers between 0 and cutoff. 
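load_darknet_weights above consumes a flat float32 stream in cfg order: for a conv with batch norm it reads BN bias, BN weight, running mean and running variance (each `filters` values long) and then the conv weights; for a conv without batch norm it reads the conv bias first. A hedged sketch of just the file framing and the pointer-style copy used above:

```python
import numpy as np
import torch

def read_darknet_stream(path):
    # Header: 3 x int32 (major, minor, revision) + 1 x int64 (images seen), then raw float32 weights.
    with open(path, 'rb') as f:
        version = np.fromfile(f, dtype=np.int32, count=3)
        seen = np.fromfile(f, dtype=np.int64, count=1)
        weights = np.fromfile(f, dtype=np.float32)
    return version, seen, weights

def copy_next(weights, ptr, param):
    # Copy the next param.numel() floats into an existing parameter/buffer and advance the pointer.
    n = param.numel()
    param.data.copy_(torch.from_numpy(weights[ptr:ptr + n]).view_as(param))
    return ptr + n

# Per convolutional layer the stream holds, in order:
#   with BN:    bn.bias, bn.weight, bn.running_mean, bn.running_var, conv.weight
#   without BN: conv.bias, conv.weight
```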
if cutoff = -1 all are loaded) 487 | file = Path(weights).name 488 | if file == 'darknet53.conv.74': 489 | cutoff = 75 490 | elif file == 'yolov3-tiny.conv.15': 491 | cutoff = 15 492 | 493 | # Read weights file 494 | with open(weights, 'rb') as f: 495 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 496 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 497 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 498 | 499 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 500 | 501 | ptr = 0 502 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 503 | if mdef['type'] == 'quantize_convolutional': 504 | conv_layer = module[0] 505 | if mdef['batch_normalize']=='0' or mdef['batch_normalize']==0: 506 | # Load BN bias, weights, running mean and running variance 507 | bn_layer = module[1] 508 | 509 | num_b = conv_layer.beta.numel() 510 | 511 | # beta 512 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.beta) 513 | conv_layer.beta.data.copy_(bn_b) 514 | ptr += num_b 515 | 516 | # gama 517 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.gamma) 518 | conv_layer.gamma.data.copy_(bn_w) 519 | ptr += num_b 520 | 521 | # Running Mean 522 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.running_mean) 523 | conv_layer.running_mean.data.copy_(bn_rm) 524 | ptr += num_b 525 | # Running Var 526 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.running_var) 527 | conv_layer.running_var.data.copy_(bn_rv) 528 | ptr += num_b 529 | #自己加的 530 | num_w = conv_layer.weight.numel() 531 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 532 | conv_layer.weight.data.copy_(conv_w) 533 | ptr += num_w 534 | else: 535 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 536 | num_b=255 537 | ptr += num_b 538 | num_w = int(self.module_defs[i-1]["filters"]) * 255 539 | ptr += num_w 540 | else: 541 | # Load conv. bias 542 | 543 | num_b = conv_layer.bias.numel() 544 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 545 | conv_layer.bias.data.copy_(conv_b) 546 | ptr += num_b 547 | # Load conv. 
weights 548 | num_w = conv_layer.weight.numel() 549 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 550 | conv_layer.weight.data.copy_(conv_w) 551 | ptr += num_w 552 | elif mdef['type'] == 'convolutional': 553 | conv_layer = module[0] 554 | if mdef['batch_normalize']=='1': 555 | # Load BN bias, weights, running mean and running variance 556 | bn_layer = module[1] 557 | num_b = bn_layer.bias.numel() # Number of biases 558 | # Bias 559 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 560 | bn_layer.bias.data.copy_(bn_b) 561 | ptr += num_b 562 | # Weight 563 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 564 | bn_layer.weight.data.copy_(bn_w) 565 | ptr += num_b 566 | # Running Mean 567 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 568 | bn_layer.running_mean.data.copy_(bn_rm) 569 | ptr += num_b 570 | # Running Var 571 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 572 | bn_layer.running_var.data.copy_(bn_rv) 573 | ptr += num_b 574 | #自己加的 575 | num_w = conv_layer.weight.numel() 576 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 577 | conv_layer.weight.data.copy_(conv_w) 578 | ptr += num_w 579 | else: 580 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 581 | num_b=255 582 | ptr += num_b 583 | num_w = int(self.module_defs[i-1]["filters"]) * 255 584 | ptr += num_w 585 | else: 586 | # Load conv. bias 587 | 588 | num_b = conv_layer.bias.numel() 589 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 590 | conv_layer.bias.data.copy_(conv_b) 591 | ptr += num_b 592 | # Load conv. weights 593 | num_w = conv_layer.weight.numel() 594 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 595 | conv_layer.weight.data.copy_(conv_w) 596 | ptr += num_w 597 | 598 | # 确保指针到达权重的最后一个位置 599 | assert ptr == len(weights) 600 | 601 | return cutoff 602 | 603 | def save_weights(self, path='model.weights', cutoff=-1): 604 | # Converts a PyTorch model to Darket format (*.pt to *.weights) 605 | # Note: Does not work if model.fuse() is applied 606 | with open(path, 'wb') as f: 607 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 608 | self.version.tofile(f) # (int32) version info: major, minor, revision 609 | self.seen.tofile(f) # (int64) number of images seen during training 610 | 611 | # Iterate through layers 612 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 613 | if mdef['type'] == 'convolutional' or mdef['type'] == 'quantize_convolutional': 614 | conv_layer = module[0] 615 | # If batch norm, load bn first 616 | if mdef['batch_normalize']=='1': 617 | bn_layer = module[1] 618 | bn_layer.bias.data.cpu().numpy().tofile(f) 619 | bn_layer.weight.data.cpu().numpy().tofile(f) 620 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 621 | bn_layer.running_var.data.cpu().numpy().tofile(f) 622 | # Load conv bias 623 | else: 624 | conv_layer.bias.data.cpu().numpy().tofile(f) 625 | # Load conv weights 626 | conv_layer.weight.data.cpu().numpy().tofile(f) 627 | elif mdef['type'] == 'RepvggBlock': 628 | conv_layer = module[0] 629 | conv_layer.bias.data.cpu().numpy().tofile(f) 630 | conv_layer.weight.data.cpu().numpy().tofile(f) 631 | 632 | 633 | def save_weights2(self, path='model.weights', cutoff=-1): 634 | # Converts a PyTorch model to Darket 
format (*.pt to *.weights) 635 | # Note: Does not work if model.fuse() is applied 636 | with open(path, 'wb') as f: 637 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 638 | self.version.tofile(f) # (int32) version info: major, minor, revision 639 | self.seen.tofile(f) # (int64) number of images seen during training 640 | 641 | # Iterate through layers 642 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 643 | if mdef['type'] == 'quantize_convolutional': 644 | conv_layer = module[0] 645 | # If batch norm, load bn first 646 | if mdef['batch_normalize']=='0' or mdef['batch_normalize']==0: 647 | 648 | conv_layer.beta.data.cpu().numpy().tofile(f) 649 | conv_layer.gamma.data.cpu().numpy().tofile(f) 650 | conv_layer.running_mean.data.cpu().numpy().tofile(f) 651 | conv_layer.running_var.data.cpu().numpy().tofile(f) 652 | # Load conv bias 653 | else: 654 | conv_layer.bias.data.cpu().numpy().tofile(f) 655 | # Load conv weights 656 | conv_layer.weight.data.cpu().numpy().tofile(f) 657 | elif mdef['type'] == 'convolutional': 658 | conv_layer = module[0] 659 | # If batch norm, load bn first 660 | if mdef['batch_normalize']=='1': 661 | bn_layer = module[1] 662 | bn_layer.bias.data.cpu().numpy().tofile(f) 663 | bn_layer.weight.data.cpu().numpy().tofile(f) 664 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 665 | bn_layer.running_var.data.cpu().numpy().tofile(f) 666 | # Load conv bias 667 | else: 668 | conv_layer.bias.data.cpu().numpy().tofile(f) 669 | # Load conv weights 670 | conv_layer.weight.data.cpu().numpy().tofile(f) 671 | 672 | 673 | 674 | def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): 675 | # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) 676 | # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') 677 | 678 | # Initialize model 679 | model = Darknet(cfg) 680 | 681 | # Load weights and save 682 | if weights.endswith('.pt'): # if PyTorch format 683 | model.load_state_dict(torch.load(weights, map_location='cpu')['model']) 684 | save_weights(model, path='converted.weights', cutoff=-1) 685 | print("Success: converted '%s' to 'converted.weights'" % weights) 686 | 687 | elif weights.endswith('.weights'): # darknet format 688 | _ = load_darknet_weights(model, weights) 689 | 690 | chkpt = {'epoch': -1, 691 | 'best_fitness': None, 692 | 'training_results': None, 693 | 'model': model.state_dict(), 694 | 'optimizer': None} 695 | 696 | torch.save(chkpt, 'converted.pt') 697 | print("Success: converted '%s' to 'converted.pt'" % weights) 698 | 699 | else: 700 | print('Error: extension not supported.') 701 | 702 | 703 | def attempt_download(weights): 704 | # Attempt to download pretrained weights if not found locally 705 | 706 | msg = weights + ' missing, download from https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI' 707 | if weights and not os.path.isfile(weights): 708 | file = Path(weights).name 709 | 710 | if file == 'yolov3-spp.weights': 711 | gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name=weights) 712 | elif file == 'yolov3-spp.pt': 713 | gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name=weights) 714 | elif file == 'yolov3.pt': 715 | gdrive_download(id='11uy0ybbOXA2hc-NJkJbbbkDwNX1QZDlz', name=weights) 716 | elif file == 'yolov3-tiny.pt': 717 | gdrive_download(id='1qKSgejNeNczgNNiCn9ZF_o55GFk1DjY_', name=weights) 718 | elif file == 'darknet53.conv.74': 719 | gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name=weights) 720 | elif file == 'yolov3-tiny.conv.15': 721 | gdrive_download(id='140PnSedCsGGgu3rOD6Ez4oI6cdDzerLC', name=weights) 722 | 723 | else: 724 | try: # download from pjreddie.com 725 | url = 'https://pjreddie.com/media/files/' + file 726 | print('Downloading ' + url) 727 | os.system('curl -f ' + url + ' -o ' + weights) 728 | except IOError: 729 | print(msg) 730 | os.system('rm ' + weights) # remove partial downloads 731 | 732 | assert os.path.exists(weights), msg # download missing weights from Google Drive 733 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | numpy 3 | opencv-python 4 | torch >= 1.2 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools 16 | # conda install -yc spyder-ide spyder-line-profiler 17 | # conda install -yc pytorch pytorch torchvision 18 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | 12 | def test(cfg, 13 | data, 14 | weights=None, 15 | batch_size=64, 16 | img_size=416, 17 | 
iou_thres=0.5, 18 | conf_thres=0.001, 19 | nms_thres=0.5, 20 | save_json=False, 21 | model=None): 22 | # Initialize/load model and set device 23 | if model is None: 24 | device = torch_utils.select_device(opt.device) 25 | verbose = True 26 | 27 | # Initialize model 28 | model = Darknet(cfg, img_size,deploy=True).to(device) 29 | #print(model) 30 | # Load weights 31 | #本身有,被我去掉了 32 | attempt_download(weights) 33 | if weights.endswith('.pt'): # pytorch format 34 | print('.pth is reading') 35 | # model.load_state_dict(torch.load(weights, map_location=device)['model']) 36 | model.load_state_dict(torch.load(weights, map_location=device)) 37 | 38 | 39 | else: # darknet format 40 | print('darknet weights is reading') 41 | _ = load_darknet_weights(model, weights) 42 | 43 | if torch.cuda.device_count() > 1: 44 | model = nn.DataParallel(model) 45 | else: 46 | device = next(model.parameters()).device # get model device 47 | verbose = False 48 | 49 | # Configure run 50 | data = parse_data_cfg(data) 51 | nc = int(data['classes']) # number of classes 52 | test_path = data['valid'] # path to test images 53 | names = load_classes(data['names']) # class names 54 | 55 | # Dataloader 56 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 57 | dataloader = DataLoader(dataset, 58 | batch_size=batch_size, 59 | num_workers=min([os.cpu_count(), batch_size, 16]), 60 | # num_workers=0, 61 | pin_memory=True, 62 | collate_fn=dataset.collate_fn) 63 | 64 | seen = 0 65 | model.eval() 66 | coco91class = coco80_to_coco91_class() 67 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 68 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 69 | loss = torch.zeros(3) 70 | jdict, stats, ap, ap_class = [], [], [], [] 71 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 72 | targets = targets.to(device) 73 | imgs = imgs.to(device) 74 | _, _, height, width = imgs.shape # batch size, channels, height, width 75 | 76 | # Plot images with bounding boxes 77 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 78 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 79 | 80 | # Run model 81 | inf_out, train_out = model(imgs) # inference and training outputs 82 | 83 | # Compute loss 84 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 85 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 86 | 87 | # Run NMS 88 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 89 | 90 | # Statistics per image 91 | for si, pred in enumerate(output): 92 | labels = targets[targets[:, 0] == si, 1:] 93 | nl = len(labels) 94 | tcls = labels[:, 0].tolist() if nl else [] # target class 95 | seen += 1 96 | 97 | if pred is None: 98 | if nl: 99 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 100 | continue 101 | 102 | # Append to text file 103 | # with open('test.txt', 'a') as file: 104 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 105 | 106 | # Append to pycocotools JSON dictionary 107 | if save_json: 108 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
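In test(), the statistics loop that continues below marks a prediction as a true positive only when its class appears among the image's targets and its best IoU with a still-unmatched target of that class exceeds iou_thres; every other prediction counts as a false positive at its confidence, and ap_per_class turns the accumulated (correct, conf, cls) triples into per-class AP. A small sketch of the IoU computation that matching relies on (bbox_iou itself lives in utils/utils.py):

```python
import torch

def box_iou_xyxy(box, boxes):
    # IoU of one (4,) box against an (N, 4) set, all in xyxy pixel coordinates.
    x1 = torch.max(box[0], boxes[:, 0]); y1 = torch.max(box[1], boxes[:, 1])
    x2 = torch.min(box[2], boxes[:, 2]); y2 = torch.min(box[3], boxes[:, 3])
    inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter + 1e-16)

# Greedy matching for mAP: pick the target with the highest IoU; if IoU > iou_thres and that
# target has not been detected yet, the prediction is correct and the target is marked as used.
```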
109 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 110 | box = pred[:, :4].clone() # xyxy 111 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 112 | box = xyxy2xywh(box) # xywh 113 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 114 | for di, d in enumerate(pred): 115 | jdict.append({'image_id': image_id, 116 | 'category_id': coco91class[int(d[6])], 117 | 'bbox': [floatn(x, 3) for x in box[di]], 118 | 'score': floatn(d[4], 5)}) 119 | 120 | # Clip boxes to image bounds 121 | clip_coords(pred, (height, width)) 122 | 123 | # Assign all predictions as incorrect 124 | correct = [0] * len(pred) 125 | if nl: 126 | detected = [] 127 | tcls_tensor = labels[:, 0] 128 | 129 | # target boxes 130 | tbox = xywh2xyxy(labels[:, 1:5]) 131 | tbox[:, [0, 2]] *= width 132 | tbox[:, [1, 3]] *= height 133 | 134 | # Search for correct predictions 135 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 136 | 137 | # Break if all targets already located in image 138 | if len(detected) == nl: 139 | break 140 | 141 | # Continue if predicted class not among image classes 142 | if pcls.item() not in tcls: 143 | continue 144 | 145 | # Best iou, index between pred and targets 146 | m = (pcls == tcls_tensor).nonzero().view(-1) 147 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 148 | 149 | # If iou > threshold and class is correct mark as correct 150 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 151 | correct[i] = 1 152 | detected.append(m[bi]) 153 | 154 | # Append statistics (correct, conf, pcls, tcls) 155 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 156 | 157 | # Compute statistics 158 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 159 | if len(stats): 160 | p, r, ap, f1, ap_class = ap_per_class(*stats) 161 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 162 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 163 | else: 164 | nt = torch.zeros(1) 165 | 166 | # Print results 167 | pf = '%20s' + '%10.3g' * 6 # print format 168 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 169 | 170 | # Print results per class 171 | if verbose and nc > 1 and len(stats): 172 | for i, c in enumerate(ap_class): 173 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 174 | 175 | # Save JSON 176 | if save_json and map and len(jdict): 177 | try: 178 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 179 | with open('results.json', 'w') as file: 180 | json.dump(jdict, file) 181 | 182 | from pycocotools.coco import COCO 183 | from pycocotools.cocoeval import COCOeval 184 | 185 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 186 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 187 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 188 | 189 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 190 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 191 | cocoEval.evaluate() 192 | cocoEval.accumulate() 193 | cocoEval.summarize() 194 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 195 | except: 196 | print('WARNING: missing dependency pycocotools from requirements.txt. 
Can not compute official COCO mAP.') 197 | 198 | # Return results 199 | maps = np.zeros(nc) + map 200 | for i, c in enumerate(ap_class): 201 | maps[c] = ap[i] 202 | return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps 203 | 204 | 205 | if __name__ == '__main__': 206 | parser = argparse.ArgumentParser(prog='test.py') 207 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 208 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 209 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 210 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 211 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 212 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 213 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 214 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 215 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 216 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 217 | opt = parser.parse_args() 218 | print(opt) 219 | 220 | with torch.no_grad(): 221 | test(opt.cfg, 222 | opt.data, 223 | opt.weights, 224 | opt.batch_size, 225 | opt.img_size, 226 | opt.iou_thres, 227 | opt.conf_thres, 228 | opt.nms_thres, 229 | opt.save_json) 230 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch.distributed as dist 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_scheduler 6 | 7 | import test # import test.py to get mAP after each epoch 8 | from models import * 9 | from utils.datasets import * 10 | from utils.utils import * 11 | from utils.prune_utils import * 12 | 13 | from mttkinter import mtTkinter as tk 14 | 15 | mixed_precision = True 16 | try: # Mixed precision training https://github.com/NVIDIA/apex 17 | from apex import amp 18 | except: 19 | mixed_precision = False # not installed 20 | 21 | wdir = 'weights_repvgg/B1' + os.sep # weights dir 22 | # wdir = 'weights_iter/weights_step6' + os.sep 23 | last = wdir + 'last.pt' 24 | best = wdir + 'best.pt' 25 | results_file = 'results.txt' 26 | 27 | # Hyperparameters (j-series, 50.5 mAP yolov3-320) evolved by @ktian08 https://github.com/ultralytics/yolov3/issues/310 28 | hyp = {'giou': 1.582, # giou loss gain 29 | 'cls': 27.76, # cls loss gain (CE=~1.0, uCE=~20) 30 | 'cls_pw': 1.446, # cls BCELoss positive_weight 31 | 'obj': 21.35, # obj loss gain (*=80 for uBCE with 80 classes) 32 | 'obj_pw': 3.941, # obj BCELoss positive_weight 33 | 'iou_t': 0.2635, # iou training threshold 34 | 'lr0': 0.002324, # initial learning rate (SGD=1E-3, Adam=9E-5) 35 | 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) 36 | 'momentum': 0.97, # SGD momentum 37 | 'weight_decay': 0.0004569, # optimizer weight decay 38 | 'fl_gamma': 0.5, # focal loss gamma 39 | 'hsv_h': 0.01, # image HSV-Hue augmentation (fraction) 40 | 'hsv_s': 0.5703, # image HSV-Saturation augmentation (fraction) 41 | 'hsv_v': 0.3174, # image HSV-Value augmentation (fraction) 42 | 'degrees': 1.113, # image 
rotation (+/- deg) 43 | 'translate': 0.06797, # image translation (+/- fraction) 44 | 'scale': 0.1059, # image scale (+/- gain) 45 | 'shear': 0.5768} # image shear (+/- deg) 46 | 47 | # Overwrite hyp with hyp*.txt (optional) 48 | f = glob.glob('hyp*.txt') 49 | if f: 50 | for k, v in zip(hyp.keys(), np.loadtxt(f[0])): 51 | hyp[k] = v 52 | 53 | 54 | def train(): 55 | cfg = opt.cfg 56 | data = opt.data 57 | img_size = opt.img_size 58 | epochs = 1 if opt.prebias else opt.epochs # 500200 batches at bs 64, 117263 images = 273 epochs 59 | batch_size = opt.batch_size 60 | accumulate = opt.accumulate # effective bs = batch_size * accumulate = 16 * 4 = 64 61 | weights = opt.weights # initial training weights 62 | 63 | if 'pw' not in opt.arc: # remove BCELoss positive weights 64 | hyp['cls_pw'] = 1. 65 | hyp['obj_pw'] = 1. 66 | 67 | # Initialize 68 | init_seeds() 69 | multi_scale = opt.multi_scale 70 | 71 | if multi_scale: 72 | img_sz_min = round(img_size / 32 / 1.5) + 1 73 | img_sz_max = round(img_size / 32 * 1.5) - 1 74 | img_size = img_sz_max * 32 # initiate with maximum multi_scale size 75 | print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size)) 76 | 77 | # Configure run 78 | data_dict = parse_data_cfg(data) 79 | train_path = data_dict['train'] 80 | nc = int(data_dict['classes']) # number of classes 81 | 82 | # Remove previous results 83 | for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): 84 | os.remove(f) 85 | 86 | # Initialize model 87 | model = Darknet(cfg, arc=opt.arc).to(device) 88 | 89 | # Optimizer 90 | pg0, pg1 = [], [] # optimizer parameter groups 91 | for k, v in dict(model.named_parameters()).items(): 92 | if 'Conv2d.weight' in k: 93 | pg1 += [v] # parameter group 1 (apply weight_decay) 94 | else: 95 | pg0 += [v] # parameter group 0 96 | 97 | if opt.adam: 98 | optimizer = optim.Adam(pg0, lr=hyp['lr0']) 99 | # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1) 100 | else: 101 | optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) 102 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay 103 | del pg0, pg1 104 | 105 | cutoff = -1 # backbone reaches to cutoff layer 106 | start_epoch = 0 107 | best_fitness = float('inf') 108 | attempt_download(weights) 109 | if weights.endswith('.pt'): # pytorch format 110 | # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. 
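The optimizer setup above puts convolution weights in their own parameter group so that weight decay touches only them, while BatchNorm parameters and biases train without decay; the same two groups feed either SGD or Adam. A condensed restatement of that grouping (model and hyp as defined in train()):

```python
import torch.optim as optim

pg_decay, pg_plain = [], []
for name, p in model.named_parameters():
    # Only parameters named '...Conv2d.weight' receive weight decay.
    (pg_decay if 'Conv2d.weight' in name else pg_plain).append(p)

optimizer = optim.SGD(pg_plain, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg_decay, 'weight_decay': hyp['weight_decay']})
```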
111 | if opt.bucket: 112 | os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last)) # download from bucket 113 | chkpt = torch.load(weights, map_location=device) 114 | 115 | # load model 116 | # if opt.transfer: 117 | 118 | # chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()} 119 | # model.load_state_dict(chkpt['model'], strict=False) 120 | 121 | new_dict = model.state_dict() 122 | new_dict.update(chkpt) 123 | model.load_state_dict(new_dict) 124 | # model.load_state_dict(chkpt, strict=True) 125 | 126 | # else: 127 | # model.load_state_dict(chkpt['model']) 128 | 129 | # load optimizer 130 | 131 | # if chkpt['optimizer'] is not None: 132 | # optimizer.load_state_dict(chkpt['optimizer']) 133 | # best_fitness = chkpt['best_fitness'] 134 | 135 | # # load results 136 | # if chkpt.get('training_results') is not None: 137 | # with open(results_file, 'w') as file: 138 | # file.write(chkpt['training_results']) # write results.txt 139 | 140 | # start_epoch = chkpt['epoch'] + 1 141 | # del chkpt 142 | 143 | elif weights.endswith('.pth'): 144 | pass 145 | 146 | elif len(weights) > 0: # darknet format 147 | # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc. 148 | cutoff = load_darknet_weights(model, weights) 149 | 150 | 151 | 152 | 153 | if opt.transfer or opt.prebias: # transfer learning edge (yolo) layers 154 | nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255) 155 | 156 | if opt.prebias: 157 | for p in optimizer.param_groups: 158 | # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum 159 | p['lr'] *= 100 # lr gain 160 | if p.get('momentum') is not None: # for SGD but not Adam 161 | p['momentum'] *= 0.9 162 | 163 | for p in model.parameters(): 164 | if opt.prebias and p.numel() == nf: # train (yolo biases) 165 | p.requires_grad = True 166 | elif opt.transfer and p.shape[0] == nf: # train (yolo biases+weights) 167 | p.requires_grad = True 168 | else: # freeze layer 169 | p.requires_grad = False 170 | 171 | # Scheduler https://github.com/ultralytics/yolov3/issues/238 172 | # lf = lambda x: 1 - x / epochs # linear ramp to zero 173 | # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp 174 | # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp 175 | # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 176 | # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=range(59, 70, 1), gamma=0.8) # gradual fall to 0.1*lr0 177 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.8, 0.9]], gamma=0.1) 178 | scheduler.last_epoch = start_epoch - 1 179 | 180 | # # Plot lr schedule 181 | # y = [] 182 | # for _ in range(epochs): 183 | # scheduler.step() 184 | # y.append(optimizer.param_groups[0]['lr']) 185 | # plt.plot(y, label='LambdaLR') 186 | # plt.xlabel('epoch') 187 | # plt.ylabel('LR') 188 | # plt.tight_layout() 189 | # plt.savefig('LR.png', dpi=300) 190 | 191 | # Mixed precision training https://github.com/NVIDIA/apex 192 | if mixed_precision: 193 | model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0) 194 | 195 | # Initialize distributed training 196 | if torch.cuda.device_count() > 1: 197 | dist.init_process_group(backend='nccl', # 'distributed backend' 198 | init_method='tcp://127.0.0.1:9999', # distributed training init method 199 | world_size=1, # number of nodes for distributed training 200 | rank=0) # distributed 
training node rank 201 | model = torch.nn.parallel.DistributedDataParallel(model) 202 | model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level 203 | 204 | 205 | 206 | #获得要剪枝的层 207 | 208 | if hasattr(model, 'module'): 209 | print('muti-gpus sparse') 210 | if opt.prune==1: 211 | print('shortcut sparse training') 212 | _,_,prune_idx,_,_=parse_module_defs2(model.module.module_defs) 213 | elif opt.prune==0: 214 | print('normal sparse training ') 215 | _,_,prune_idx= parse_module_defs(model.module.module_defs) 216 | elif opt.prune==2: 217 | print('tiny yolo normal sparse traing') 218 | _,_,prune_idx= parse_module_defs3(model.module.module_defs) 219 | 220 | else: 221 | print('single-gpu sparse') 222 | if opt.prune==1: 223 | print('shortcut sparse training') 224 | _,_,prune_idx,_,_=parse_module_defs2(model.module_defs) 225 | elif opt.prune==0: 226 | print('normal sparse training') 227 | _,_,prune_idx= parse_module_defs(model.module_defs) 228 | elif opt.prune==2: 229 | print('tiny yolo normal sparse traing') 230 | _,_,prune_idx= parse_module_defs3(model.module_defs) 231 | 232 | 233 | # Dataset 234 | dataset = LoadImagesAndLabels(train_path, 235 | img_size, 236 | batch_size, 237 | augment=True, 238 | hyp=hyp, # augmentation hyperparameters 239 | rect=opt.rect, # rectangular training 240 | image_weights=opt.img_weights, 241 | cache_labels=True if epochs > 10 else False, 242 | cache_images=False if opt.prebias else opt.cache_images) 243 | 244 | # Dataloader 245 | dataloader = torch.utils.data.DataLoader(dataset, 246 | batch_size=batch_size, 247 | num_workers=min([os.cpu_count(), batch_size, 16]), 248 | shuffle=not opt.rect, # Shuffle=True unless rectangular training is used 249 | pin_memory=True, 250 | collate_fn=dataset.collate_fn) 251 | 252 | 253 | 254 | # Start training 255 | model.nc = nc # attach number of classes to model 256 | model.arc = opt.arc # attach yolo architecture 257 | model.hyp = hyp # attach hyperparameters to model 258 | # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 259 | torch_utils.model_info(model, report='summary') # 'full' or 'summary' 260 | nb = len(dataloader) 261 | maps = np.zeros(nc) # mAP per class 262 | results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' 263 | t0 = time.time() 264 | print('Starting %s for %g epochs...' 
% ('prebias' if opt.prebias else 'training', epochs)) 265 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ 266 | model.train() 267 | print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) 268 | #稀疏化标志 269 | sr_flag = get_sr_flag(epoch, opt.sr) 270 | 271 | # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional) 272 | freeze_backbone = False 273 | if freeze_backbone and epoch < 2: 274 | for name, p in model.named_parameters(): 275 | if int(name.split('.')[1]) < cutoff: # if layer < 75 276 | p.requires_grad = False if epoch == 0 else True 277 | 278 | # Update image weights (optional) 279 | if dataset.image_weights: 280 | w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights 281 | image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) 282 | dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx 283 | 284 | mloss = torch.zeros(4).to(device) # mean losses 285 | pbar = tqdm(enumerate(dataloader), total=nb) # progress bar 286 | for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- 287 | ni = i + nb * epoch # number integrated batches (since train start) 288 | imgs = imgs.to(device) 289 | targets = targets.to(device) 290 | 291 | # Multi-Scale training 292 | if multi_scale: 293 | if ni / accumulate % 10 == 0: #  adjust (67% - 150%) every 10 batches 294 | img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32 295 | sf = img_size / max(imgs.shape[2:]) # scale factor 296 | if sf != 1: 297 | ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]] # new shape (stretched to 32-multiple) 298 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) 299 | 300 | # Plot images with bounding boxes 301 | if ni == 0: 302 | fname = 'train_batch%g.jpg' % i 303 | plot_images(imgs=imgs, targets=targets, paths=paths, fname=fname) 304 | if tb_writer: 305 | tb_writer.add_image(fname, cv2.imread(fname)[:, :, ::-1], dataformats='HWC') 306 | 307 | # Hyperparameter burn-in 308 | # n_burn = nb - 1 # min(nb // 5 + 1, 1000) # number of burn-in batches 309 | # if ni <= n_burn: 310 | # for m in model.named_modules(): 311 | # if m[0].endswith('BatchNorm2d'): 312 | # m[1].momentum = 1 - i / n_burn * 0.99 # BatchNorm2d momentum falls from 1 - 0.01 313 | # g = (i / n_burn) ** 4 # gain rises from 0 - 1 314 | # for x in optimizer.param_groups: 315 | # x['lr'] = hyp['lr0'] * g 316 | # x['weight_decay'] = hyp['weight_decay'] * g 317 | 318 | # Run model 319 | pred = model(imgs) 320 | 321 | # Compute loss 322 | loss, loss_items = compute_loss(pred, targets, model) 323 | if not torch.isfinite(loss): 324 | print('WARNING: non-finite loss, ending training ', loss_items) 325 | return results 326 | 327 | # Scale loss by nominal batch_size of 64 328 | loss *= batch_size / 64 329 | 330 | # Compute gradient 331 | if mixed_precision: 332 | with amp.scale_loss(loss, optimizer) as scaled_loss: 333 | scaled_loss.backward() 334 | else: 335 | loss.backward() 336 | 337 | #对要剪枝层的γ参数稀疏化 338 | if hasattr(model, 'module'): 339 | BNOptimizer.updateBN(sr_flag, model.module.module_list, opt.s, prune_idx) 340 | else: 341 | BNOptimizer.updateBN(sr_flag, model.module_list, opt.s, prune_idx) 342 | 343 | # Accumulate gradient for x batches before optimizing 344 | if ni % accumulate == 0: 345 | optimizer.step() 346 | optimizer.zero_grad() 347 | 348 | # Print batch 
results 349 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 350 | mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0 # (GB) 351 | s = ('%10s' * 2 + '%10.3g' * 6) % ( 352 | '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss, len(targets), img_size) 353 | pbar.set_description(s) 354 | 355 | # end batch ------------------------------------------------------------------------------------------------ 356 | 357 | # Update scheduler 358 | scheduler.step() 359 | 360 | # Process epoch results 361 | final_epoch = epoch + 1 == epochs 362 | if opt.prebias: 363 | print_model_biases(model) 364 | else: 365 | # Calculate mAP (always test final epoch, skip first 10 if opt.nosave) 366 | if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch: 367 | with torch.no_grad(): 368 | results, maps = test.test(cfg, 369 | data, 370 | batch_size=batch_size, 371 | img_size=opt.img_size, 372 | model=model, 373 | # conf_thres=0.001 if final_epoch and epoch > 0 else 0.1, # 0.1 for speed 374 | conf_thres=0.001, 375 | save_json=final_epoch and epoch > 0 and 'coco.data' in data) 376 | 377 | # Write epoch results 378 | with open(results_file, 'a') as f: 379 | f.write(s + '%10.3g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) 380 | 381 | # Write Tensorboard results 382 | if tb_writer: 383 | x = list(mloss) + list(results) 384 | titles = ['GIoU', 'Objectness', 'Classification', 'Train loss', 385 | 'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'] 386 | for xi, title in zip(x, titles): 387 | tb_writer.add_scalar(title, xi, epoch) 388 | 389 | # Update best mAP 390 | # fitness = sum(results[4:]) # total loss 391 | # if fitness < best_fitness: 392 | # best_fitness = fitness 393 | fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] 394 | if fi > best_fitness: 395 | best_fitness = fi 396 | 397 | # Save training results 398 | save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias 399 | if save: 400 | with open(results_file, 'r') as f: 401 | # Create checkpoint 402 | chkpt = {'epoch': epoch, 403 | 'best_fitness': best_fitness, 404 | 'training_results': f.read(), 405 | 'model': model.module.state_dict() if type( 406 | model) is nn.parallel.DistributedDataParallel else model.state_dict(), 407 | 'optimizer': None if final_epoch else optimizer.state_dict()} 408 | 409 | # Save last checkpoint 410 | torch.save(chkpt, last) 411 | if opt.bucket and not opt.prebias: 412 | os.system('gsutil cp %s gs://%s' % (last, opt.bucket)) # upload to bucket 413 | 414 | # Save best checkpoint (this epoch set a new best fitness; comparing against the fitness() function here was a bug) 415 | if best_fitness == fi: 416 | torch.save(chkpt, best) 417 | 418 | # Save backup every 10 epochs (optional) 419 | if epoch > 0 and epoch % 10 == 0: 420 | torch.save(chkpt, wdir + 'backup%g.pt' % epoch) 421 | 422 | # Delete checkpoint 423 | del chkpt 424 | 425 | # end epoch ---------------------------------------------------------------------------------------------------- 426 | 427 | # end training 428 | if len(opt.name): 429 | os.rename('results.txt', 'results_%s.txt' % opt.name) 430 | os.rename(wdir + 'best.pt', wdir + 'best_%s.pt' % opt.name) 431 | plot_results() # save as results.png 432 | print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) 433 | dist.destroy_process_group() if torch.cuda.device_count() > 1 else None 434 | torch.cuda.empty_cache() 435 | 436 | # save to cloud 437 | # os.system(gsutil cp results.txt gs://...)
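# Note on the sparsity step: `BNOptimizer.updateBN`, called in the batch loop above, applies the
# channel-sparsity regularization (network-slimming style): when sr_flag is set it adds an L1
# subgradient penalty of strength `opt.s` to the BatchNorm scale factors (γ) of the prunable layers
# listed in `prune_idx`. A minimal sketch of such an update, assuming each prunable entry of
# `module_list` is a Conv-BN-LeakyReLU nn.Sequential with the BatchNorm2d at index 1 (the actual
# implementation shipped with this repo's pruning utilities may differ):
#
#     class BNOptimizer:
#         @staticmethod
#         def updateBN(sr_flag, module_list, s, prune_idx):
#             if sr_flag:
#                 for idx in prune_idx:
#                     bn_module = module_list[idx][1]  # BatchNorm2d of the Conv-BN-LeakyReLU block
#                     # L1 subgradient: push γ towards zero with strength s
#                     bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data))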
438 | # os.system(gsutil cp weights/best.pt gs://...) 439 | 440 | return results 441 | 442 | 443 | def prebias(): 444 | # trains output bias layers for 1 epoch and creates new backbone 445 | if opt.prebias: 446 | train() # transfer-learn yolo biases for 1 epoch 447 | create_backbone(last) # saved results as backbone.pt 448 | opt.weights = wdir + 'backbone.pt' # assign backbone 449 | opt.prebias = False # disable prebias 450 | 451 | 452 | if __name__ == '__main__': 453 | parser = argparse.ArgumentParser() 454 | parser.add_argument('--epochs', type=int, default=273) # 500200 batches at bs 16, 117263 images = 273 epochs 455 | parser.add_argument('--batch-size', type=int, default=32) # effective bs = batch_size * accumulate = 16 * 4 = 64 456 | parser.add_argument('--accumulate', type=int, default=2, help='batches to accumulate before optimizing') 457 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-hand.cfg', help='cfg file path') 458 | parser.add_argument('--data', type=str, default='data/oxfordhand.data', help='*.data file path') 459 | parser.add_argument('--multi-scale', action='store_true', help='adjust (67% - 150%) img_size every 10 batches') 460 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 461 | parser.add_argument('--rect', action='store_true', help='rectangular training') 462 | parser.add_argument('--resume', action='store_true', help='resume training from last.pt') 463 | parser.add_argument('--transfer', action='store_true', help='transfer learning') 464 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 465 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 466 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') 467 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 468 | parser.add_argument('--img-weights', action='store_true', help='select training images by weight') 469 | parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') 470 | parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='initial weights') # i.e. weights/darknet.53.conv.74 471 | parser.add_argument('--arc', type=str, default='default', help='yolo architecture') # defaultpw, uCE, uBCE 472 | parser.add_argument('--prebias', action='store_true', help='transfer-learn yolo biases prior to training') 473 | parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') 474 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 475 | parser.add_argument('--adam', action='store_true', help='use adam optimizer') 476 | parser.add_argument('--var', type=float, help='debug variable') 477 | parser.add_argument('--sparsity-regularization', '-sr', dest='sr', action='store_true', 478 | help='train with channel sparsity regularization') 479 | parser.add_argument('--s', type=float, default=0.001, help='scale sparse rate') 480 | parser.add_argument('--prune', type=int, default=0, help='0:nomal prune or regular prune 1:shortcut prune 2:tiny prune') 481 | opt = parser.parse_args() 482 | opt.weights = last if opt.resume else opt.weights 483 | print(opt) 484 | device = torch_utils.select_device(opt.device, apex=mixed_precision) 485 | 486 | tb_writer = None 487 | if not opt.evolve: # Train normally 488 | try: 489 | # Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ 490 | from torch.utils.tensorboard import SummaryWriter 491 | 492 | tb_writer = SummaryWriter() 493 | except: 494 | pass 495 | 496 | prebias() # optional 497 | train() # train normally 498 | 499 | # Evolve hyperparameters (optional) 500 | else: 501 | opt.notest = True # only test final epoch 502 | opt.nosave = True # only save final checkpoint 503 | if opt.bucket: 504 | os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists 505 | 506 | for _ in range(1): # generations to evolve 507 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 508 | # Select parent(s) 509 | x = np.loadtxt('evolve.txt', ndmin=2) 510 | parent = 'weighted' # parent selection method: 'single' or 'weighted' 511 | if parent == 'single' or len(x) == 1: 512 | x = x[fitness(x).argmax()] 513 | elif parent == 'weighted': # weighted combination 514 | n = min(10, x.shape[0]) # number to merge 515 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 516 | w = fitness(x) - fitness(x).min() # weights 517 | x = (x[:n] * w.reshape(n, 1)).sum(0) / w.sum() # new parent 518 | for i, k in enumerate(hyp.keys()): 519 | hyp[k] = x[i + 7] 520 | 521 | # Mutate 522 | np.random.seed(int(time.time())) 523 | s = [.2, .2, .2, .2, .2, .2, .2, .0, .02, .2, .2, .2, .2, .2, .2, .2, .2, .2] # sigmas 524 | for i, k in enumerate(hyp.keys()): 525 | x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300) 526 | hyp[k] *= float(x) # vary by sigmas 527 | 528 | # Clip to limits 529 | keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma'] 530 | limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)] 531 | for k, v in zip(keys, limits): 532 | hyp[k] = np.clip(hyp[k], v[0], v[1]) 533 | 534 | # Train mutation 535 | prebias() 536 | results = train() 537 | 538 | # Write mutation results 539 | print_mutation(hyp, results, opt.bucket) 540 | 541 | # Plot results 542 | # plot_evolution_results(hyp) 543 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cydia2018/YOLOv3-RepVGG-backbone/0bd5670acafc0e0e5e86b452ab76c5032c16eb44/utils/__init__.py -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements 
AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | import time 7 | from pathlib import Path 8 | from threading import Thread 9 | 10 | import cv2 11 | import numpy as np 12 | import torch 13 | from PIL import Image, ExifTags 14 | from torch.utils.data import Dataset 15 | from tqdm import tqdm 16 | 17 | from utils.utils import xyxy2xywh, xywh2xyxy 18 | 19 | img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif'] 20 | vid_formats = ['.mov', '.avi', '.mp4'] 21 | 22 | # Get orientation exif tag 23 | for orientation in ExifTags.TAGS.keys(): 24 | if ExifTags.TAGS[orientation] == 'Orientation': 25 | break 26 | 27 | 28 | def exif_size(img): 29 | # Returns exif-corrected PIL size 30 | s = img.size # (width, height) 31 | try: 32 | rotation = dict(img._getexif().items())[orientation] 33 | if rotation == 6: # rotation 270 34 | s = (s[1], s[0]) 35 | elif rotation == 8: # rotation 90 36 | s = (s[1], s[0]) 37 | except: 38 | pass 39 | 40 | return s 41 | 42 | 43 | class LoadImages: # for inference 44 | def __init__(self, path, img_size=416, half=False): 45 | path = str(Path(path)) # os-agnostic 46 | files = [] 47 | if os.path.isdir(path): 48 | files = sorted(glob.glob(os.path.join(path, '*.*'))) 49 | elif os.path.isfile(path): 50 | files = [path] 51 | 52 | images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] 53 | videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] 54 | nI, nV = len(images), len(videos) 55 | 56 | self.img_size = img_size 57 | self.files = images + videos 58 | self.nF = nI + nV # number of files 59 | self.video_flag = [False] * nI + [True] * nV 60 | self.mode = 'images' 61 | self.half = half # half precision fp16 images 62 | if any(videos): 63 | self.new_video(videos[0]) # new video 64 | else: 65 | self.cap = None 66 | assert self.nF > 0, 'No images or videos found in ' + path 67 | 68 | def __iter__(self): 69 | self.count = 0 70 | return self 71 | 72 | def __next__(self): 73 | if self.count == self.nF: 74 | raise StopIteration 75 | path = self.files[self.count] 76 | 77 | if self.video_flag[self.count]: 78 | # Read video 79 | self.mode = 'video' 80 | ret_val, img0 = self.cap.read() 81 | if not ret_val: 82 | self.count += 1 83 | self.cap.release() 84 | if self.count == self.nF: # last video 85 | raise 
StopIteration 86 | else: 87 | path = self.files[self.count] 88 | self.new_video(path) 89 | ret_val, img0 = self.cap.read() 90 | 91 | self.frame += 1 92 | print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') 93 | 94 | else: 95 | # Read image 96 | self.count += 1 97 | img0 = cv2.imread(path) # BGR 98 | assert img0 is not None, 'Image Not Found ' + path 99 | print('image %g/%g %s: ' % (self.count, self.nF, path), end='') 100 | 101 | # Padded resize 102 | img = letterbox(img0, new_shape=self.img_size)[0] 103 | 104 | # Normalize RGB 105 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 106 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 107 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 108 | 109 | # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image 110 | return path, img, img0, self.cap 111 | 112 | def new_video(self, path): 113 | self.frame = 0 114 | self.cap = cv2.VideoCapture(path) 115 | self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 116 | 117 | def __len__(self): 118 | return self.nF # number of files 119 | 120 | 121 | class LoadWebcam: # for inference 122 | def __init__(self, pipe=0, img_size=416, half=False): 123 | self.img_size = img_size 124 | self.half = half # half precision fp16 images 125 | 126 | if pipe == '0': 127 | pipe = 0 # local camera 128 | # pipe = 'rtsp://192.168.1.64/1' # IP camera 129 | # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login 130 | # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera 131 | # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera 132 | 133 | # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/ 134 | # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer 135 | 136 | # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/ 137 | # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help 138 | # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! 
appsink" # GStreamer 139 | 140 | self.pipe = pipe 141 | self.cap = cv2.VideoCapture(pipe) # video capture object 142 | self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size 143 | 144 | def __iter__(self): 145 | self.count = -1 146 | return self 147 | 148 | def __next__(self): 149 | self.count += 1 150 | if cv2.waitKey(1) == ord('q'): # q to quit 151 | self.cap.release() 152 | cv2.destroyAllWindows() 153 | raise StopIteration 154 | 155 | # Read frame 156 | if self.pipe == 0: # local camera 157 | ret_val, img0 = self.cap.read() 158 | img0 = cv2.flip(img0, 1) # flip left-right 159 | else: # IP camera 160 | n = 0 161 | while True: 162 | n += 1 163 | self.cap.grab() 164 | if n % 30 == 0: # skip frames 165 | ret_val, img0 = self.cap.retrieve() 166 | if ret_val: 167 | break 168 | 169 | # Print 170 | assert ret_val, 'Camera Error %s' % self.pipe 171 | img_path = 'webcam.jpg' 172 | print('webcam %g: ' % self.count, end='') 173 | 174 | # Padded resize 175 | img = letterbox(img0, new_shape=self.img_size)[0] 176 | 177 | # Normalize RGB 178 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 179 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 180 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 181 | 182 | return img_path, img, img0, None 183 | 184 | def __len__(self): 185 | return 0 186 | 187 | 188 | class LoadStreams: # multiple IP or RTSP cameras 189 | def __init__(self, sources='streams.txt', img_size=416, half=False): 190 | self.mode = 'images' 191 | self.img_size = img_size 192 | self.half = half # half precision fp16 images 193 | 194 | if os.path.isfile(sources): 195 | with open(sources, 'r') as f: 196 | sources = [x.strip() for x in f.read().splitlines() if len(x.strip())] 197 | else: 198 | sources = [sources] 199 | 200 | n = len(sources) 201 | self.imgs = [None] * n 202 | self.sources = sources 203 | for i, s in enumerate(sources): 204 | # Start the thread to read frames from the video stream 205 | print('%g/%g: %s... ' % (i + 1, n, s), end='') 206 | cap = cv2.VideoCapture(0 if s == '0' else s) 207 | assert cap.isOpened(), 'Failed to open %s' % s 208 | w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 209 | h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 210 | fps = cap.get(cv2.CAP_PROP_FPS) % 100 211 | _, self.imgs[i] = cap.read() # guarantee first frame 212 | thread = Thread(target=self.update, args=([i, cap]), daemon=True) 213 | print(' success (%gx%g at %.2f FPS).' 
% (w, h, fps)) 214 | thread.start() 215 | print('') # newline 216 | 217 | def update(self, index, cap): 218 | # Read next stream frame in a daemon thread 219 | n = 0 220 | while cap.isOpened(): 221 | n += 1 222 | # _, self.imgs[index] = cap.read() 223 | cap.grab() 224 | if n == 4: # read every 4th frame 225 | _, self.imgs[index] = cap.retrieve() 226 | n = 0 227 | time.sleep(0.01) # wait time 228 | 229 | def __iter__(self): 230 | self.count = -1 231 | return self 232 | 233 | def __next__(self): 234 | self.count += 1 235 | img0 = self.imgs.copy() 236 | if cv2.waitKey(1) == ord('q'): # q to quit 237 | cv2.destroyAllWindows() 238 | raise StopIteration 239 | 240 | # Letterbox 241 | img = [letterbox(x, new_shape=self.img_size, interp=cv2.INTER_LINEAR)[0] for x in img0] 242 | 243 | # Stack 244 | img = np.stack(img, 0) 245 | 246 | # Normalize RGB 247 | img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB 248 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 249 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 250 | 251 | return self.sources, img, img0, None 252 | 253 | def __len__(self): 254 | return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years 255 | 256 | 257 | class LoadImagesAndLabels(Dataset): # for training/testing 258 | def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=True, image_weights=False, 259 | cache_labels=False, cache_images=False): 260 | path = str(Path(path)) # os-agnostic 261 | with open(path, 'r') as f: 262 | self.img_files = [x.replace('/', os.sep) for x in f.read().splitlines() # os-agnostic 263 | if os.path.splitext(x)[-1].lower() in img_formats] 264 | 265 | n = len(self.img_files) 266 | bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index 267 | nb = bi[-1] + 1 # number of batches 268 | assert n > 0, 'No images found in %s' % path 269 | 270 | self.n = n 271 | self.batch = bi # batch index of image 272 | self.img_size = img_size 273 | self.augment = augment 274 | self.hyp = hyp 275 | self.image_weights = image_weights 276 | self.rect = False if image_weights else rect 277 | 278 | # Define labels 279 | self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') 280 | for x in self.img_files] 281 | 282 | # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 283 | if self.rect: 284 | # Read image shapes 285 | sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1] # shapefile path 286 | try: 287 | with open(sp, 'r') as f: # read existing shapefile 288 | s = [x.split() for x in f.read().splitlines()] 289 | assert len(s) == n, 'Shapefile out of sync' 290 | except: 291 | s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')] 292 | np.savetxt(sp, s, fmt='%g') # overwrites existing (if any) 293 | 294 | # Sort by aspect ratio 295 | s = np.array(s, dtype=np.float64) 296 | ar = s[:, 1] / s[:, 0] # aspect ratio 297 | i = ar.argsort() 298 | self.img_files = [self.img_files[i] for i in i] 299 | self.label_files = [self.label_files[i] for i in i] 300 | self.shapes = s[i] 301 | ar = ar[i] 302 | 303 | # Set training image shapes 304 | shapes = [[1, 1]] * nb 305 | for i in range(nb): 306 | ari = ar[bi == i] 307 | mini, maxi = ari.min(), ari.max() 308 | if maxi < 1: 309 | shapes[i] = [maxi, 1] 310 | elif mini > 1: 311 | shapes[i] = [1, 1 / mini] 312 | 313 | self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32 314 | 315 | # Preload labels (required for weighted CE 
training) 316 | self.imgs = [None] * n 317 | self.labels = [None] * n 318 | if cache_labels or image_weights: # cache labels for faster training 319 | self.labels = [np.zeros((0, 5))] * n 320 | extract_bounding_boxes = False 321 | create_datasubset = False 322 | pbar = tqdm(self.label_files, desc='Reading labels') 323 | nm, nf, ne, ns = 0, 0, 0, 0 # number missing, number found, number empty, number datasubset 324 | for i, file in enumerate(pbar): 325 | try: 326 | with open(file, 'r') as f: 327 | l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 328 | except: 329 | nm += 1 # print('missing labels for image %s' % self.img_files[i]) # file missing 330 | continue 331 | 332 | if l.shape[0]: 333 | assert l.shape[1] == 5, '> 5 label columns: %s' % file 334 | assert (l >= 0).all(), 'negative labels: %s' % file 335 | assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file 336 | self.labels[i] = l 337 | nf += 1 # file found 338 | 339 | # Create subdataset (a smaller dataset) 340 | if create_datasubset and ns < 1E4: 341 | if ns == 0: 342 | create_folder(path='./datasubset') 343 | os.makedirs('./datasubset/images') 344 | exclude_classes = 43 345 | if exclude_classes not in l[:, 0]: 346 | ns += 1 347 | # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image 348 | with open('./datasubset/images.txt', 'a') as f: 349 | f.write(self.img_files[i] + '\n') 350 | 351 | # Extract object detection boxes for a second stage classifier 352 | if extract_bounding_boxes: 353 | p = Path(self.img_files[i]) 354 | img = cv2.imread(str(p)) 355 | h, w, _ = img.shape 356 | for j, x in enumerate(l): 357 | f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) 358 | if not os.path.exists(Path(f).parent): 359 | os.makedirs(Path(f).parent) # make new output folder 360 | 361 | b = x[1:] * np.array([w, h, w, h]) # box 362 | b[2:] = b[2:].max() # rectangle to square 363 | b[2:] = b[2:] * 1.3 + 30 # pad 364 | b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) 365 | 366 | b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image 367 | b[[1, 3]] = np.clip(b[[1, 3]], 0, h) 368 | assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' 369 | else: 370 | ne += 1 # file empty 371 | 372 | pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n) 373 | assert nf > 0, 'No labels found. Recommend correcting image and label paths.' 
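# The label files follow the darknet/YOLO convention enforced by the asserts above: one .txt per image,
# one row per object, "class x_center y_center width height", with box coordinates normalized to [0, 1].
# A hypothetical single-class (hand) example and how it parses into the (n, 5) float32 array cached in
# self.labels (file name and values are illustrative only):
#
#     # 000001.txt
#     #   0 0.716 0.395 0.216 0.174
#     #   0 0.287 0.601 0.121 0.098
#     with open('000001.txt', 'r') as f:
#         l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)  # l.shape == (2, 5)
#
# __getitem__ later converts these normalized xywh rows to pixel xyxy using the letterbox ratio and padding.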
374 | 375 | # Cache images into memory for faster training (~5GB) 376 | if cache_images and augment: # if training 377 | for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'): # max 10k images 378 | img_path = self.img_files[i] 379 | img = cv2.imread(img_path) # BGR 380 | assert img is not None, 'Image Not Found ' + img_path 381 | r = self.img_size / max(img.shape) # size ratio 382 | if self.augment and r < 1: # if training (NOT testing), downsize to inference shape 383 | h, w, _ = img.shape 384 | img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # or INTER_AREA 385 | self.imgs[i] = img 386 | 387 | # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3 388 | detect_corrupted_images = False 389 | if detect_corrupted_images: 390 | from skimage import io # conda install -c conda-forge scikit-image 391 | for file in tqdm(self.img_files, desc='Detecting corrupted images'): 392 | try: 393 | _ = io.imread(file) 394 | except: 395 | print('Corrupted image detected: %s' % file) 396 | 397 | def __len__(self): 398 | return len(self.img_files) 399 | 400 | # def __iter__(self): 401 | # self.count = -1 402 | # print('ran dataset iter') 403 | # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) 404 | # return self 405 | 406 | def __getitem__(self, index): 407 | if self.image_weights: 408 | index = self.indices[index] 409 | 410 | img_path = self.img_files[index] 411 | label_path = self.label_files[index] 412 | 413 | mosaic = True and self.augment # load 4 images at a time into a mosaic (only during training) 414 | if mosaic: 415 | # Load mosaic 416 | img, labels = load_mosaic(self, index) 417 | h, w, _ = img.shape 418 | 419 | else: 420 | # Load image 421 | img = load_image(self, index) 422 | 423 | # Letterbox 424 | h, w, _ = img.shape 425 | if self.rect: 426 | img, ratio, padw, padh = letterbox(img, self.batch_shapes[self.batch[index]], mode='rect') 427 | else: 428 | img, ratio, padw, padh = letterbox(img, self.img_size, mode='square') 429 | 430 | # Load labels 431 | labels = [] 432 | if os.path.isfile(label_path): 433 | x = self.labels[index] 434 | if x is None: # labels not preloaded 435 | with open(label_path, 'r') as f: 436 | x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 437 | 438 | if x.size > 0: 439 | # Normalized xywh to pixel xyxy format 440 | labels = x.copy() 441 | labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw 442 | labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh 443 | labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw 444 | labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh 445 | 446 | if self.augment: 447 | # Augment colorspace 448 | augment_hsv(img, hgain=self.hyp['hsv_h'], sgain=self.hyp['hsv_s'], vgain=self.hyp['hsv_v']) 449 | 450 | # Augment imagespace 451 | g = 0.0 if mosaic else 1.0 # do not augment mosaics 452 | hyp = self.hyp 453 | img, labels = random_affine(img, labels, 454 | degrees=hyp['degrees'] * g, 455 | translate=hyp['translate'] * g, 456 | scale=hyp['scale'] * g, 457 | shear=hyp['shear'] * g) 458 | 459 | # Apply cutouts 460 | # if random.random() < 0.9: 461 | # labels = cutout(img, labels) 462 | 463 | nL = len(labels) # number of labels 464 | if nL: 465 | # convert xyxy to xywh 466 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) 467 | 468 | # Normalize coordinates 0 - 1 469 | labels[:, [2, 4]] /= img.shape[0] # height 470 | labels[:, [1, 3]] /= img.shape[1] 
# width 471 | 472 | if self.augment: 473 | # random left-right flip 474 | lr_flip = True 475 | if lr_flip and random.random() < 0.5: 476 | img = np.fliplr(img) 477 | if nL: 478 | labels[:, 1] = 1 - labels[:, 1] 479 | 480 | # random up-down flip 481 | ud_flip = False 482 | if ud_flip and random.random() < 0.5: 483 | img = np.flipud(img) 484 | if nL: 485 | labels[:, 2] = 1 - labels[:, 2] 486 | 487 | labels_out = torch.zeros((nL, 6)) 488 | if nL: 489 | labels_out[:, 1:] = torch.from_numpy(labels) 490 | 491 | # Normalize 492 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 493 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 494 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 495 | 496 | return torch.from_numpy(img), labels_out, img_path, (h, w) 497 | 498 | @staticmethod 499 | def collate_fn(batch): 500 | img, label, path, hw = list(zip(*batch)) # transposed 501 | for i, l in enumerate(label): 502 | l[:, 0] = i # add target image index for build_targets() 503 | return torch.stack(img, 0), torch.cat(label, 0), path, hw 504 | 505 | 506 | def load_image(self, index): 507 | # loads 1 image from dataset 508 | img = self.imgs[index] 509 | if img is None: 510 | img_path = self.img_files[index] 511 | img = cv2.imread(img_path) # BGR 512 | assert img is not None, 'Image Not Found ' + img_path 513 | r = self.img_size / max(img.shape) # size ratio 514 | if self.augment and r < 1.0: # if training (NOT testing), downsize to inference shape 515 | h, w, _ = img.shape 516 | img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # _LINEAR fastest 517 | return img 518 | 519 | 520 | def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): 521 | x = (np.random.uniform(-1, 1, 3) * np.array([hgain, sgain, vgain]) + 1).astype(np.float32) # random gains 522 | img_hsv = (cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * x.reshape((1, 1, 3))).clip(None, 255).astype(np.uint8) 523 | cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed 524 | 525 | 526 | # def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): # original version 527 | # # SV augmentation by 50% 528 | # img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val 529 | # 530 | # S = img_hsv[:, :, 1].astype(np.float32) # saturation 531 | # V = img_hsv[:, :, 2].astype(np.float32) # value 532 | # 533 | # a = random.uniform(-1, 1) * sgain + 1 534 | # b = random.uniform(-1, 1) * vgain + 1 535 | # S *= a 536 | # V *= b 537 | # 538 | # img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) 539 | # img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) 540 | # cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed 541 | 542 | 543 | def load_mosaic(self, index): 544 | # loads images in a mosaic 545 | 546 | labels4 = [] 547 | s = self.img_size 548 | xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y 549 | img4 = np.zeros((s * 2, s * 2, 3), dtype=np.uint8) + 128 # base image with 4 tiles 550 | indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices 551 | for i, index in enumerate(indices): 552 | # Load image 553 | img = load_image(self, index) 554 | h, w, _ = img.shape 555 | 556 | # place img in img4 557 | if i == 0: # top left 558 | x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) 559 | x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) 560 | elif i == 1: # top right 561 | x1a, y1a, x2a, y2a = xc, max(yc - h, 
0), min(xc + w, s * 2), yc 562 | x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h 563 | elif i == 2: # bottom left 564 | x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) 565 | x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h) 566 | elif i == 3: # bottom right 567 | x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) 568 | x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) 569 | 570 | img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] 571 | padw = x1a - x1b 572 | padh = y1a - y1b 573 | 574 | # Load labels 575 | label_path = self.label_files[index] 576 | if os.path.isfile(label_path): 577 | x = self.labels[index] 578 | if x is None: # labels not preloaded 579 | with open(label_path, 'r') as f: 580 | x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 581 | 582 | if x.size > 0: 583 | # Normalized xywh to pixel xyxy format 584 | labels = x.copy() 585 | labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw 586 | labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh 587 | labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw 588 | labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh 589 | 590 | labels4.append(labels) 591 | if len(labels4): 592 | labels4 = np.concatenate(labels4, 0) 593 | 594 | # hyp = self.hyp 595 | # img4, labels4 = random_affine(img4, labels4, 596 | # degrees=hyp['degrees'], 597 | # translate=hyp['translate'], 598 | # scale=hyp['scale'], 599 | # shear=hyp['shear']) 600 | 601 | # Center crop 602 | a = s // 2 603 | img4 = img4[a:a + s, a:a + s] 604 | if len(labels4): 605 | labels4[:, 1:] -= a 606 | 607 | return img4, labels4 608 | 609 | 610 | def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2.INTER_AREA): 611 | # Resize a rectangular image to a 32 pixel multiple rectangle 612 | # https://github.com/ultralytics/yolov3/issues/232 613 | shape = img.shape[:2] # current shape [height, width] 614 | 615 | if isinstance(new_shape, int): 616 | r = float(new_shape) / max(shape) # ratio = new / old 617 | else: 618 | r = max(new_shape) / max(shape) 619 | ratio = r, r # width, height ratios 620 | new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r))) 621 | 622 | # Compute padding https://github.com/ultralytics/yolov3/issues/232 623 | if mode is 'auto': # minimum rectangle 624 | dw = np.mod(new_shape - new_unpad[0], 32) / 2 # width padding 625 | dh = np.mod(new_shape - new_unpad[1], 32) / 2 # height padding 626 | elif mode is 'square': # square 627 | dw = (new_shape - new_unpad[0]) / 2 # width padding 628 | dh = (new_shape - new_unpad[1]) / 2 # height padding 629 | elif mode is 'rect': # square 630 | dw = (new_shape[1] - new_unpad[0]) / 2 # width padding 631 | dh = (new_shape[0] - new_unpad[1]) / 2 # height padding 632 | elif mode is 'scaleFill': 633 | dw, dh = 0.0, 0.0 634 | new_unpad = (new_shape, new_shape) 635 | ratio = new_shape / shape[1], new_shape / shape[0] # width, height ratios 636 | 637 | if shape[::-1] != new_unpad: # resize 638 | img = cv2.resize(img, new_unpad, interpolation=interp) # INTER_AREA is better, INTER_LINEAR is faster 639 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 640 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 641 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 642 | return img, ratio, dw, dh 643 | 644 | 645 | def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10): 646 | # 
torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 647 | # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 648 | 649 | if targets is None: # targets = [cls, xyxy] 650 | targets = [] 651 | border = 0 # width of added border (optional) 652 | height = img.shape[0] + border * 2 653 | width = img.shape[1] + border * 2 654 | 655 | # Rotation and Scale 656 | R = np.eye(3) 657 | a = random.uniform(-degrees, degrees) 658 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 659 | s = random.uniform(1 - scale, 1 + scale) 660 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) 661 | 662 | # Translation 663 | T = np.eye(3) 664 | T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels) 665 | T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels) 666 | 667 | # Shear 668 | S = np.eye(3) 669 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 670 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 671 | 672 | # Combined rotation matrix 673 | M = S @ T @ R # ORDER IS IMPORTANT HERE!! 674 | changed = (border != 0) or (M != np.eye(3)).any() 675 | if changed: 676 | img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_AREA, borderValue=(128, 128, 128)) 677 | 678 | # Transform label coordinates 679 | n = len(targets) 680 | if n: 681 | # warp points 682 | xy = np.ones((n * 4, 3)) 683 | xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 684 | xy = (xy @ M.T)[:, :2].reshape(n, 8) 685 | 686 | # create new boxes 687 | x = xy[:, [0, 2, 4, 6]] 688 | y = xy[:, [1, 3, 5, 7]] 689 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 690 | 691 | # # apply angle-based reduction of bounding boxes 692 | # radians = a * math.pi / 180 693 | # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 694 | # x = (xy[:, 2] + xy[:, 0]) / 2 695 | # y = (xy[:, 3] + xy[:, 1]) / 2 696 | # w = (xy[:, 2] - xy[:, 0]) * reduction 697 | # h = (xy[:, 3] - xy[:, 1]) * reduction 698 | # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T 699 | 700 | # reject warped points outside of image 701 | xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 702 | xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 703 | w = xy[:, 2] - xy[:, 0] 704 | h = xy[:, 3] - xy[:, 1] 705 | area = w * h 706 | area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2]) 707 | ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) 708 | i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) 709 | 710 | targets = targets[i] 711 | targets[:, 1:5] = xy[i] 712 | 713 | return img, targets 714 | 715 | 716 | def cutout(image, labels): 717 | # https://arxiv.org/abs/1708.04552 718 | # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py 719 | # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509 720 | h, w = image.shape[:2] 721 | 722 | def bbox_ioa(box1, box2, x1y1x2y2=True): 723 | # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. 
boxes are x1y1x2y2 724 | box2 = box2.transpose() 725 | 726 | # Get the coordinates of bounding boxes 727 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 728 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 729 | 730 | # Intersection area 731 | inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ 732 | (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) 733 | 734 | # box2 area 735 | box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 736 | 737 | # Intersection over box2 area 738 | return inter_area / box2_area 739 | 740 | # create random masks 741 | scales = [0.5] * 1 # + [0.25] * 4 + [0.125] * 16 + [0.0625] * 64 + [0.03125] * 256 # image size fraction 742 | for s in scales: 743 | mask_h = random.randint(1, int(h * s)) 744 | mask_w = random.randint(1, int(w * s)) 745 | 746 | # box 747 | xmin = max(0, random.randint(0, w) - mask_w // 2) 748 | ymin = max(0, random.randint(0, h) - mask_h // 2) 749 | xmax = min(w, xmin + mask_w) 750 | ymax = min(h, ymin + mask_h) 751 | 752 | # apply random color mask 753 | mask_color = [random.randint(0, 255) for _ in range(3)] 754 | image[ymin:ymax, xmin:xmax] = mask_color 755 | 756 | # return unobscured labels 757 | if len(labels) and s > 0.03: 758 | box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) 759 | ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area 760 | labels = labels[ioa < 0.90] # remove >90% obscured labels 761 | 762 | return labels 763 | 764 | 765 | def convert_images2bmp(): 766 | # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s 767 | for path in ['../coco/images/val2014/', '../coco/images/train2014/']: 768 | folder = os.sep + Path(path).name 769 | output = path.replace(folder, folder + 'bmp') 770 | if os.path.exists(output): 771 | shutil.rmtree(output) # delete output folder 772 | os.makedirs(output) # make new output folder 773 | 774 | for f in tqdm(glob.glob('%s*.jpg' % path)): 775 | save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp') 776 | cv2.imwrite(save_name, cv2.imread(f)) 777 | 778 | for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']: 779 | with open(label_path, 'r') as file: 780 | lines = file.read() 781 | lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace( 782 | '/Users/glennjocher/PycharmProjects/', '../') 783 | with open(label_path.replace('5k', '5k_bmp'), 'w') as file: 784 | file.write(lines) 785 | 786 | 787 | def create_folder(path='./new_folder'): 788 | # Create folder 789 | if os.path.exists(path): 790 | shutil.rmtree(path) # delete output folder 791 | os.makedirs(path) # make new output folder 792 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" 13 | sudo shutdown 14 | 15 | # Re-clone 16 | rm -rf yolov3 # Warning: remove existing 17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1 20 | 21 | # Train 22 | python3 train.py 23 | 24 | # Resume 25 | python3 train.py --resume 26 | 27 | # Detect 28 | python3 detect.py 29 | 30 | # Test 31 | python3 test.py --save-json 32 | 33 | # Evolve 34 | for i in {0..500} 35 | do 36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 37 | done 38 | 39 | # Git pull 40 | git pull https://github.com/ultralytics/yolov3 # master 41 | git pull https://github.com/ultralytics/yolov3 test # branch 42 | 43 | # Test Darknet training 44 | python3 test.py --weights ../darknet/backup/yolov3.backup 45 | 46 | # Copy last.pt TO bucket 47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 48 | 49 | # Copy last.pt FROM bucket 50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 53 | 54 | # Reproduce tutorials 55 | rm results*.txt # WARNING: removes existing results 56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 60 | python3 -c "from utils import utils; utils.plot_results()" 61 | # gsutil cp results*.txt gs://ultralytics 62 | gsutil cp results.png gs://ultralytics 63 | sudo shutdown 64 | 65 | # Reproduce mAP 66 | python3 test.py --save-json --img-size 608 67 | python3 test.py --save-json --img-size 416 68 | python3 test.py --save-json --img-size 320 69 | sudo shutdown 70 | 71 | # Benchmark script 72 | git clone https://github.com/ultralytics/yolov3 # clone our repo 73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 76 | 77 | # Unit tests 78 | python3 detect.py # detect 2 persons, 1 tie 79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 83 | 84 | # AlexyAB Darknet 85 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 90 | 91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 92 | gsutil cp -r backup/*5000.weights gs://sm6/weights 93 | sudo shutdown 94 | 95 | 96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 101 | 102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 107 | 108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 109 | 110 | # Debug/Development 111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 113 | 114 | gsutil cp evolve.txt gs://ultralytics 115 | sudo shutdown 116 | 117 | #Docker 118 | sudo docker kill $(sudo docker ps -q) 119 | sudo docker pull ultralytics/yolov3:v1 120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1 121 | 122 | clear 123 | while true 124 | do 125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1 126 | done 127 | 128 | python3 train.py --data 
data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias 129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done 130 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s "https://drive.google.com/uc?export=download&id=%s"' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... 
', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Downloads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | -------------------------------------------------------------------------------- /utils/prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | from scipy.spatial import distance 7 | 8 | 9 | def get_sr_flag(epoch, sr): 10 | # return epoch >= 5 and sr 11 | return sr 12 | 13 | def parse_module_defs3(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | 26 | ignore_idx.add(18) 27 | 28 | 29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 30 | 31 | return CBL_idx, Conv_idx, prune_idx 32 | 33 | def 
parse_module_defs2(module_defs): 34 | 35 | CBL_idx = [] 36 | Conv_idx = [] 37 | shortcut_idx=dict() 38 | shortcut_all=set() 39 | for i, module_def in enumerate(module_defs): 40 | if module_def['type'] == 'convolutional': 41 | if module_def['batch_normalize'] == '1': 42 | CBL_idx.append(i) 43 | else: 44 | Conv_idx.append(i) 45 | 46 | ignore_idx = set() 47 | for i, module_def in enumerate(module_defs): 48 | if module_def['type'] == 'shortcut': 49 | identity_idx = (i + int(module_def['from'])) 50 | if module_defs[identity_idx]['type'] == 'convolutional': 51 | 52 | #ignore_idx.add(identity_idx) 53 | shortcut_idx[i-1]=identity_idx 54 | shortcut_all.add(identity_idx) 55 | elif module_defs[identity_idx]['type'] == 'shortcut': 56 | 57 | #ignore_idx.add(identity_idx - 1) 58 | shortcut_idx[i-1]=identity_idx-1 59 | shortcut_all.add(identity_idx-1) 60 | shortcut_all.add(i-1) 61 | # do not prune the conv layers before the upsample layers 62 | ignore_idx.add(84) 63 | ignore_idx.add(96) 64 | 65 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 66 | 67 | return CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all 68 | 69 | def parse_module_defs_rep(module_defs): 70 | 71 | CBL_idx = [] 72 | Conv_idx = [] 73 | rep_idx = [] 74 | shortcut_idx=dict() 75 | shortcut_all=set() 76 | for i, module_def in enumerate(module_defs): 77 | if module_def['type'] == 'RepvggBlock': 78 | CBL_idx.append(i*2) 79 | rep_idx.append(i*2) 80 | 81 | for i, module_def in enumerate(module_defs): 82 | if module_def['type'] == 'convolutional': 83 | if module_def['batch_normalize'] == '1': 84 | CBL_idx.append(i+28) 85 | else: 86 | Conv_idx.append(i+28) 87 | 88 | ignore_idx = set() 89 | for i, module_def in enumerate(module_defs): 90 | if module_def['type'] == 'shortcut': 91 | identity_idx = (i + int(module_def['from'])) 92 | if module_defs[identity_idx]['type'] == 'convolutional': 93 | 94 | #ignore_idx.add(identity_idx) 95 | shortcut_idx[i-1]=identity_idx 96 | shortcut_all.add(identity_idx) 97 | elif module_defs[identity_idx]['type'] == 'shortcut': 98 | 99 | #ignore_idx.add(identity_idx - 1) 100 | shortcut_idx[i-1]=identity_idx-1 101 | shortcut_all.add(identity_idx-1) 102 | shortcut_all.add(i-1) 103 | # do not prune the conv layers before the upsample layers 104 | ignore_idx.add(38+28-1) 105 | ignore_idx.add(50+28-1) 106 | 107 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 108 | 109 | return CBL_idx, Conv_idx, prune_idx, rep_idx, shortcut_idx,shortcut_all 110 | 111 | 112 | def parse_module_defs(module_defs): 113 | 114 | CBL_idx = [] 115 | Conv_idx = [] 116 | for i, module_def in enumerate(module_defs): 117 | if module_def['type'] == 'convolutional': 118 | if module_def['batch_normalize'] == '1': 119 | CBL_idx.append(i) 120 | else: 121 | Conv_idx.append(i) 122 | ignore_idx = set() 123 | for i, module_def in enumerate(module_defs): 124 | if module_def['type'] == 'shortcut': 125 | ignore_idx.add(i-1) 126 | identity_idx = (i + int(module_def['from'])) 127 | if module_defs[identity_idx]['type'] == 'convolutional': 128 | ignore_idx.add(identity_idx) 129 | elif module_defs[identity_idx]['type'] == 'shortcut': 130 | ignore_idx.add(identity_idx - 1) 131 | # do not prune the conv layers before the upsample layers 132 | ignore_idx.add(84) 133 | ignore_idx.add(96) 134 | 135 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 136 | 137 | return CBL_idx, Conv_idx, prune_idx 138 | 139 | 140 | def gather_bn_weights(module_list, prune_idx): 141 | 142 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 143 | 144 | bn_weights = torch.zeros(sum(size_list)) 145 | index = 0 146 | for idx, size in zip(prune_idx, 
size_list): 147 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 148 | index += size 149 | 150 | return bn_weights 151 | 152 | 153 | def write_cfg(cfg_file, module_defs): 154 | 155 | with open(cfg_file, 'w') as f: 156 | for module_def in module_defs: 157 | f.write(f"[{module_def['type']}]\n") 158 | for key, value in module_def.items(): 159 | if key != 'type': 160 | f.write(f"{key}={value}\n") 161 | f.write("\n") 162 | return cfg_file 163 | 164 | 165 | class BNOptimizer(): 166 | 167 | @staticmethod 168 | def updateBN(sr_flag, module_list, s, prune_idx): 169 | if sr_flag: 170 | for idx in prune_idx: 171 | # Sequential(Conv, BN, LeakyReLU) 172 | bn_module = module_list[idx][1] 173 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 174 | 175 | 176 | def obtain_quantiles(bn_weights, num_quantile=5): 177 | 178 | sorted_bn_weights, i = torch.sort(bn_weights) 179 | total = sorted_bn_weights.shape[0] 180 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 181 | print("\nBN weights quantile:") 182 | quantile_table = [ 183 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 184 | ["%.3f" % quantile for quantile in quantiles] 185 | ] 186 | print(AsciiTable(quantile_table).table) 187 | 188 | return quantiles 189 | 190 | 191 | def get_input_mask(module_defs, idx, CBLidx2mask): 192 | 193 | if idx == 0: 194 | return np.ones(3) 195 | 196 | if idx == 56: 197 | return CBLidx2mask[idx - 2] 198 | 199 | if module_defs[idx-28-1]['type'] == 'convolutional': 200 | return CBLidx2mask[idx - 1] 201 | elif module_defs[idx-28-1]['type'] == 'shortcut': 202 | return CBLidx2mask[idx - 2] 203 | elif module_defs[idx-28-1]['type'] == 'route': 204 | # print('idx:') 205 | # print(idx) 206 | route_in_idxs = [] 207 | for layer_i in module_defs[idx-28-1]['layers'].split(","): 208 | if int(layer_i) < 0: 209 | route_in_idxs.append(idx-1 + int(layer_i)) 210 | else: 211 | route_in_idxs.append(int(layer_i)*2) 212 | # print('route_in_idxs:') 213 | # print(route_in_idxs) 214 | if len(route_in_idxs) == 1: 215 | return CBLidx2mask[route_in_idxs[0]] 216 | elif len(route_in_idxs) == 2: 217 | # return np.concatenate([CBLidx2mask[in_idx-1] for in_idx in route_in_idxs]) 218 | return np.concatenate([CBLidx2mask[route_in_idxs[0]-1],CBLidx2mask[route_in_idxs[1]]]) 219 | else: 220 | print("Something wrong with route module!") 221 | raise Exception 222 | 223 | def get_rep_input_mask(module_defs, idx, CBLidx2mask): 224 | 225 | if idx == 0: 226 | return np.ones(3) 227 | 228 | if module_defs[int(idx/2) - 1]['type'] == 'RepvggBlock': 229 | return CBLidx2mask[idx - 2] 230 | 231 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 232 | 233 | for idx in CBL_idx: 234 | compact_CBL = compact_model.module_list[idx] 235 | loose_CBL = loose_model.module_list[idx] 236 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 237 | 238 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 239 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 240 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 241 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 242 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 243 | 244 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 245 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 246 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 247 | tmp = 
loose_conv.weight.data[:, in_channel_idx, :, :].clone() 248 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 249 | 250 | for idx in Conv_idx: 251 | compact_conv = compact_model.module_list[idx][0] 252 | loose_conv = loose_model.module_list[idx][0] 253 | 254 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 255 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 256 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 257 | compact_conv.bias.data = loose_conv.bias.data.clone() 258 | 259 | def init_weights_from_loose_model_rep(compact_model, loose_model, CBL_idx, Conv_idx, rep_idx, CBLidx2mask): 260 | 261 | # print(compact_model.module_list) 262 | # print('~~~~~~~~~~~~~~~~~~~~~~~~~') 263 | # print(loose_model.module_list) 264 | 265 | for idx in CBL_idx: 266 | if idx in rep_idx: 267 | compact_CBL = compact_model.module_list[idx] 268 | loose_CBL = loose_model.module_list[idx] 269 | # print(compact_CBL) 270 | # print(loose_CBL) 271 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 272 | 273 | input_mask = get_rep_input_mask(loose_model.module_defs, idx, CBLidx2mask) 274 | # print(input_mask) 275 | # try: 276 | # in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 277 | # except: 278 | # print(idx) 279 | # print(input_mask) 280 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 281 | # if idx==0: 282 | # print(in_channel_idx) 283 | # print('------------') 284 | # print(out_channel_idx) 285 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 286 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 287 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 288 | # iden = compact_conv.weight.data==loose_conv.weight.data 289 | # print(iden.sum()) 290 | else: 291 | compact_CBL = compact_model.module_list[idx] 292 | loose_CBL = loose_model.module_list[idx] 293 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 294 | 295 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 296 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 297 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 298 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 299 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 300 | 301 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 302 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 303 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 304 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 305 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 306 | # print('idx: '+str(idx)) 307 | # print(len(in_channel_idx)) 308 | # print(len(out_channel_idx)) 309 | # iden = compact_conv.weight.data==loose_conv.weight.data 310 | # print(iden.sum()) 311 | # iden2 = compact_bn.weight.data==loose_bn.weight.data 312 | # print(iden2.sum()) 313 | # print('-----------') 314 | 315 | for idx in Conv_idx: 316 | compact_conv = compact_model.module_list[idx][0] 317 | loose_conv = loose_model.module_list[idx][0] 318 | 319 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 320 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 321 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 322 | compact_conv.bias.data = loose_conv.bias.data.clone() 323 | 324 | 325 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 326 | 327 | 
pruned_model = deepcopy(model) 328 | for idx in prune_idx: 329 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 330 | bn_module = pruned_model.module_list[idx][1] 331 | 332 | bn_module.weight.data.mul_(mask) 333 | 334 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 335 | 336 | # conv layers before the two upsample layers 337 | next_idx_list = [idx + 1] 338 | if idx == 79: 339 | next_idx_list.append(84) 340 | elif idx == 91: 341 | next_idx_list.append(96) 342 | 343 | for next_idx in next_idx_list: 344 | next_conv = pruned_model.module_list[next_idx][0] 345 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 346 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 347 | if next_idx in CBL_idx: 348 | next_bn = pruned_model.module_list[next_idx][1] 349 | next_bn.running_mean.data.sub_(offset) 350 | else: 351 | # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 352 | next_conv.bias.data.add_(offset) 353 | 354 | bn_module.bias.data.mul_(mask) 355 | 356 | return pruned_model 357 | 358 | 359 | def prune_rep_model_keep_size(model, prune_idx, CBL_idx, rep_idx, CBLidx2mask): 360 | 361 | pruned_model = deepcopy(model) 362 | # for idx in prune_idx: 363 | # if idx in rep_idx: 364 | # # mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 365 | # # conv_module = pruned_model.module_list[idx][0] 366 | # # conv_module.weight.data = conv_module.weight.data.permute(1, 2, 3, 0).mul(mask).float().permute(3, 0, 1, 2) 367 | # # next_idx_list = [idx + 2] 368 | # pass 369 | # else: 370 | # mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 371 | # bn_module = pruned_model.module_list[idx][1] 372 | 373 | # bn_module.weight.data.mul_(mask) 374 | 375 | # activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 376 | 377 | # # conv layers before the two upsample layers 378 | # next_idx_list = [idx + 1] 379 | # if idx == 60: 380 | # next_idx_list.append(65) 381 | # elif idx == 72: 382 | # next_idx_list.append(77) 383 | 384 | # for next_idx in next_idx_list: 385 | # next_conv = pruned_model.module_list[next_idx][0] 386 | # conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 387 | # offset = conv_sum.matmul(activation.float().reshape(-1, 1)).reshape(-1) 388 | # if next_idx in CBL_idx: 389 | # next_bn = pruned_model.module_list[next_idx][1] 390 | # next_bn.running_mean.data.sub_(offset) 391 | # else: 392 | # # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 393 | # next_conv.bias.data.add_(offset) 394 | 395 | # bn_module.bias.data.mul_(mask) 396 | 397 | return pruned_model 398 | 399 | 400 | def obtain_bn_mask(bn_module, thre): 401 | 402 | thre = thre.cuda() 403 | mask = bn_module.weight.data.abs().ge(thre).float() 404 | 405 | return mask 406 | 407 | def obtain_l1_mask(bn_module, random_rate): 408 | 409 | w_copy = bn_module.weight.data.abs().clone() 410 | w_copy = torch.sum(w_copy, dim=(1,2,3)) 411 | length = w_copy.cpu().numpy().shape[0] 412 | num_retain = int(length*(1-random_rate)) 413 | _,y = torch.topk(w_copy,num_retain) 414 | 415 | mask = np.zeros(length) 416 | mask[y.cpu()] = 1 417 | 418 | return mask 419 | 420 | def obtain_l1_mask2(bn_module, random_rate): 421 | 422 | w_copy = bn_module.weight.data.abs().clone() 423 | w_copy = torch.sum(w_copy, dim=(1,2,3)) 424 | length = w_copy.cpu().numpy().shape[0] 425 | num_retain = int(length*random_rate) 426 | if num_retain==0: 427 | num_retain=1 428 | _,y = torch.topk(w_copy,num_retain) 429 | 430 | mask = np.zeros(length) 431 | mask[y.cpu()] = 1 432 | 433 | return mask 434 | 435 | def obtain_rep_mask(conv_module, distance_rate): 436 | length = conv_module.weight.data.size()[0] 437 | 
codebook = np.ones(length) 438 | weight_torch = conv_module.weight.data.abs().clone() 439 | 440 | similar_pruned_num = int(weight_torch.size()[0] * distance_rate) 441 | weight_vec = weight_torch.view(weight_torch.size()[0], -1) 442 | # norm1 = torch.norm(weight_vec, 1, 1) 443 | # norm1_np = norm1.cpu().numpy() 444 | norm2 = torch.norm(weight_vec, 2, 1) 445 | norm2_np = norm2.cpu().numpy() 446 | filter_small_index = [] 447 | filter_large_index = [] 448 | filter_large_index = norm2_np.argsort() 449 | 450 | indices = torch.LongTensor(filter_large_index).cuda() 451 | weight_vec_after_norm = torch.index_select(weight_vec, 0, indices).cpu().numpy() 452 | # for euclidean distance 453 | similar_matrix = distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'euclidean') 454 | # for cos similarity 455 | # similar_matrix = 1 - distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'cosine') 456 | similar_sum = np.sum(np.abs(similar_matrix), axis=0) 457 | 458 | # for distance similar: get the filter index with largest similarity == small distance 459 | similar_large_index = similar_sum.argsort()[similar_pruned_num:] 460 | similar_small_index = similar_sum.argsort()[: similar_pruned_num] 461 | similar_index_for_filter = [filter_large_index[i] for i in similar_small_index] 462 | 463 | # kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3] 464 | # for x in range(0, len(similar_index_for_filter)): 465 | # codebook[ 466 | # similar_index_for_filter[x] * kernel_length: (similar_index_for_filter[x] + 1) * kernel_length] = 0 467 | 468 | mask = np.ones(length) 469 | # mask[similar_index_for_filter] = 0 470 | 471 | return mask -------------------------------------------------------------------------------- /utils/tiny_prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def get_sr_flag(epoch, sr): 9 | # return epoch >= 5 and sr 10 | return sr 11 | 12 | 13 | def parse_module_defs(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | 26 | ignore_idx.add(18) 27 | 28 | 29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 30 | 31 | return CBL_idx, Conv_idx, prune_idx 32 | 33 | 34 | def gather_bn_weights(module_list, prune_idx): 35 | 36 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 37 | 38 | bn_weights = torch.zeros(sum(size_list)) 39 | index = 0 40 | for idx, size in zip(prune_idx, size_list): 41 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 42 | index += size 43 | 44 | return bn_weights 45 | 46 | 47 | def write_cfg(cfg_file, module_defs): 48 | 49 | with open(cfg_file, 'w') as f: 50 | for module_def in module_defs: 51 | f.write(f"[{module_def['type']}]\n") 52 | for key, value in module_def.items(): 53 | if key != 'type': 54 | f.write(f"{key}={value}\n") 55 | f.write("\n") 56 | return cfg_file 57 | 58 | 59 | class BNOptimizer(): 60 | 61 | @staticmethod 62 | def updateBN(sr_flag, module_list, s, prune_idx): 63 | if sr_flag: 64 | for idx in prune_idx: 65 | # Sequential(Conv, BN, LeakyReLU) 66 | bn_module = module_list[idx][1] 67 | 
bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 68 | 69 | 70 | def obtain_quantiles(bn_weights, num_quantile=5): 71 | 72 | sorted_bn_weights, i = torch.sort(bn_weights) 73 | total = sorted_bn_weights.shape[0] 74 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 75 | print("\nBN weights quantile:") 76 | quantile_table = [ 77 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 78 | ["%.3f" % quantile for quantile in quantiles] 79 | ] 80 | print(AsciiTable(quantile_table).table) 81 | 82 | return quantiles 83 | 84 | 85 | def get_input_mask(module_defs, idx, CBLidx2mask): 86 | 87 | if idx == 0: 88 | return np.ones(3) 89 | 90 | if module_defs[idx - 1]['type'] == 'convolutional': 91 | return CBLidx2mask[idx - 1] 92 | elif module_defs[idx - 1]['type'] == 'shortcut': 93 | return CBLidx2mask[idx - 2] 94 | elif module_defs[idx - 1]['type'] == 'route': 95 | route_in_idxs = [] 96 | for layer_i in module_defs[idx - 1]['layers'].split(","): 97 | if int(layer_i) < 0: 98 | route_in_idxs.append(idx - 1 + int(layer_i)) 99 | else: 100 | route_in_idxs.append(int(layer_i)) 101 | if len(route_in_idxs) == 1: 102 | return CBLidx2mask[route_in_idxs[0]] 103 | elif len(route_in_idxs) == 2: 104 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs]) 105 | else: 106 | print("Something wrong with route module!") 107 | raise Exception 108 | 109 | 110 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 111 | 112 | for idx in CBL_idx: 113 | compact_CBL = compact_model.module_list[idx] 114 | loose_CBL = loose_model.module_list[idx] 115 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 116 | 117 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 118 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 119 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 120 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 121 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 122 | 123 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 124 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 125 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 126 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 127 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 128 | 129 | for idx in Conv_idx: 130 | compact_conv = compact_model.module_list[idx][0] 131 | loose_conv = loose_model.module_list[idx][0] 132 | 133 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 134 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 135 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 136 | compact_conv.bias.data = loose_conv.bias.data.clone() 137 | 138 | 139 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 140 | 141 | pruned_model = deepcopy(model) 142 | for idx in prune_idx: 143 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 144 | bn_module = pruned_model.module_list[idx][1] 145 | 146 | bn_module.weight.data.mul_(mask) 147 | 148 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 149 | 150 | 151 | if idx<12: 152 | next_idx_list = [idx + 2] 153 | else: 154 | next_idx_list = [idx + 1] 155 | 156 | 157 | #next_idx_list = [idx + 1] 158 | if idx == 13: 159 | next_idx_list.append(18) 160 | 161 | 162 | 163 | for next_idx in next_idx_list: 164 | next_conv = 
pruned_model.module_list[next_idx][0] 165 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 166 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 167 | 168 | if next_idx in CBL_idx: 169 | next_bn = pruned_model.module_list[next_idx][1] 170 | next_bn.running_mean.data.sub_(offset) 171 | else: 172 | # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 173 | next_conv.bias.data.add_(offset) 174 | 175 | 176 | bn_module.bias.data.mul_(mask) 177 | 178 | return pruned_model 179 | 180 | 181 | def obtain_bn_mask(bn_module, thre): 182 | 183 | thre = thre.cuda() 184 | mask = bn_module.weight.data.abs().ge(thre).float() 185 | 186 | return mask 187 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availability 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 30 | for i in range(0, ng): 31 | if i == 1: 32 | cuda_str = ' ' * len(cuda_str) 33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 34 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 35 | else: 36 | print('Using CPU') 37 | 38 | print('') # skip a line 39 | return torch.device('cuda:0' if cuda else 'cpu') 40 | 41 | 42 | def fuse_conv_and_bn(conv, bn): 43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 44 | with torch.no_grad(): 45 | # init 46 | fusedconv = torch.nn.Conv2d(conv.in_channels, 47 | conv.out_channels, 48 | kernel_size=conv.kernel_size, 49 | stride=conv.stride, 50 | padding=conv.padding, 51 | bias=True) 52 | 53 | # prepare filters 54 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 57 | 58 | # prepare spatial bias 59 | if conv.bias is not None: 60 | b_conv = conv.bias 61 | else: 62 | b_conv = torch.zeros(conv.weight.size(0)).cuda() 63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 64 | fusedconv.bias.copy_(b_conv + b_bn) 65 | 66 | return fusedconv 67 | 68 | 69 | def model_info(model, report='summary'): 70 | # Plots a line-by-line description of a PyTorch model 71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 73 | if report == 'full': 74 | print('%5s %40s %9s %12s 
%20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 75 | for i, (name, p) in enumerate(model.named_parameters()): 76 | name = name.replace('module_list.', '') 77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 80 | 81 | 82 | def load_classifier(name='resnet101', n=2): 83 | # Loads a pretrained model reshaped to n-class output 84 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 85 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 86 | 87 | # Display model properties 88 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 89 | print(x + ' =', eval(x)) 90 | 91 | # Reshape output to n classes 92 | filters = model.last_linear.weight.shape[1] 93 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 94 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 95 | model.last_linear.out_features = n 96 | return model 97 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | --------------------------------------------------------------------------------
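
Usage note (not one of the repository files above): the helpers in utils/prune_utils.py implement a network-slimming style pipeline — gather_bn_weights collects the |gamma| values of every prunable BN layer, a global threshold turns them into per-layer channel masks via obtain_bn_mask, prune_model_keep_size folds the bias contribution of the removed channels into the downstream layers, and write_cfg together with init_weights_from_loose_model rebuild a compact model from the surviving channels. The sketch below only illustrates that intended flow; it is not the repository's own prune script. Assumptions: Darknet comes from models.py, the .pt checkpoint stores its state dict under a 'model' key, `percent`, `sketch_prune`, and the output cfg path are illustrative names, and the cfg rebuild is schematic (a complete script would also prepend the [net] hyperparameter block). Note also that index bookkeeping inside the helpers (ignore_idx, the idx-28 offsets in get_input_mask, layers 84/96) is tied to specific cfgs.

# Minimal sketch of the pruning flow, under the assumptions stated above.
import numpy as np
import torch

from models import Darknet  # assumption: models.py exposes Darknet(cfg)
from utils.prune_utils import (parse_module_defs, gather_bn_weights, obtain_bn_mask,
                               prune_model_keep_size, init_weights_from_loose_model,
                               write_cfg)


def sketch_prune(cfg='cfg/yolov3-hand.cfg', weights='weights/last.pt', percent=0.5):
    model = Darknet(cfg).cuda()
    model.load_state_dict(torch.load(weights)['model'])  # assumed checkpoint layout

    CBL_idx, Conv_idx, prune_idx = parse_module_defs(model.module_defs)

    # Global threshold over the |gamma| of every prunable BN layer.
    bn_weights = gather_bn_weights(model.module_list, prune_idx)
    thre = torch.sort(bn_weights)[0][int(len(bn_weights) * percent)]

    # Per-layer channel masks; layers excluded from pruning keep all channels.
    CBLidx2mask = {}
    for idx in CBL_idx:
        bn_module = model.module_list[idx][1]
        if idx in prune_idx:
            CBLidx2mask[idx] = obtain_bn_mask(bn_module, thre).cpu().numpy()
        else:
            CBLidx2mask[idx] = np.ones(bn_module.weight.data.shape[0], dtype=np.float32)

    # Zero the pruned gammas and fold the removed channels' bias activations into
    # the following layers, so the full-size model already mimics the pruned one.
    pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask)

    # Rebuild a compact cfg (schematic: only the filters= counts are updated here)
    # and copy the surviving weights across.
    compact_defs = [dict(d) for d in pruned_model.module_defs]
    for idx in CBL_idx:
        compact_defs[idx]['filters'] = str(int(CBLidx2mask[idx].sum()))
    compact_cfg = write_cfg('cfg/prune_yolov3-hand.cfg', compact_defs)

    compact_model = Darknet(compact_cfg).cuda()
    init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
    return compact_model

The same flow applies to utils/tiny_prune_utils.py and to the RepVGG-aware variants (parse_module_defs_rep, init_weights_from_loose_model_rep, prune_rep_model_keep_size), which differ mainly in how layer indices and channel masks are derived.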