├── .gitignore ├── README.md ├── cfg ├── yolov3-hand.cfg ├── yolov3-repvggB0-hand.cfg ├── yolov3-repvggB1-hand.cfg └── yolov3.cfg ├── convert_repyolo.py ├── data ├── converter.py ├── oxfordhand.data └── oxfordhand.names ├── detect.py ├── models.py ├── requirements.txt ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── parse_config.py ├── prune_utils.py ├── tiny_prune_utils.py ├── torch_utils.py └── utils.py └── weights └── download_yolov3_weights.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3-RepVGG-backbone 2 | 3 | This is a naive implementation of RepVGG as the backbone of yolov3. The whole project is based on [YOLOv3-complete-pruning](https://github.com/coldlarry/YOLOv3-complete-pruning.git) 4 | 5 | You can download my trained RepVGG-B0 and RepVGG-B1 models for testing. 6 | [BaiduDisk](https://pan.baidu.com/s/162_jQ3rr5s_w5W8YqfURBw) code: xypk -------------------------------------------------------------------------------- /cfg/yolov3-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=32 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | # Downsample 35 | 36 | [convolutional] 37 | batch_normalize=1 38 | filters=64 39 | size=3 40 | stride=2 41 | pad=1 42 | activation=leaky 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=32 47 | size=1 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=64 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [shortcut] 61 | from=-3 62 | activation=linear 63 | 64 | # Downsample 65 | 66 | [convolutional] 67 | batch_normalize=1 68 | filters=128 69 | size=3 70 | stride=2 71 | pad=1 72 | activation=leaky 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=64 77 | size=1 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=3 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [shortcut] 91 | from=-3 92 | activation=linear 93 | 94 | [convolutional] 95 | batch_normalize=1 96 | filters=64 97 | size=1 98 | stride=1 99 | pad=1 100 | activation=leaky 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [shortcut] 111 | from=-3 112 | activation=linear 113 | 114 | # Downsample 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=256 119 | size=3 120 | stride=2 121 | pad=1 122 | activation=leaky 123 | 124 | [convolutional] 125 | batch_normalize=1 126 | filters=128 127 | size=1 128 | stride=1 129 | pad=1 130 | activation=leaky 131 | 132 | [convolutional] 133 | batch_normalize=1 134 | filters=256 135 | size=3 136 | stride=1 137 | pad=1 138 | activation=leaky 139 | 140 | [shortcut] 
141 | from=-3 142 | activation=linear 143 | 144 | [convolutional] 145 | batch_normalize=1 146 | filters=128 147 | size=1 148 | stride=1 149 | pad=1 150 | activation=leaky 151 | 152 | [convolutional] 153 | batch_normalize=1 154 | filters=256 155 | size=3 156 | stride=1 157 | pad=1 158 | activation=leaky 159 | 160 | [shortcut] 161 | from=-3 162 | activation=linear 163 | 164 | [convolutional] 165 | batch_normalize=1 166 | filters=128 167 | size=1 168 | stride=1 169 | pad=1 170 | activation=leaky 171 | 172 | [convolutional] 173 | batch_normalize=1 174 | filters=256 175 | size=3 176 | stride=1 177 | pad=1 178 | activation=leaky 179 | 180 | [shortcut] 181 | from=-3 182 | activation=linear 183 | 184 | [convolutional] 185 | batch_normalize=1 186 | filters=128 187 | size=1 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | filters=256 195 | size=3 196 | stride=1 197 | pad=1 198 | activation=leaky 199 | 200 | [shortcut] 201 | from=-3 202 | activation=linear 203 | 204 | 205 | [convolutional] 206 | batch_normalize=1 207 | filters=128 208 | size=1 209 | stride=1 210 | pad=1 211 | activation=leaky 212 | 213 | [convolutional] 214 | batch_normalize=1 215 | filters=256 216 | size=3 217 | stride=1 218 | pad=1 219 | activation=leaky 220 | 221 | [shortcut] 222 | from=-3 223 | activation=linear 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | filters=128 228 | size=1 229 | stride=1 230 | pad=1 231 | activation=leaky 232 | 233 | [convolutional] 234 | batch_normalize=1 235 | filters=256 236 | size=3 237 | stride=1 238 | pad=1 239 | activation=leaky 240 | 241 | [shortcut] 242 | from=-3 243 | activation=linear 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=128 248 | size=1 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | batch_normalize=1 255 | filters=256 256 | size=3 257 | stride=1 258 | pad=1 259 | activation=leaky 260 | 261 | [shortcut] 262 | from=-3 263 | activation=linear 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=128 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [convolutional] 274 | batch_normalize=1 275 | filters=256 276 | size=3 277 | stride=1 278 | pad=1 279 | activation=leaky 280 | 281 | [shortcut] 282 | from=-3 283 | activation=linear 284 | 285 | # Downsample 286 | 287 | [convolutional] 288 | batch_normalize=1 289 | filters=512 290 | size=3 291 | stride=2 292 | pad=1 293 | activation=leaky 294 | 295 | [convolutional] 296 | batch_normalize=1 297 | filters=256 298 | size=1 299 | stride=1 300 | pad=1 301 | activation=leaky 302 | 303 | [convolutional] 304 | batch_normalize=1 305 | filters=512 306 | size=3 307 | stride=1 308 | pad=1 309 | activation=leaky 310 | 311 | [shortcut] 312 | from=-3 313 | activation=linear 314 | 315 | 316 | [convolutional] 317 | batch_normalize=1 318 | filters=256 319 | size=1 320 | stride=1 321 | pad=1 322 | activation=leaky 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=512 327 | size=3 328 | stride=1 329 | pad=1 330 | activation=leaky 331 | 332 | [shortcut] 333 | from=-3 334 | activation=linear 335 | 336 | 337 | [convolutional] 338 | batch_normalize=1 339 | filters=256 340 | size=1 341 | stride=1 342 | pad=1 343 | activation=leaky 344 | 345 | [convolutional] 346 | batch_normalize=1 347 | filters=512 348 | size=3 349 | stride=1 350 | pad=1 351 | activation=leaky 352 | 353 | [shortcut] 354 | from=-3 355 | activation=linear 356 | 357 | 358 | [convolutional] 359 | batch_normalize=1 360 | 
filters=256 361 | size=1 362 | stride=1 363 | pad=1 364 | activation=leaky 365 | 366 | [convolutional] 367 | batch_normalize=1 368 | filters=512 369 | size=3 370 | stride=1 371 | pad=1 372 | activation=leaky 373 | 374 | [shortcut] 375 | from=-3 376 | activation=linear 377 | 378 | [convolutional] 379 | batch_normalize=1 380 | filters=256 381 | size=1 382 | stride=1 383 | pad=1 384 | activation=leaky 385 | 386 | [convolutional] 387 | batch_normalize=1 388 | filters=512 389 | size=3 390 | stride=1 391 | pad=1 392 | activation=leaky 393 | 394 | [shortcut] 395 | from=-3 396 | activation=linear 397 | 398 | 399 | [convolutional] 400 | batch_normalize=1 401 | filters=256 402 | size=1 403 | stride=1 404 | pad=1 405 | activation=leaky 406 | 407 | [convolutional] 408 | batch_normalize=1 409 | filters=512 410 | size=3 411 | stride=1 412 | pad=1 413 | activation=leaky 414 | 415 | [shortcut] 416 | from=-3 417 | activation=linear 418 | 419 | 420 | [convolutional] 421 | batch_normalize=1 422 | filters=256 423 | size=1 424 | stride=1 425 | pad=1 426 | activation=leaky 427 | 428 | [convolutional] 429 | batch_normalize=1 430 | filters=512 431 | size=3 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 | [shortcut] 437 | from=-3 438 | activation=linear 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=1 444 | stride=1 445 | pad=1 446 | activation=leaky 447 | 448 | [convolutional] 449 | batch_normalize=1 450 | filters=512 451 | size=3 452 | stride=1 453 | pad=1 454 | activation=leaky 455 | 456 | [shortcut] 457 | from=-3 458 | activation=linear 459 | 460 | # Downsample 461 | 462 | [convolutional] 463 | batch_normalize=1 464 | filters=1024 465 | size=3 466 | stride=2 467 | pad=1 468 | activation=leaky 469 | 470 | [convolutional] 471 | batch_normalize=1 472 | filters=512 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | [convolutional] 479 | batch_normalize=1 480 | filters=1024 481 | size=3 482 | stride=1 483 | pad=1 484 | activation=leaky 485 | 486 | [shortcut] 487 | from=-3 488 | activation=linear 489 | 490 | [convolutional] 491 | batch_normalize=1 492 | filters=512 493 | size=1 494 | stride=1 495 | pad=1 496 | activation=leaky 497 | 498 | [convolutional] 499 | batch_normalize=1 500 | filters=1024 501 | size=3 502 | stride=1 503 | pad=1 504 | activation=leaky 505 | 506 | [shortcut] 507 | from=-3 508 | activation=linear 509 | 510 | [convolutional] 511 | batch_normalize=1 512 | filters=512 513 | size=1 514 | stride=1 515 | pad=1 516 | activation=leaky 517 | 518 | [convolutional] 519 | batch_normalize=1 520 | filters=1024 521 | size=3 522 | stride=1 523 | pad=1 524 | activation=leaky 525 | 526 | [shortcut] 527 | from=-3 528 | activation=linear 529 | 530 | [convolutional] 531 | batch_normalize=1 532 | filters=512 533 | size=1 534 | stride=1 535 | pad=1 536 | activation=leaky 537 | 538 | [convolutional] 539 | batch_normalize=1 540 | filters=1024 541 | size=3 542 | stride=1 543 | pad=1 544 | activation=leaky 545 | 546 | [shortcut] 547 | from=-3 548 | activation=linear 549 | 550 | ###################### 551 | 552 | [convolutional] 553 | batch_normalize=1 554 | filters=512 555 | size=1 556 | stride=1 557 | pad=1 558 | activation=leaky 559 | 560 | [convolutional] 561 | batch_normalize=1 562 | size=3 563 | stride=1 564 | pad=1 565 | filters=1024 566 | activation=leaky 567 | 568 | [convolutional] 569 | batch_normalize=1 570 | filters=512 571 | size=1 572 | stride=1 573 | pad=1 574 | activation=leaky 575 | 576 | [convolutional] 577 | batch_normalize=1 578 | 
size=3 579 | stride=1 580 | pad=1 581 | filters=1024 582 | activation=leaky 583 | 584 | [convolutional] 585 | batch_normalize=1 586 | filters=512 587 | size=1 588 | stride=1 589 | pad=1 590 | activation=leaky 591 | 592 | [convolutional] 593 | batch_normalize=1 594 | size=3 595 | stride=1 596 | pad=1 597 | filters=1024 598 | activation=leaky 599 | 600 | [convolutional] 601 | size=1 602 | stride=1 603 | pad=1 604 | filters=18 605 | activation=linear 606 | 607 | 608 | [yolo] 609 | mask = 6,7,8 610 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 611 | classes=1 612 | num=9 613 | jitter=.3 614 | ignore_thresh = .7 615 | truth_thresh = 1 616 | random=1 617 | 618 | 619 | [route] 620 | layers = -4 621 | 622 | [convolutional] 623 | batch_normalize=1 624 | filters=256 625 | size=1 626 | stride=1 627 | pad=1 628 | activation=leaky 629 | 630 | [upsample] 631 | stride=2 632 | 633 | [route] 634 | layers = -1, 61 635 | 636 | 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=256 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=leaky 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | size=3 649 | stride=1 650 | pad=1 651 | filters=512 652 | activation=leaky 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=256 657 | size=1 658 | stride=1 659 | pad=1 660 | activation=leaky 661 | 662 | [convolutional] 663 | batch_normalize=1 664 | size=3 665 | stride=1 666 | pad=1 667 | filters=512 668 | activation=leaky 669 | 670 | [convolutional] 671 | batch_normalize=1 672 | filters=256 673 | size=1 674 | stride=1 675 | pad=1 676 | activation=leaky 677 | 678 | [convolutional] 679 | batch_normalize=1 680 | size=3 681 | stride=1 682 | pad=1 683 | filters=512 684 | activation=leaky 685 | 686 | [convolutional] 687 | size=1 688 | stride=1 689 | pad=1 690 | filters=18 691 | activation=linear 692 | 693 | 694 | [yolo] 695 | mask = 3,4,5 696 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 697 | classes=1 698 | num=9 699 | jitter=.3 700 | ignore_thresh = .7 701 | truth_thresh = 1 702 | random=1 703 | 704 | 705 | 706 | [route] 707 | layers = -4 708 | 709 | [convolutional] 710 | batch_normalize=1 711 | filters=128 712 | size=1 713 | stride=1 714 | pad=1 715 | activation=leaky 716 | 717 | [upsample] 718 | stride=2 719 | 720 | [route] 721 | layers = -1, 36 722 | 723 | 724 | 725 | [convolutional] 726 | batch_normalize=1 727 | filters=128 728 | size=1 729 | stride=1 730 | pad=1 731 | activation=leaky 732 | 733 | [convolutional] 734 | batch_normalize=1 735 | size=3 736 | stride=1 737 | pad=1 738 | filters=256 739 | activation=leaky 740 | 741 | [convolutional] 742 | batch_normalize=1 743 | filters=128 744 | size=1 745 | stride=1 746 | pad=1 747 | activation=leaky 748 | 749 | [convolutional] 750 | batch_normalize=1 751 | size=3 752 | stride=1 753 | pad=1 754 | filters=256 755 | activation=leaky 756 | 757 | [convolutional] 758 | batch_normalize=1 759 | filters=128 760 | size=1 761 | stride=1 762 | pad=1 763 | activation=leaky 764 | 765 | [convolutional] 766 | batch_normalize=1 767 | size=3 768 | stride=1 769 | pad=1 770 | filters=256 771 | activation=leaky 772 | 773 | [convolutional] 774 | size=1 775 | stride=1 776 | pad=1 777 | filters=18 778 | activation=linear 779 | 780 | 781 | [yolo] 782 | mask = 0,1,2 783 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 784 | classes=1 785 | num=9 786 | jitter=.3 787 | ignore_thresh = .7 788 | truth_thresh = 1 789 | random=1 790 | 791 | 
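# Head-sizing note for the cfgs in this repo: the 1x1 convolution feeding each
# [yolo] layer must have filters = (classes + 5) * anchors_per_scale. With
# classes=1 (the single "hand" class) and 3 anchor masks per scale this gives
# (1 + 5) * 3 = 18, which is why the three convolutions directly before the
# [yolo] blocks above set filters=18; the stock yolov3.cfg further below keeps
# classes=80 and therefore filters = (80 + 5) * 3 = 255. To retarget these
# cfgs to another class count, update classes= in all three [yolo] blocks and
# filters= in the convolution immediately preceding each of them.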
-------------------------------------------------------------------------------- /cfg/yolov3-repvggB0-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | # 0 先保留 27 | # [convolutional0] 28 | # batch_normalize=1 29 | # filters=32 30 | # size=3 31 | # stride=1 32 | # pad=1 33 | # activation=leaky 34 | 35 | # 1:64 36 | # 1 37 | [RepvggBlock] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=relu 44 | 45 | # 4:64 46 | # 2 --- 5 47 | [RepvggBlock] 48 | batch_normalize=1 49 | filters=64 50 | size=3 51 | stride=2 52 | pad=1 53 | activation=relu 54 | 55 | # 3 --- 9 56 | [RepvggBlock] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=relu 63 | 64 | # 4 --- 13 65 | [RepvggBlock] 66 | batch_normalize=1 67 | filters=64 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=relu 72 | 73 | # 5 74 | [RepvggBlock] 75 | batch_normalize=1 76 | filters=64 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=relu 81 | 82 | # 6:128 83 | # 6 84 | [RepvggBlock] 85 | batch_normalize=1 86 | filters=128 87 | size=3 88 | stride=2 89 | pad=1 90 | activation=relu 91 | 92 | # 7 93 | [RepvggBlock] 94 | batch_normalize=1 95 | filters=128 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | # 8 102 | [RepvggBlock] 103 | batch_normalize=1 104 | filters=128 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=relu 109 | 110 | # 9 111 | [RepvggBlock] 112 | batch_normalize=1 113 | filters=128 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | # 10 120 | [RepvggBlock] 121 | batch_normalize=1 122 | filters=128 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=relu 127 | 128 | # 11 129 | [RepvggBlock] 130 | batch_normalize=1 131 | filters=128 132 | size=3 133 | stride=1 134 | pad=1 135 | activation=relu 136 | 137 | # 16:256 138 | # 12 139 | [RepvggBlock] 140 | batch_normalize=1 141 | filters=256 142 | size=3 143 | stride=2 144 | pad=1 145 | activation=relu 146 | 147 | # 13 148 | [RepvggBlock] 149 | batch_normalize=1 150 | filters=256 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=relu 155 | 156 | # 14 157 | [RepvggBlock] 158 | batch_normalize=1 159 | filters=256 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=relu 164 | 165 | # 15 166 | [RepvggBlock] 167 | batch_normalize=1 168 | filters=256 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=relu 173 | 174 | # 16 175 | [RepvggBlock] 176 | batch_normalize=1 177 | filters=256 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=relu 182 | 183 | # 17 184 | [RepvggBlock] 185 | batch_normalize=1 186 | filters=256 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=relu 191 | 192 | # 18 193 | [RepvggBlock] 194 | batch_normalize=1 195 | filters=256 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=relu 200 | 201 | # 19 202 | [RepvggBlock] 203 | batch_normalize=1 204 | filters=256 205 | size=3 206 | stride=1 207 | pad=1 208 | activation=relu 209 | 210 | # 20 211 | [RepvggBlock] 212 | batch_normalize=1 213 | filters=256 214 | size=3 215 | stride=1 216 | pad=1 217 | activation=relu 218 
| 219 | # 21 220 | [RepvggBlock] 221 | batch_normalize=1 222 | filters=256 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=relu 227 | 228 | # 22 229 | [RepvggBlock] 230 | batch_normalize=1 231 | filters=256 232 | size=3 233 | stride=1 234 | pad=1 235 | activation=relu 236 | 237 | # 23 238 | [RepvggBlock] 239 | batch_normalize=1 240 | filters=256 241 | size=3 242 | stride=1 243 | pad=1 244 | activation=relu 245 | 246 | # 24 247 | [RepvggBlock] 248 | batch_normalize=1 249 | filters=256 250 | size=3 251 | stride=1 252 | pad=1 253 | activation=relu 254 | 255 | # 25 256 | [RepvggBlock] 257 | batch_normalize=1 258 | filters=256 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=relu 263 | 264 | # 26 265 | [RepvggBlock] 266 | batch_normalize=1 267 | filters=256 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=relu 272 | 273 | # 26 --- 105 274 | [RepvggBlock] 275 | batch_normalize=1 276 | filters=256 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=relu 281 | 282 | # 1:512*2.5 283 | # 27 --- 109 284 | [RepvggBlock] 285 | batch_normalize=1 286 | filters=1280 287 | size=3 288 | stride=2 289 | pad=1 290 | activation=relu 291 | 292 | 293 | ###################### 294 | # 75 295 | # 28 --- 113 296 | [convolutional] 297 | batch_normalize=1 298 | filters=512 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | # 29 --- 114 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=1024 311 | activation=leaky 312 | 313 | # 30 ---115 314 | [convolutional] 315 | batch_normalize=1 316 | filters=512 317 | size=1 318 | stride=1 319 | pad=1 320 | activation=leaky 321 | 322 | # 31 --- 116 323 | [convolutional] 324 | batch_normalize=1 325 | size=3 326 | stride=1 327 | pad=1 328 | filters=1024 329 | activation=leaky 330 | 331 | # 32 --- 117 332 | [convolutional] 333 | batch_normalize=1 334 | filters=512 335 | size=1 336 | stride=1 337 | pad=1 338 | activation=leaky 339 | 340 | # ---- 341 | # 32 --- 118 342 | [convolutional] 343 | batch_normalize=1 344 | size=3 345 | stride=1 346 | pad=1 347 | filters=1024 348 | activation=leaky 349 | 350 | # 34 --- 119 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=18 356 | activation=linear 357 | 358 | # 35 --- 120 359 | [yolo] 360 | mask = 6,7,8 361 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 362 | classes=1 363 | num=9 364 | jitter=.3 365 | ignore_thresh = .7 366 | truth_thresh = 1 367 | random=1 368 | 369 | # 36 --- 121 370 | [route] 371 | layers = -4 372 | 373 | # 37 --- 122 374 | [convolutional] 375 | batch_normalize=1 376 | filters=256 377 | size=1 378 | stride=1 379 | pad=1 380 | activation=leaky 381 | 382 | # 38 383 | [upsample] 384 | stride=2 385 | 386 | # 39 387 | [route] 388 | layers = -1, 26 389 | 390 | 391 | # 40 392 | [convolutional] 393 | batch_normalize=1 394 | filters=256 395 | size=1 396 | stride=1 397 | pad=1 398 | activation=leaky 399 | 400 | # 41 401 | [convolutional] 402 | batch_normalize=1 403 | size=3 404 | stride=1 405 | pad=1 406 | filters=512 407 | activation=leaky 408 | 409 | # 42 410 | [convolutional] 411 | batch_normalize=1 412 | filters=256 413 | size=1 414 | stride=1 415 | pad=1 416 | activation=leaky 417 | 418 | # 43 419 | [convolutional] 420 | batch_normalize=1 421 | size=3 422 | stride=1 423 | pad=1 424 | filters=512 425 | activation=leaky 426 | 427 | # 44 428 | [convolutional] 429 | batch_normalize=1 430 | filters=256 431 | size=1 432 | stride=1 433 | pad=1 434 | activation=leaky 435 | 436 
| # 45 437 | [convolutional] 438 | batch_normalize=1 439 | size=3 440 | stride=1 441 | pad=1 442 | filters=512 443 | activation=leaky 444 | 445 | # 46 446 | [convolutional] 447 | size=1 448 | stride=1 449 | pad=1 450 | filters=18 451 | activation=linear 452 | 453 | # 47 454 | [yolo] 455 | mask = 3,4,5 456 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 457 | classes=1 458 | num=9 459 | jitter=.3 460 | ignore_thresh = .7 461 | truth_thresh = 1 462 | random=1 463 | 464 | 465 | # 48 466 | [route] 467 | layers = -4 468 | 469 | # 49 470 | [convolutional] 471 | batch_normalize=1 472 | filters=128 473 | size=1 474 | stride=1 475 | pad=1 476 | activation=leaky 477 | 478 | # 50 479 | [upsample] 480 | stride=2 481 | 482 | # 51 483 | [route] 484 | layers = -1, 10 485 | 486 | 487 | # 52 488 | [convolutional] 489 | batch_normalize=1 490 | filters=128 491 | size=1 492 | stride=1 493 | pad=1 494 | activation=leaky 495 | 496 | # 53 497 | [convolutional] 498 | batch_normalize=1 499 | size=3 500 | stride=1 501 | pad=1 502 | filters=256 503 | activation=leaky 504 | 505 | # 54 506 | [convolutional] 507 | batch_normalize=1 508 | filters=128 509 | size=1 510 | stride=1 511 | pad=1 512 | activation=leaky 513 | 514 | # 55 515 | [convolutional] 516 | batch_normalize=1 517 | size=3 518 | stride=1 519 | pad=1 520 | filters=256 521 | activation=leaky 522 | 523 | # 56 524 | [convolutional] 525 | batch_normalize=1 526 | filters=128 527 | size=1 528 | stride=1 529 | pad=1 530 | activation=leaky 531 | 532 | [convolutional] 533 | batch_normalize=1 534 | size=3 535 | stride=1 536 | pad=1 537 | filters=256 538 | activation=leaky 539 | 540 | [convolutional] 541 | size=1 542 | stride=1 543 | pad=1 544 | filters=18 545 | activation=linear 546 | 547 | 548 | [yolo] 549 | mask = 0,1,2 550 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 551 | classes=1 552 | num=9 553 | jitter=.3 554 | ignore_thresh = .7 555 | truth_thresh = 1 556 | random=1 557 | 558 | -------------------------------------------------------------------------------- /cfg/yolov3-repvggB1-hand.cfg: -------------------------------------------------------------------------------- 1 | 2 | [net] 3 | # Testing 4 | #batch=1 5 | #subdivisions=1 6 | # Training 7 | batch=16 8 | subdivisions=1 9 | width=416 10 | height=416 11 | channels=3 12 | momentum=0.9 13 | decay=0.0005 14 | angle=0 15 | saturation = 1.5 16 | exposure = 1.5 17 | hue=.1 18 | 19 | learning_rate=0.001 20 | burn_in=1000 21 | max_batches = 500200 22 | policy=steps 23 | steps=400000,450000 24 | scales=.1,.1 25 | 26 | # 0 先保留 27 | # [convolutional] 28 | # batch_normalize=1 29 | # filters=32 30 | # size=3 31 | # stride=1 32 | # pad=1 33 | # activation=leaky 34 | 35 | # 1:64 36 | # 1 37 | [RepvggBlock] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=2 42 | pad=1 43 | activation=relu 44 | 45 | # 4:64x2 46 | # 2 --- 5(i) 47 | [RepvggBlock] 48 | batch_normalize=1 49 | filters=128 50 | size=3 51 | stride=2 52 | pad=1 53 | activation=relu 54 | 55 | # 3 --- 9 56 | [RepvggBlock] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=relu 63 | 64 | # 4 --- 13 65 | [RepvggBlock] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=relu 72 | 73 | # 5 74 | [RepvggBlock] 75 | batch_normalize=1 76 | filters=128 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=relu 81 | 82 | # 6:128x2 83 | # 6 84 | [RepvggBlock] 85 | batch_normalize=1 86 | filters=256 87 | size=3 88 | 
stride=2 89 | pad=1 90 | activation=relu 91 | 92 | # 7 93 | [RepvggBlock] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=relu 100 | 101 | # 8 102 | [RepvggBlock] 103 | batch_normalize=1 104 | filters=256 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=relu 109 | 110 | # 9 111 | [RepvggBlock] 112 | batch_normalize=1 113 | filters=256 114 | size=3 115 | stride=1 116 | pad=1 117 | activation=relu 118 | 119 | # 10 120 | [RepvggBlock] 121 | batch_normalize=1 122 | filters=256 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=relu 127 | 128 | # 11 129 | [RepvggBlock] 130 | batch_normalize=1 131 | filters=256 132 | size=3 133 | stride=1 134 | pad=1 135 | activation=relu 136 | 137 | # 16:256x2 138 | # 12 139 | [RepvggBlock] 140 | batch_normalize=1 141 | filters=512 142 | size=3 143 | stride=2 144 | pad=1 145 | activation=relu 146 | 147 | # 13 148 | [RepvggBlock] 149 | batch_normalize=1 150 | filters=512 151 | size=3 152 | stride=1 153 | pad=1 154 | activation=relu 155 | 156 | # 14 157 | [RepvggBlock] 158 | batch_normalize=1 159 | filters=512 160 | size=3 161 | stride=1 162 | pad=1 163 | activation=relu 164 | 165 | # 15 166 | [RepvggBlock] 167 | batch_normalize=1 168 | filters=512 169 | size=3 170 | stride=1 171 | pad=1 172 | activation=relu 173 | 174 | # 16 175 | [RepvggBlock] 176 | batch_normalize=1 177 | filters=512 178 | size=3 179 | stride=1 180 | pad=1 181 | activation=relu 182 | 183 | # 17 184 | [RepvggBlock] 185 | batch_normalize=1 186 | filters=512 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=relu 191 | 192 | # 18 193 | [RepvggBlock] 194 | batch_normalize=1 195 | filters=512 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=relu 200 | 201 | # 19 202 | [RepvggBlock] 203 | batch_normalize=1 204 | filters=512 205 | size=3 206 | stride=1 207 | pad=1 208 | activation=relu 209 | 210 | # 20 211 | [RepvggBlock] 212 | batch_normalize=1 213 | filters=512 214 | size=3 215 | stride=1 216 | pad=1 217 | activation=relu 218 | 219 | # 21 220 | [RepvggBlock] 221 | batch_normalize=1 222 | filters=512 223 | size=3 224 | stride=1 225 | pad=1 226 | activation=relu 227 | 228 | # 22 229 | [RepvggBlock] 230 | batch_normalize=1 231 | filters=512 232 | size=3 233 | stride=1 234 | pad=1 235 | activation=relu 236 | 237 | # 23 238 | [RepvggBlock] 239 | batch_normalize=1 240 | filters=512 241 | size=3 242 | stride=1 243 | pad=1 244 | activation=relu 245 | 246 | # 24 247 | [RepvggBlock] 248 | batch_normalize=1 249 | filters=512 250 | size=3 251 | stride=1 252 | pad=1 253 | activation=relu 254 | 255 | # 25 256 | [RepvggBlock] 257 | batch_normalize=1 258 | filters=512 259 | size=3 260 | stride=1 261 | pad=1 262 | activation=relu 263 | 264 | # 26 265 | [RepvggBlock] 266 | batch_normalize=1 267 | filters=512 268 | size=3 269 | stride=1 270 | pad=1 271 | activation=relu 272 | 273 | # 27 --- 105 274 | [RepvggBlock] 275 | batch_normalize=1 276 | filters=512 277 | size=3 278 | stride=1 279 | pad=1 280 | activation=relu 281 | 282 | # 1:512x4 283 | # 28 --- 109 284 | [RepvggBlock] 285 | batch_normalize=1 286 | filters=2048 287 | size=3 288 | stride=2 289 | pad=1 290 | activation=relu 291 | 292 | 293 | ###################### 294 | # 75 295 | # 29 --- 113 296 | [convolutional] 297 | batch_normalize=1 298 | filters=512 299 | size=1 300 | stride=1 301 | pad=1 302 | activation=leaky 303 | 304 | # 30 --- 114 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=1024 311 | activation=leaky 312 | 313 | # 31 
---115 314 | [convolutional] 315 | batch_normalize=1 316 | filters=512 317 | size=1 318 | stride=1 319 | pad=1 320 | activation=leaky 321 | 322 | # 32 --- 116 323 | [convolutional] 324 | batch_normalize=1 325 | size=3 326 | stride=1 327 | pad=1 328 | filters=1024 329 | activation=leaky 330 | 331 | # 33 --- 117 332 | [convolutional] 333 | batch_normalize=1 334 | filters=512 335 | size=1 336 | stride=1 337 | pad=1 338 | activation=leaky 339 | 340 | # ---- 341 | # 34 --- 118 342 | [convolutional] 343 | batch_normalize=1 344 | size=3 345 | stride=1 346 | pad=1 347 | filters=1024 348 | activation=leaky 349 | 350 | # 35 --- 119 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=18 356 | activation=linear 357 | 358 | # 36 --- 120 359 | [yolo] 360 | mask = 6,7,8 361 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 362 | classes=1 363 | num=9 364 | jitter=.3 365 | ignore_thresh = .7 366 | truth_thresh = 1 367 | random=1 368 | 369 | # 37 --- 121 370 | [route] 371 | layers = -4 372 | 373 | # 38 --- 122 374 | [convolutional] 375 | batch_normalize=1 376 | filters=256 377 | size=1 378 | stride=1 379 | pad=1 380 | activation=leaky 381 | 382 | [upsample] 383 | stride=2 384 | 385 | [route] 386 | layers = -1, 26 387 | 388 | 389 | 390 | [convolutional] 391 | batch_normalize=1 392 | filters=256 393 | size=1 394 | stride=1 395 | pad=1 396 | activation=leaky 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | size=3 401 | stride=1 402 | pad=1 403 | filters=512 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=256 409 | size=1 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [convolutional] 415 | batch_normalize=1 416 | size=3 417 | stride=1 418 | pad=1 419 | filters=512 420 | activation=leaky 421 | 422 | [convolutional] 423 | batch_normalize=1 424 | filters=256 425 | size=1 426 | stride=1 427 | pad=1 428 | activation=leaky 429 | 430 | [convolutional] 431 | batch_normalize=1 432 | size=3 433 | stride=1 434 | pad=1 435 | filters=512 436 | activation=leaky 437 | 438 | [convolutional] 439 | size=1 440 | stride=1 441 | pad=1 442 | filters=18 443 | activation=linear 444 | 445 | 446 | [yolo] 447 | mask = 3,4,5 448 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 449 | classes=1 450 | num=9 451 | jitter=.3 452 | ignore_thresh = .7 453 | truth_thresh = 1 454 | random=1 455 | 456 | 457 | 458 | [route] 459 | layers = -4 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=128 464 | size=1 465 | stride=1 466 | pad=1 467 | activation=leaky 468 | 469 | [upsample] 470 | stride=2 471 | 472 | [route] 473 | layers = -1, 10 474 | 475 | 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=128 480 | size=1 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [convolutional] 486 | batch_normalize=1 487 | size=3 488 | stride=1 489 | pad=1 490 | filters=256 491 | activation=leaky 492 | 493 | [convolutional] 494 | batch_normalize=1 495 | filters=128 496 | size=1 497 | stride=1 498 | pad=1 499 | activation=leaky 500 | 501 | [convolutional] 502 | batch_normalize=1 503 | size=3 504 | stride=1 505 | pad=1 506 | filters=256 507 | activation=leaky 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=128 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | size=3 520 | stride=1 521 | pad=1 522 | filters=256 523 | activation=leaky 524 | 525 | [convolutional] 526 | size=1 527 | stride=1 
528 | pad=1 529 | filters=18 530 | activation=linear 531 | 532 | 533 | [yolo] 534 | mask = 0,1,2 535 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 536 | classes=1 537 | num=9 538 | jitter=.3 539 | ignore_thresh = .7 540 | truth_thresh = 1 541 | random=1 542 | 543 | -------------------------------------------------------------------------------- /cfg/yolov3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | 
filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 | pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | 
[shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .7 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | 
[upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .7 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | [convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .7 787 | truth_thresh = 1 788 | random=1 789 | -------------------------------------------------------------------------------- /convert_repyolo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | import argparse 6 | import json 7 | 8 | from torch.utils.data import DataLoader 9 | 10 | from models import * 11 | from utils.datasets import * 12 | from utils.utils import * 13 | 14 | use_dict_old = {'stage0.rbr_dense':'module_list.1','stage0.rbr_1x1':'module_list.2', 15 | # --------- 16 | 'stage1.0.rbr_dense':'module_list.5','stage1.0.rbr_1x1':'module_list.6', 17 | 'stage1.1.rbr_dense':'module_list.9','stage1.1.rbr_1x1':'module_list.10','stage1.1.rbr_identity':'module_list.11', 18 | 'stage1.2.rbr_dense':'module_list.13','stage1.2.rbr_1x1':'module_list.14','stage1.2.rbr_identity':'module_list.15', 19 | 
'stage1.3.rbr_dense':'module_list.17','stage1.3.rbr_1x1':'module_list.18','stage1.3.rbr_identity':'module_list.19', 20 | # --------- 21 | 'stage2.0.rbr_dense':'module_list.21','stage2.0.rbr_1x1':'module_list.22', 22 | 'stage2.1.rbr_dense':'module_list.25','stage2.1.rbr_1x1':'module_list.26','stage2.1.rbr_identity':'module_list.27', 23 | 'stage2.2.rbr_dense':'module_list.29','stage2.2.rbr_1x1':'module_list.30','stage2.2.rbr_identity':'module_list.31', 24 | 'stage2.3.rbr_dense':'module_list.33','stage2.3.rbr_1x1':'module_list.34','stage2.3.rbr_identity':'module_list.35', 25 | 'stage2.4.rbr_dense':'module_list.37','stage2.4.rbr_1x1':'module_list.38','stage2.4.rbr_identity':'module_list.39', 26 | 'stage2.5.rbr_dense':'module_list.41','stage2.5.rbr_1x1':'module_list.42','stage2.5.rbr_identity':'module_list.43', 27 | # --------- 28 | 'stage3.0.rbr_dense':'module_list.45','stage3.0.rbr_1x1':'module_list.46', 29 | 'stage3.1.rbr_dense':'module_list.49','stage3.1.rbr_1x1':'module_list.50','stage3.1.rbr_identity':'module_list.51', 30 | 'stage3.2.rbr_dense':'module_list.53','stage3.2.rbr_1x1':'module_list.54','stage3.2.rbr_identity':'module_list.55', 31 | 'stage3.3.rbr_dense':'module_list.57','stage3.3.rbr_1x1':'module_list.58','stage3.3.rbr_identity':'module_list.59', 32 | 'stage3.4.rbr_dense':'module_list.61','stage3.4.rbr_1x1':'module_list.62','stage3.4.rbr_identity':'module_list.63', 33 | 'stage3.5.rbr_dense':'module_list.65','stage3.5.rbr_1x1':'module_list.66','stage3.5.rbr_identity':'module_list.67', 34 | 'stage3.6.rbr_dense':'module_list.69','stage3.6.rbr_1x1':'module_list.70','stage3.6.rbr_identity':'module_list.71', 35 | 'stage3.7.rbr_dense':'module_list.73','stage3.7.rbr_1x1':'module_list.74','stage3.7.rbr_identity':'module_list.75', 36 | 'stage3.8.rbr_dense':'module_list.77','stage3.8.rbr_1x1':'module_list.78','stage3.8.rbr_identity':'module_list.79', 37 | 'stage3.9.rbr_dense':'module_list.81','stage3.9.rbr_1x1':'module_list.82','stage3.9.rbr_identity':'module_list.83', 38 | 'stage3.10.rbr_dense':'module_list.85','stage3.10.rbr_1x1':'module_list.86','stage3.10.rbr_identity':'module_list.87', 39 | 'stage3.11.rbr_dense':'module_list.89','stage3.11.rbr_1x1':'module_list.90','stage3.11.rbr_identity':'module_list.91', 40 | 'stage3.12.rbr_dense':'module_list.93','stage3.12.rbr_1x1':'module_list.94','stage3.12.rbr_identity':'module_list.95', 41 | 'stage3.13.rbr_dense':'module_list.97','stage3.13.rbr_1x1':'module_list.98','stage3.13.rbr_identity':'module_list.99', 42 | 'stage3.14.rbr_dense':'module_list.101','stage3.14.rbr_1x1':'module_list.102','stage3.14.rbr_identity':'module_list.103', 43 | 'stage3.15.rbr_dense':'module_list.105','stage3.15.rbr_1x1':'module_list.106','stage3.15.rbr_identity':'module_list.107', 44 | # ---------- 45 | 'stage4.0.rbr_dense':'module_list.109','stage4.0.rbr_1x1':'module_list.110' 46 | } 47 | 48 | use_dict = {'stage0.rbr_dense': 'module_list.0', 'stage0.rbr_1x1': 'module_list.1', 49 | 'stage1.0.rbr_dense': 'module_list.4', 'stage1.0.rbr_1x1': 'module_list.5', 50 | 'stage1.1.rbr_dense': 'module_list.8', 'stage1.1.rbr_1x1': 'module_list.9', 'stage1.1.rbr_identity': 'module_list.10', 51 | 'stage1.2.rbr_dense': 'module_list.12', 'stage1.2.rbr_1x1': 'module_list.13', 'stage1.2.rbr_identity': 'module_list.14', 52 | 'stage1.3.rbr_dense': 'module_list.16', 'stage1.3.rbr_1x1': 'module_list.17', 'stage1.3.rbr_identity': 'module_list.18', 53 | 'stage2.0.rbr_dense': 'module_list.20', 'stage2.0.rbr_1x1': 'module_list.21', 54 | 'stage2.1.rbr_dense': 'module_list.24', 
'stage2.1.rbr_1x1': 'module_list.25', 'stage2.1.rbr_identity': 'module_list.26', 55 | 'stage2.2.rbr_dense': 'module_list.28', 'stage2.2.rbr_1x1': 'module_list.29', 'stage2.2.rbr_identity': 'module_list.30', 56 | 'stage2.3.rbr_dense': 'module_list.32', 'stage2.3.rbr_1x1': 'module_list.33', 'stage2.3.rbr_identity': 'module_list.34', 57 | 'stage2.4.rbr_dense': 'module_list.36', 'stage2.4.rbr_1x1': 'module_list.37', 'stage2.4.rbr_identity': 'module_list.38', 58 | 'stage2.5.rbr_dense': 'module_list.40', 'stage2.5.rbr_1x1': 'module_list.41', 'stage2.5.rbr_identity': 'module_list.42', 59 | 'stage3.0.rbr_dense': 'module_list.44', 'stage3.0.rbr_1x1': 'module_list.45', 60 | 'stage3.1.rbr_dense': 'module_list.48', 'stage3.1.rbr_1x1': 'module_list.49', 'stage3.1.rbr_identity': 'module_list.50', 61 | 'stage3.2.rbr_dense': 'module_list.52', 'stage3.2.rbr_1x1': 'module_list.53', 'stage3.2.rbr_identity': 'module_list.54', 62 | 'stage3.3.rbr_dense': 'module_list.56', 'stage3.3.rbr_1x1': 'module_list.57', 'stage3.3.rbr_identity': 'module_list.58', 63 | 'stage3.4.rbr_dense': 'module_list.60', 'stage3.4.rbr_1x1': 'module_list.61', 'stage3.4.rbr_identity': 'module_list.62', 64 | 'stage3.5.rbr_dense': 'module_list.64', 'stage3.5.rbr_1x1': 'module_list.65', 'stage3.5.rbr_identity': 'module_list.66', 65 | 'stage3.6.rbr_dense': 'module_list.68', 'stage3.6.rbr_1x1': 'module_list.69', 'stage3.6.rbr_identity': 'module_list.70', 66 | 'stage3.7.rbr_dense': 'module_list.72', 'stage3.7.rbr_1x1': 'module_list.73', 'stage3.7.rbr_identity': 'module_list.74', 67 | 'stage3.8.rbr_dense': 'module_list.76', 'stage3.8.rbr_1x1': 'module_list.77', 'stage3.8.rbr_identity': 'module_list.78', 68 | 'stage3.9.rbr_dense': 'module_list.80', 'stage3.9.rbr_1x1': 'module_list.81', 'stage3.9.rbr_identity': 'module_list.82', 69 | 'stage3.10.rbr_dense': 'module_list.84', 'stage3.10.rbr_1x1': 'module_list.85', 'stage3.10.rbr_identity': 'module_list.86', 70 | 'stage3.11.rbr_dense': 'module_list.88', 'stage3.11.rbr_1x1': 'module_list.89', 'stage3.11.rbr_identity': 'module_list.90', 71 | 'stage3.12.rbr_dense': 'module_list.92', 'stage3.12.rbr_1x1': 'module_list.93', 'stage3.12.rbr_identity': 'module_list.94', 72 | 'stage3.13.rbr_dense': 'module_list.96', 'stage3.13.rbr_1x1': 'module_list.97', 'stage3.13.rbr_identity': 'module_list.98', 73 | 'stage3.14.rbr_dense': 'module_list.100', 'stage3.14.rbr_1x1': 'module_list.101', 'stage3.14.rbr_identity': 'module_list.102', 74 | 'stage3.15.rbr_dense': 'module_list.104', 'stage3.15.rbr_1x1': 'module_list.105', 'stage3.15.rbr_identity': 'module_list.106', 75 | 'stage4.0.rbr_dense': 'module_list.108', 'stage4.0.rbr_1x1': 'module_list.109'} 76 | 77 | # for rep_name in use_dict_old: 78 | # # yolo_name = k.replace(rep_name,use_dict[rep_name]) 79 | # yolo_name = use_dict_old[rep_name].split('.')[0]+'.'+str(int(use_dict_old[rep_name].split('.')[-1])-1) 80 | # use_dict[rep_name]=yolo_name 81 | 82 | # print(use_dict) 83 | 84 | 85 | def get_equivalent_kernel_bias2(weight): 86 | kernel3x3, bias3x3 = fuse_bn_tensor(weight[0:6]) 87 | kernel1x1, bias1x1 = fuse_bn_tensor(weight[6:]) 88 | return [kernel3x3 + pad_1x1_to_3x3_tensor(kernel1x1), bias3x3 + bias1x1] 89 | 90 | def get_equivalent_kernel_bias3(weight): 91 | kernel3x3, bias3x3 = fuse_bn_tensor(weight[0:6]) 92 | kernel1x1, bias1x1 = fuse_bn_tensor(weight[6:12]) 93 | kernelid, biasid = fuse_bn_tensor_bn(weight[12:]) 94 | return [kernel3x3 + pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid] 95 | 96 | def pad_1x1_to_3x3_tensor(kernel1x1): 97 | if 
kernel1x1 is None: 98 | return 0 99 | else: 100 | return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) 101 | 102 | def fuse_bn_tensor(branch): 103 | kernel = branch[0] 104 | gamma = branch[1] 105 | beta = branch[2] 106 | running_mean = branch[3] 107 | running_var = branch[4] 108 | eps = 1e-05 109 | 110 | std = (running_var + eps).sqrt() 111 | t = (gamma / std).reshape(-1, 1, 1, 1) 112 | return kernel * t, beta - running_mean * gamma / std 113 | 114 | def fuse_bn_tensor_bn(branch): 115 | input_dim = list(branch[0].size())[0] 116 | kernel_value = np.zeros((input_dim, input_dim, 3, 3), dtype=np.float32) 117 | for i in range(input_dim): 118 | kernel_value[i, i % input_dim, 1, 1] = 1 119 | id_tensor = torch.from_numpy(kernel_value).to(branch[0].device) 120 | kernel = id_tensor 121 | gamma = branch[0] 122 | beta = branch[1] 123 | running_mean = branch[2] 124 | running_var = branch[3] 125 | eps = 1e-05 126 | 127 | std = (running_var + eps).sqrt() 128 | t = (gamma / std).reshape(-1, 1, 1, 1) 129 | return kernel * t, beta - running_mean * gamma / std 130 | 131 | # def repvgg_convert(): 132 | # kernel, bias = self.get_equivalent_kernel_bias() 133 | # return kernel.detach().cpu().numpy(), bias.detach().cpu().numpy() 134 | 135 | def main(): 136 | device = torch_utils.select_device('2') 137 | # cfg = 'cfg/yolov3-repvggB0-hand.cfg' 138 | cfg = 'cfg/yolov3-repvggB1-hand.cfg' 139 | img_size=416 140 | # weights = 'weights_repvgg/B0/best.pt' 141 | # weights = 'weights_repvgg/B0/last.pt' 142 | weights = 'weights_repvgg/B1/last.pt' 143 | model = Darknet(cfg, img_size).to(device) 144 | ck = torch.load(weights, map_location=device) 145 | if 'model' in ck: 146 | model_ = ck['model'] 147 | else: 148 | model_ = ck 149 | # print(type(list(model_.items())[0][1])) 150 | convert_dict={} 151 | tmp2=[] 152 | tmp3=[] 153 | for k,v in model_.items(): 154 | ik = int(k.split('.')[1]) 155 | i = 2 * (int(k.split('.')[1]) // 4) 156 | 157 | if i==0 or i==2 or i==10 or i==22 or i==54: 158 | tmp2.append(v) 159 | if len(tmp2)==2*6: 160 | w,b = get_equivalent_kernel_bias2(tmp2) 161 | convert_dict['module_list.'+str(i)+'.conv.weight']=w 162 | convert_dict['module_list.'+str(i)+'.conv.bias']=b 163 | tmp2=[] 164 | elif i<=55: 165 | tmp3.append(v) 166 | if len(tmp3)==3*6-1: 167 | w,b = get_equivalent_kernel_bias3(tmp3) 168 | convert_dict['module_list.'+str(i)+'.conv.weight']=w 169 | convert_dict['module_list.'+str(i)+'.conv.bias']=b 170 | tmp3=[] 171 | elif i>55: 172 | convert_dict[k.replace(str(ik),str(ik-56))]=v 173 | 174 | # torch.save(convert_dict,'repB0_convert_last.pt') 175 | torch.save(convert_dict,'repB1_convert_last.pt') 176 | 177 | main() -------------------------------------------------------------------------------- /data/converter.py: -------------------------------------------------------------------------------- 1 | import scipy.io as sio 2 | from PIL import Image 3 | import os, glob 4 | import datetime 5 | import shutil 6 | 7 | running_from_path = os.getcwd() 8 | created_images_dir = 'images' 9 | created_labels_dir = 'labels' 10 | data_dir = 'data' # data_dir为脚本所在的文件夹 11 | 12 | def hms_string(sec_elapsed): # 格式化显示已消耗时间 13 | h = int(sec_elapsed / (60 * 60)) 14 | m = int((sec_elapsed % (60 * 60)) / 60) 15 | s = sec_elapsed % 60. 
16 | return "{}:{:>02}:{:>05.2f}".format(h, m, s) 17 | 18 | def generate_dir(set_name, root_path): # create the matching sub-directories under images/ and labels/ 19 | images_dir = os.path.join(root_path, 'images') 20 | annotation_dir = os.path.join(root_path, 'annotations') 21 | 22 | new_images_dir = os.path.join(created_images_dir, set_name) # images are copied from the original folder into this directory 23 | new_annotation_dir = os.path.join(created_labels_dir, set_name) 24 | 25 | if not os.path.exists(new_images_dir): 26 | os.makedirs(new_images_dir) 27 | 28 | if not os.path.exists(new_annotation_dir): 29 | os.makedirs(new_annotation_dir) 30 | 31 | for img in glob.glob(os.path.join(images_dir, "*.jpg")): # copy the images from the original folder into the new folder 32 | shutil.copy(img, new_images_dir) 33 | 34 | os.chdir(annotation_dir) # switch into the annotation directory 35 | matlab_annotations = glob.glob("*.mat") # file names only, without paths 36 | os.chdir(running_from_path) # switch back to the original working directory 37 | 38 | for matfile in matlab_annotations: 39 | filename = matfile.split(".")[0] 40 | 41 | pil_image = Image.open(os.path.join(images_dir, filename+".jpg")) 42 | 43 | content = sio.loadmat(os.path.join(annotation_dir, matfile), matlab_compatible=False) 44 | 45 | boxes = content["boxes"] 46 | 47 | width, height = pil_image.size 48 | 49 | with open(os.path.join(new_annotation_dir, filename+".txt"), "w") as hs: 50 | for box_idx, box in enumerate(boxes.T): 51 | a = box[0][0][0][0] 52 | b = box[0][0][0][1] 53 | c = box[0][0][0][2] 54 | d = box[0][0][0][3] 55 | 56 | aXY = (a[0][1], a[0][0]) 57 | bXY = (b[0][1], b[0][0]) 58 | cXY = (c[0][1], c[0][0]) 59 | dXY = (d[0][1], d[0][0]) 60 | 61 | maxX = max(aXY[0], bXY[0], cXY[0], dXY[0]) 62 | minX = min(aXY[0], bXY[0], cXY[0], dXY[0]) 63 | maxY = max(aXY[1], bXY[1], cXY[1], dXY[1]) 64 | minY = min(aXY[1], bXY[1], cXY[1], dXY[1]) 65 | 66 | # clip to keep the box inside the image 67 | maxX = min(maxX, width-1) 68 | minX = max(minX, 0) 69 | maxY = min(maxY, height-1) 70 | minY = max(minY, 0) 71 | 72 | # (box width / image width) 73 | norm_width = (maxX - minX) / width 74 | 75 | # (box height / image height) 76 | norm_height = (maxY - minY) / height 77 | 78 | center_x, center_y = (maxX + minX) / 2, (maxY + minY) / 2 79 | 80 | norm_center_x = center_x / width 81 | norm_center_y = center_y / height 82 | 83 | if box_idx != 0: 84 | hs.write("\n") 85 | 86 | hs.write("0 %f %f %f %f" % (norm_center_x, norm_center_y, norm_width, norm_height)) # the leading 0 is the class id 87 | 88 | def create_txt(dirlist, filename): 89 | with open(filename, "w") as txtfile: # write the txt file under the data folder 90 | imglist = [] 91 | 92 | for dir in dirlist: # dir='images/test' 93 | imglist.extend(glob.glob(os.path.join(dir, "*.jpg"))) # img='images/test/abc.jpg' 94 | 95 | for idx, img in enumerate(imglist): 96 | if idx != 0: 97 | txtfile.write("\n") 98 | txtfile.write(os.path.join(data_dir, img)) # prepend the data/ prefix 99 | 100 | if __name__ == '__main__': 101 | start_time = datetime.datetime.now() 102 | 103 | generate_dir("train", "hand_dataset/training_dataset/training_data") # the first argument is the name of the generated sub-directory 104 | generate_dir("test", "hand_dataset/test_dataset/test_data") 105 | generate_dir("validation", "hand_dataset/validation_dataset/validation_data") 106 | 107 | create_txt((os.path.join(created_images_dir, 'train'), # merge the images under train and validation into the train split 108 | os.path.join(created_images_dir, 'validation')), 109 | 'train.txt') 110 | create_txt((os.path.join(created_images_dir, 'test'), ), 111 | 'valid.txt') 112 | 113 | end_time = datetime.datetime.now() 114 | seconds_elapsed = (end_time - start_time).total_seconds() 115 | print("It took {} to execute this".format(hms_string(seconds_elapsed)))
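# --- Illustrative helper (not part of the original repo) --------------------
# The labels written above follow the YOLO text format: one line per box,
# "class x_center y_center width height", with every value normalised to
# [0, 1] by the image width/height. A minimal sketch of the inverse mapping,
# handy for spot-checking the generated label files; the function name and
# the sample numbers in the usage comment are made up for illustration.
def yolo_line_to_corners(line, img_w, img_h):
    # Decode "0 cx cy w h" (normalised) back to pixel (x_min, y_min, x_max, y_max).
    cls_id, cx, cy, w, h = line.split()
    cx, cy = float(cx) * img_w, float(cy) * img_h
    w, h = float(w) * img_w, float(h) * img_h
    return int(cls_id), cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2

# Example: yolo_line_to_corners("0 0.5 0.5 0.25 0.1", 416, 416)
#          -> (0, 156.0, 187.2, 260.0, 228.8)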
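# --- Illustrative sanity check (not part of the original repo) --------------
# convert_repyolo.py earlier in this repo folds each RepvggBlock's 3x3+BN,
# 1x1+BN and identity+BN branches into a single 3x3 convolution
# (fuse_bn_tensor / pad_1x1_to_3x3_tensor / get_equivalent_kernel_bias*).
# The self-contained sketch below numerically checks the core conv+BN folding
# identity on random weights; the function name is illustrative and nothing
# here loads this repo's checkpoints.
def check_conv_bn_fold():
    import torch
    import torch.nn as nn
    torch.manual_seed(0)
    conv = nn.Conv2d(4, 8, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(8).eval()  # conversion-time behaviour: use running statistics
    bn.running_mean.uniform_(-1, 1)
    bn.running_var.uniform_(0.5, 2.0)
    bn.weight.data.uniform_(0.5, 1.5)
    bn.bias.data.uniform_(-1, 1)
    # Fold BN into the conv: w' = w * gamma / std, b' = beta - mean * gamma / std
    std = (bn.running_var + bn.eps).sqrt()
    t = (bn.weight.data / std).reshape(-1, 1, 1, 1)
    fused = nn.Conv2d(4, 8, 3, padding=1, bias=True)
    fused.weight.data = conv.weight.data * t
    fused.bias.data = bn.bias.data - bn.running_mean * bn.weight.data / std
    x = torch.randn(1, 4, 16, 16)
    with torch.no_grad():
        return torch.allclose(bn(conv(x)), fused(x), atol=1e-5)

# Example: check_conv_bn_fold() is expected to return True.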
-------------------------------------------------------------------------------- /data/oxfordhand.data: -------------------------------------------------------------------------------- 1 | classes= 1 2 | train=data/train.txt 3 | valid=data/valid.txt 4 | names=data/oxfordhand.names 5 | -------------------------------------------------------------------------------- /data/oxfordhand.names: -------------------------------------------------------------------------------- 1 | hand 2 | 3 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=True) 47 | return 48 | 49 | # Half precision 50 | half = half and device.type != 'cpu' # half precision only supported on CUDA 51 | if half: 52 | model.half() 53 | 54 | # Set Dataloader 55 | vid_path, vid_writer = None, None 56 | if webcam: 57 | view_img = True 58 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 59 | dataset = LoadStreams(source, img_size=img_size, half=half) 60 | else: 61 | save_img = True 62 | dataset = LoadImages(source, img_size=img_size, half=half) 63 | 64 | # Get classes and colors 65 | classes = load_classes(parse_data_cfg(opt.data)['names']) 66 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))] 67 | 68 | # Run inference 69 | t0 = time.time() 70 | for path, img, im0s, vid_cap in dataset: 71 | t = time.time() 72 | 73 | # Get detections 74 | img = torch.from_numpy(img).to(device) 75 | if img.ndimension() == 3: 76 | img = img.unsqueeze(0) 77 | pred = model(img)[0] 78 | 79 | if opt.half: 80 | pred = pred.float() 81 | 82 | # Apply NMS 83 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres) 84 | 85 | # Apply 86 | if classify: 87 | pred = apply_classifier(pred, 
modelc, img, im0s) 88 | 89 | # Process detections 90 | for i, det in enumerate(pred): # detections per image 91 | if webcam: # batch_size >= 1 92 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 93 | else: 94 | p, s, im0 = path, '', im0s 95 | 96 | save_path = str(Path(out) / Path(p).name) 97 | s += '%gx%g ' % img.shape[2:] # print string 98 | if det is not None and len(det): 99 | # Rescale boxes from img_size to im0 size 100 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 101 | 102 | # Print results 103 | for c in det[:, -1].unique(): 104 | n = (det[:, -1] == c).sum() # detections per class 105 | s += '%g %ss, ' % (n, classes[int(c)]) # add to string 106 | 107 | # Write results 108 | for *xyxy, conf, _, cls in det: 109 | if save_txt: # Write to file 110 | with open(save_path + '.txt', 'a') as file: 111 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 112 | 113 | if save_img or view_img: # Add bbox to image 114 | label = '%s %.2f' % (classes[int(cls)], conf) 115 | #plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 116 | plot_one_box(xyxy, im0, label=None, color=colors[int(cls)]) 117 | 118 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 119 | 120 | # Stream results 121 | if view_img: 122 | cv2.imshow(p, im0) 123 | 124 | # Save results (image with detections) 125 | if save_img: 126 | if dataset.mode == 'images': 127 | cv2.imwrite(save_path, im0) 128 | else: 129 | if vid_path != save_path: # new video 130 | vid_path = save_path 131 | if isinstance(vid_writer, cv2.VideoWriter): 132 | vid_writer.release() # release previous video writer 133 | 134 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 135 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 136 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 137 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 138 | vid_writer.write(im0) 139 | 140 | if save_txt or save_img: 141 | print('Results saved to %s' % os.getcwd() + os.sep + out) 142 | if platform == 'darwin': # MacOS 143 | os.system('open ' + out + ' ' + save_path) 144 | 145 | print('Done. (%.3fs)' % (time.time() - t0)) 146 | 147 | 148 | if __name__ == '__main__': 149 | parser = argparse.ArgumentParser() 150 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 151 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 152 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 153 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 154 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 155 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 156 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 157 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 158 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 159 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 160 | parser.add_argument('--device', default='', help='device id (i.e. 
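Before boxes are drawn or written to disk, the detection loop above maps them from the resized network input back to the original frame with scale_coords from utils/utils.py, which is not shown in this dump. The sketch below is an assumption about what that helper does under the usual letterbox convention (undo the padding, then undo the resize gain), not a copy of it:

```python
def scale_coords_sketch(net_shape, boxes, orig_shape):
    # net_shape / orig_shape are (height, width); boxes is an (N, 4) xyxy tensor in network-input pixels.
    gain = max(net_shape) / max(orig_shape)              # resize factor applied by the letterbox
    pad_x = (net_shape[1] - orig_shape[1] * gain) / 2    # horizontal padding added on each side
    pad_y = (net_shape[0] - orig_shape[0] * gain) / 2    # vertical padding added on each side
    boxes[:, [0, 2]] -= pad_x
    boxes[:, [1, 3]] -= pad_y
    boxes[:, :4] /= gain
    boxes[:, :4] = boxes[:, :4].clamp(min=0)             # the real helper also clips to the image size
    return boxes
```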
0 or 0,1) or cpu') 161 | parser.add_argument('--view-img', action='store_true', help='display results') 162 | opt = parser.parse_args() 163 | print(opt) 164 | 165 | with torch.no_grad(): 166 | detect() 167 | -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | from utils.google_utils import * 4 | from utils.parse_config import * 5 | from utils.utils import * 6 | import copy 7 | import os 8 | ONNX_EXPORT = False 9 | 10 | from quant_dorefa import QuanConv as Conv_q 11 | 12 | 13 | #权重量化为W_bit位 14 | W_bit=16 15 | #激活量化为A_bit位 16 | A_bit=16 17 | 18 | def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups=1): 19 | result = nn.Sequential() 20 | result.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=out_channels, 21 | kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)) 22 | result.add_module('bn', nn.BatchNorm2d(num_features=out_channels)) 23 | return result 24 | 25 | def create_modules(module_defs, img_size, arc, deploy): 26 | # Constructs module list of layer blocks from module configuration in module_defs 27 | 28 | hyperparams = module_defs.pop(0) 29 | output_filters = [int(hyperparams['channels'])] 30 | module_list = nn.ModuleList() 31 | routs = [] # list of layers which rout to deeper layes 32 | yolo_index = -1 33 | 34 | for i, mdef in enumerate(module_defs): 35 | modules = nn.Sequential() 36 | 37 | if mdef['type'] == 'quantize_convolutional': 38 | bn = int(mdef['batch_normalize']) 39 | filters = int(mdef['filters']) 40 | kernel_size = int(mdef['size']) 41 | pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0 42 | modules.add_module('Conv2d', Conv_q(in_channels=output_filters[-1], 43 | out_channels=filters, 44 | kernel_size=kernel_size, 45 | stride=int(mdef['stride']), 46 | padding=pad, 47 | bias=not bn, 48 | nbit_w=W_bit, 49 | nbit_a=A_bit)) 50 | 51 | 52 | if bn: 53 | modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) 54 | if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 55 | modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) 56 | # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10)) 57 | # modules.add_module('activation', Swish()) 58 | 59 | elif mdef['type'] == 'convolutional': 60 | bn = int(mdef['batch_normalize']) 61 | filters = int(mdef['filters']) 62 | kernel_size = int(mdef['size']) 63 | pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0 64 | modules.add_module('Conv2d', nn.Conv2d(in_channels=output_filters[-1], 65 | out_channels=filters, 66 | kernel_size=kernel_size, 67 | stride=int(mdef['stride']), 68 | padding=pad, 69 | bias=not bn)) 70 | 71 | 72 | if bn: 73 | modules.add_module('BatchNorm2d', nn.BatchNorm2d(filters, momentum=0.1)) 74 | if mdef['activation'] == 'leaky': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 75 | modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True)) 76 | 77 | elif mdef['type'] == 'RepvggBlock': 78 | if deploy==False: 79 | modules_repvgg = nn.ModuleList() 80 | bn = int(mdef['batch_normalize']) 81 | filters = int(mdef['filters']) 82 | kernel_size = int(mdef['size']) 83 | stride=int(mdef['stride']) 84 | pad = int(mdef['pad']) 85 | dense = conv_bn(in_channels=output_filters[-1],out_channels=filters,kernel_size=kernel_size,stride=stride,padding=pad) 86 | identy = 
nn.BatchNorm2d(num_features=filters) if output_filters[-1]==filters and stride==1 else None 87 | conv1x1 = conv_bn(in_channels=output_filters[-1],out_channels=filters,kernel_size=1,stride=stride,padding=0) 88 | modules_repvgg.append(dense) 89 | modules_repvgg.append(conv1x1) 90 | modules_repvgg.append(identy) 91 | if mdef['activation'] == 'relu': 92 | modules_repvgg.append(nn.ReLU()) 93 | module_list.extend(modules_repvgg) 94 | output_filters.append(filters) 95 | continue 96 | else: 97 | bn = int(mdef['batch_normalize']) 98 | filters = int(mdef['filters']) 99 | kernel_size = int(mdef['size']) 100 | stride=int(mdef['stride']) 101 | pad = int(mdef['pad']) 102 | modules.add_module('conv', nn.Conv2d(in_channels=output_filters[-1], 103 | out_channels=filters, 104 | kernel_size=kernel_size, 105 | stride=stride, 106 | padding=pad, 107 | bias=True)) 108 | module_list.append(modules) 109 | if mdef['activation'] == 'relu': # TODO: activation study https://github.com/ultralytics/yolov3/issues/441 110 | module_list.extend(nn.ModuleList().append(nn.ReLU())) 111 | output_filters.append(filters) 112 | continue 113 | 114 | elif mdef['type'] == 'maxpool': 115 | kernel_size = int(mdef['size']) 116 | stride = int(mdef['stride']) 117 | maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2)) 118 | if kernel_size == 2 and stride == 1: # yolov3-tiny 119 | modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1))) 120 | modules.add_module('MaxPool2d', maxpool) 121 | else: 122 | modules = maxpool 123 | 124 | elif mdef['type'] == 'upsample': 125 | modules = nn.Upsample(scale_factor=int(mdef['stride']), mode='nearest') 126 | 127 | elif mdef['type'] == 'route': # nn.Sequential() placeholder for 'route' layer 128 | layers = [int(x) for x in mdef['layers'].split(',')] 129 | filters = sum([output_filters[i + 1 if i > 0 else i] for i in layers]) 130 | routs.extend([l if l > 0 else l + i for l in layers]) 131 | # if mdef[i+1]['type'] == 'reorg3d': 132 | # modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest') # reorg3d 133 | 134 | elif mdef['type'] == 'shortcut': # nn.Sequential() placeholder for 'shortcut' layer 135 | filters = output_filters[int(mdef['from'])] 136 | layer = int(mdef['from']) 137 | routs.extend([i + layer if layer < 0 else layer]) 138 | 139 | elif mdef['type'] == 'reorg3d': # yolov3-spp-pan-scale 140 | # torch.Size([16, 128, 104, 104]) 141 | # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer 142 | pass 143 | 144 | elif mdef['type'] == 'yolo': 145 | yolo_index += 1 146 | mask = [int(x) for x in mdef['mask'].split(',')] # anchor mask 147 | modules = YOLOLayer(anchors=mdef['anchors'][mask], # anchor list 148 | nc=int(mdef['classes']), # number of classes 149 | img_size=img_size, # (416, 416) 150 | yolo_index=yolo_index, # 0, 1 or 2 151 | arc=arc) # yolo architecture 152 | 153 | # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3) 154 | try: 155 | if arc == 'defaultpw' or arc == 'Fdefaultpw': # default with positive weights 156 | b = [-4, -3.6] # obj, cls 157 | elif arc == 'default': # default no pw (40 cls, 80 obj) 158 | b = [-5.5, -4.0] 159 | elif arc == 'uBCE': # unified BCE (80 classes) 160 | b = [0, -8.5] 161 | elif arc == 'uCE': # unified CE (1 background + 80 classes) 162 | b = [10, -0.1] 163 | elif arc == 'Fdefault': # Focal default no pw (28 cls, 21 obj, no pw) 164 | b = [-2.1, -1.8] 165 | elif arc == 'uFBCE' or arc == 'uFBCEpw': # unified 
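The 'RepvggBlock' branch above assembles the training-time block as three parallel branches (3x3 conv+BN, 1x1 conv+BN, and a BN-only identity when input and output shapes match at stride 1) and, in deploy mode, a single biased 3x3 conv. A compact standalone sketch of the same block, written as a hypothetical module for illustration rather than the repo's exact class; conv_bn mirrors the helper at the top of models.py:

```python
import torch.nn as nn

def conv_bn(c_in, c_out, k, stride, pad):
    # Mirrors models.py's conv_bn: bias-free conv followed by BatchNorm.
    return nn.Sequential(nn.Conv2d(c_in, c_out, k, stride, pad, bias=False),
                         nn.BatchNorm2d(c_out))

class RepVGGBlockSketch(nn.Module):
    def __init__(self, c_in, c_out, stride=1, deploy=False):
        super().__init__()
        self.deploy = deploy
        if deploy:
            # After conversion a single biased 3x3 conv carries all three branches.
            self.reparam = nn.Conv2d(c_in, c_out, 3, stride, 1, bias=True)
        else:
            self.dense = conv_bn(c_in, c_out, 3, stride, 1)
            self.conv1x1 = conv_bn(c_in, c_out, 1, stride, 0)
            self.identity = nn.BatchNorm2d(c_out) if c_in == c_out and stride == 1 else None
        self.act = nn.ReLU()

    def forward(self, x):
        if self.deploy:
            return self.act(self.reparam(x))
        id_out = 0 if self.identity is None else self.identity(x)
        return self.act(self.dense(x) + self.conv1x1(x) + id_out)
```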
FocalBCE (5120 obj, 80 classes) 166 | b = [0, -6.5] 167 | elif arc == 'uFCE': # unified FocalCE (64 cls, 1 background + 80 classes) 168 | b = [7.7, -1.1] 169 | 170 | bias = module_list[-1][0].bias.view(len(mask), -1) # 255 to 3x85 171 | bias[:, 4] += b[0] - bias[:, 4].mean() # obj 172 | bias[:, 5:] += b[1] - bias[:, 5:].mean() # cls 173 | # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index] # list of tensors [3x85, 3x85, 3x85] 174 | module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1)) 175 | # utils.print_model_biases(model) 176 | except: 177 | print('WARNING: smart bias initialization failure.') 178 | 179 | else: 180 | print('Warning: Unrecognized Layer Type: ' + mdef['type']) 181 | 182 | # Register module list and number of output filters 183 | module_list.append(modules) 184 | output_filters.append(filters) 185 | 186 | return module_list, routs 187 | 188 | 189 | class Swish(nn.Module): 190 | def __init__(self): 191 | super(Swish, self).__init__() 192 | 193 | def forward(self, x): 194 | return x * torch.sigmoid(x) 195 | 196 | 197 | class YOLOLayer(nn.Module): 198 | def __init__(self, anchors, nc, img_size, yolo_index, arc): 199 | super(YOLOLayer, self).__init__() 200 | 201 | self.anchors = torch.Tensor(anchors) 202 | self.na = len(anchors) # number of anchors (3) 203 | self.nc = nc # number of classes (80) 204 | self.nx = 0 # initialize number of x gridpoints 205 | self.ny = 0 # initialize number of y gridpoints 206 | self.arc = arc 207 | 208 | if ONNX_EXPORT: # grids must be computed in __init__ 209 | stride = [32, 16, 8][yolo_index] # stride of this layer 210 | nx = int(img_size[1] / stride) # number x grid points 211 | ny = int(img_size[0] / stride) # number y grid points 212 | create_grids(self, img_size, (nx, ny)) 213 | 214 | def forward(self, p, img_size, var=None): 215 | if ONNX_EXPORT: 216 | bs = 1 # batch size 217 | else: 218 | bs, ny, nx = p.shape[0], p.shape[-2], p.shape[-1] 219 | if (self.nx, self.ny) != (nx, ny): 220 | create_grids(self, img_size, (nx, ny), p.device, p.dtype) 221 | 222 | # p.view(bs, 255, 13, 13) -- > (bs, 3, 13, 13, 85) # (bs, anchors, grid, grid, classes + xywh) 223 | p = p.view(bs, self.na, self.nc + 5, self.ny, self.nx).permute(0, 1, 3, 4, 2).contiguous() # prediction 224 | 225 | if self.training: 226 | return p 227 | 228 | elif ONNX_EXPORT: 229 | # Constants CAN NOT BE BROADCAST, ensure correct shape! 230 | ngu = self.ng.repeat((1, self.na * self.nx * self.ny, 1)) 231 | grid_xy = self.grid_xy.repeat((1, self.na, 1, 1, 1)).view((1, -1, 2)) 232 | anchor_wh = self.anchor_wh.repeat((1, 1, self.nx, self.ny, 1)).view((1, -1, 2)) / ngu 233 | 234 | p = p.view(-1, 5 + self.nc) 235 | xy = torch.sigmoid(p[..., 0:2]) + grid_xy[0] # x, y 236 | wh = torch.exp(p[..., 2:4]) * anchor_wh[0] # width, height 237 | p_conf = torch.sigmoid(p[:, 4:5]) # Conf 238 | p_cls = F.softmax(p[:, 5:85], 1) * p_conf # SSD-like conf 239 | return torch.cat((xy / ngu[0], wh, p_conf, p_cls), 1).t() 240 | 241 | # p = p.view(1, -1, 5 + self.nc) 242 | # xy = torch.sigmoid(p[..., 0:2]) + grid_xy # x, y 243 | # wh = torch.exp(p[..., 2:4]) * anchor_wh # width, height 244 | # p_conf = torch.sigmoid(p[..., 4:5]) # Conf 245 | # p_cls = p[..., 5:5 + self.nc] 246 | # # Broadcasting only supported on first dimension in CoreML. 
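At inference time each YOLO head decodes its raw outputs into boxes: a sigmoid offset added to the grid cell gives the centre, an exponential applied to the anchor priors gives width and height, and everything is scaled back to pixels by the layer stride. The snippet below is a condensed paraphrase of the inference branch for the 'default' arc, with grid_xy and anchor_wh shaped as create_grids builds them:

```python
import torch

def decode_yolo(p, grid_xy, anchor_wh, stride):
    # p: (bs, na, ny, nx, 5 + nc) raw head output; grid_xy: (1, 1, ny, nx, 2); anchor_wh: (1, na, 1, 1, 2).
    io = p.clone()
    io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + grid_xy   # box centre, in grid units
    io[..., 2:4] = torch.exp(io[..., 2:4]) * anchor_wh     # box size, in grid units
    io[..., :4] *= stride                                  # back to network-input pixels
    torch.sigmoid_(io[..., 4:])                            # objectness and class scores ('default' arc)
    return io.reshape(io.shape[0], -1, io.shape[-1])       # (bs, na*ny*nx, 5 + nc)
```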
See onnx-coreml/_operators.py 247 | # # p_cls = F.softmax(p_cls, 2) * p_conf # SSD-like conf 248 | # p_cls = torch.exp(p_cls).permute((2, 1, 0)) 249 | # p_cls = p_cls / p_cls.sum(0).unsqueeze(0) * p_conf.permute((2, 1, 0)) # F.softmax() equivalent 250 | # p_cls = p_cls.permute(2, 1, 0) 251 | # return torch.cat((xy / ngu, wh, p_conf, p_cls), 2).squeeze().t() 252 | 253 | else: # inference 254 | # s = 1.5 # scale_xy (pxy = pxy * s - (s - 1) / 2) 255 | io = p.clone() # inference output 256 | io[..., 0:2] = torch.sigmoid(io[..., 0:2]) + self.grid_xy # xy 257 | io[..., 2:4] = torch.exp(io[..., 2:4]) * self.anchor_wh # wh yolo method 258 | # io[..., 2:4] = ((torch.sigmoid(io[..., 2:4]) * 2) ** 3) * self.anchor_wh # wh power method 259 | io[..., :4] *= self.stride 260 | 261 | if 'default' in self.arc: # seperate obj and cls 262 | torch.sigmoid_(io[..., 4:]) 263 | elif 'BCE' in self.arc: # unified BCE (80 classes) 264 | torch.sigmoid_(io[..., 5:]) 265 | io[..., 4] = 1 266 | elif 'CE' in self.arc: # unified CE (1 background + 80 classes) 267 | io[..., 4:] = F.softmax(io[..., 4:], dim=4) 268 | io[..., 4] = 1 269 | 270 | if self.nc == 1: 271 | io[..., 5] = 1 # single-class model https://github.com/ultralytics/yolov3/issues/235 272 | 273 | # reshape from [1, 3, 13, 13, 85] to [1, 507, 85] 274 | return io.view(bs, -1, 5 + self.nc), p 275 | 276 | 277 | class Darknet(nn.Module): 278 | # YOLOv3 object detection model 279 | 280 | def __init__(self, cfg, img_size=(416, 416), arc='default', deploy=False): 281 | #我的添加 282 | super(Darknet, self).__init__() 283 | if isinstance(cfg, str): 284 | self.module_defs = parse_model_cfg(cfg) 285 | elif isinstance(cfg, list): 286 | self.module_defs = cfg 287 | 288 | self.hyperparams=copy.deepcopy(self.module_defs[0]) 289 | self.deploy = deploy 290 | 291 | self.module_list, self.routs = create_modules(self.module_defs, img_size, arc, deploy=self.deploy) 292 | # print(self.module_list) 293 | # print(self.routs) 294 | self.yolo_layers = get_yolo_layers(self) 295 | 296 | # Darknet Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 297 | self.version = np.array([0, 2, 5], dtype=np.int32) # (int32) version info: major, minor, revision 298 | self.seen = np.array([0], dtype=np.int64) # (int64) number of images seen during training 299 | 300 | def forward(self, x, var=None): 301 | img_size = x.shape[-2:] 302 | layer_outputs = [] 303 | output = [] 304 | 305 | # print(self.module_defs) 306 | # print(self.module_list) 307 | # for i, (mdef, module) in enumerate(zip(self.module_defs, self.module_list)): 308 | i = 0 309 | for mdef in self.module_defs: 310 | # module = self.module_list[i] 311 | mtype = mdef['type'] 312 | if mtype in ['convolutional', 'quantize_convolutional','upsample', 'maxpool']: 313 | module = self.module_list[i] 314 | # print(module) 315 | x = module(x) 316 | elif mtype == 'RepvggBlock': 317 | if self.deploy==False: 318 | # print(i) 319 | module=[self.module_list[i],self.module_list[i+1],self.module_list[i+2],self.module_list[i+3]] 320 | if module[2] is None: 321 | id_out=0 322 | else: 323 | id_out=module[2](x) 324 | x = module[3](module[0](x) + module[1](x) + id_out) 325 | # layer_outputs.append(x if int((i-1)/4+1) in self.routs else []) ### 326 | layer_outputs.append(x if int(i/4) in self.routs else []) 327 | i = i + 4 328 | continue 329 | else: 330 | module = self.module_list[i] 331 | x = module(x) 332 | module = self.module_list[i+1] 333 | x = module(x) 334 | layer_outputs.append(x if int(i/2) in self.routs else []) 335 | i = i + 2 336 
| continue 337 | elif mtype == 'route': 338 | module = self.module_list[i] 339 | # print(module) 340 | # print(i) 341 | layers = [int(x) for x in mdef['layers'].split(',')] 342 | if len(layers) == 1: 343 | x = layer_outputs[layers[0]] 344 | else: 345 | try: 346 | x = torch.cat([layer_outputs[i] for i in layers], 1) 347 | except: # apply stride 2 for darknet reorg layer 348 | layer_outputs[layers[1]] = F.interpolate(layer_outputs[layers[1]], scale_factor=[0.5, 0.5]) 349 | x = torch.cat([layer_outputs[i] for i in layers], 1) 350 | # print(''), [print(layer_outputs[i].shape) for i in layers], print(x.shape) 351 | elif mtype == 'shortcut': 352 | module = self.module_list[i] 353 | x = x + layer_outputs[int(mdef['from'])] 354 | elif mtype == 'yolo': 355 | module = self.module_list[i] 356 | x = module(x, img_size) 357 | output.append(x) 358 | 359 | if self.deploy==False: 360 | layer_outputs.append(x if i+1-113+28 in self.routs else []) ### 361 | else: 362 | layer_outputs.append(x if i-28 in self.routs else []) 363 | i = i + 1 364 | 365 | if self.training: 366 | return output 367 | elif ONNX_EXPORT: 368 | output = torch.cat(output, 1) # cat 3 layers 85 x (507, 2028, 8112) to 85 x 10647 369 | nc = self.module_list[self.yolo_layers[0]].nc # number of classes 370 | return output[5:5 + nc].t(), output[:4].t() # ONNX scores, boxes 371 | else: 372 | io, p = list(zip(*output)) # inference output, training output 373 | return torch.cat(io, 1), p 374 | 375 | def fuse(self): 376 | # Fuse Conv2d + BatchNorm2d layers throughout model 377 | fused_list = nn.ModuleList() 378 | for a in list(self.children())[0]: 379 | if isinstance(a, nn.Sequential): 380 | for i, b in enumerate(a): 381 | if isinstance(b, nn.modules.batchnorm.BatchNorm2d): 382 | # fuse this bn layer with the previous conv2d layer 383 | conv = a[i - 1] 384 | fused = torch_utils.fuse_conv_and_bn(conv, b) 385 | a = nn.Sequential(fused, *list(a.children())[i + 1:]) 386 | break 387 | fused_list.append(a) 388 | self.module_list = fused_list 389 | # model_info(self) # yolov3-spp reduced from 225 to 152 layers 390 | 391 | 392 | def get_yolo_layers(model): 393 | return [i for i, x in enumerate(model.module_defs) if x['type'] == 'yolo'] # [82, 94, 106] for yolov3 394 | 395 | 396 | def create_grids(self, img_size=416, ng=(13, 13), device='cpu', type=torch.float32): 397 | nx, ny = ng # x and y grid size 398 | self.img_size = max(img_size) 399 | self.stride = self.img_size / max(ng) 400 | 401 | # build xy offsets 402 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 403 | self.grid_xy = torch.stack((xv, yv), 2).to(device).type(type).view((1, 1, ny, nx, 2)) 404 | 405 | # build wh gains 406 | self.anchor_vec = self.anchors.to(device) / self.stride 407 | self.anchor_wh = self.anchor_vec.view(1, self.na, 1, 1, 2).to(device).type(type) 408 | self.ng = torch.Tensor(ng).to(device) 409 | self.nx = nx 410 | self.ny = ny 411 | 412 | 413 | def load_darknet_weights(self, weights, cutoff=-1): 414 | # Parses and loads the weights stored in 'weights' 415 | 416 | # Establish cutoffs (load layers between 0 and cutoff. 
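For 'route' and 'shortcut' entries, Darknet.forward above reuses the tensors cached in layer_outputs (only indices listed in self.routs are actually kept): route concatenates feature maps along the channel dimension, shortcut adds two maps of the same shape. A tiny illustration:

```python
import torch

a = torch.randn(1, 128, 26, 26)    # cached output of one earlier layer
b = torch.randn(1, 256, 26, 26)    # cached output of another layer with the same spatial size

routed = torch.cat([a, b], dim=1)  # 'route' with two layers: channel concat -> (1, 384, 26, 26)
short = a + a                      # 'shortcut': elementwise add of same-shaped feature maps
print(routed.shape, short.shape)
```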
if cutoff = -1 all are loaded) 417 | file = Path(weights).name 418 | if file == 'darknet53.conv.74': 419 | cutoff = 75 420 | elif file == 'yolov3-tiny.conv.15': 421 | cutoff = 15 422 | 423 | # Read weights file 424 | with open(weights, 'rb') as f: 425 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 426 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 427 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 428 | 429 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 430 | 431 | ptr = 0 432 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 433 | if mdef['type'] == 'convolutional' or mdef['type'] == 'quantize_convolutional': 434 | conv_layer = module[0] 435 | if mdef['batch_normalize']=='1': 436 | # Load BN bias, weights, running mean and running variance 437 | bn_layer = module[1] 438 | num_b = bn_layer.bias.numel() # Number of biases 439 | # Bias 440 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 441 | bn_layer.bias.data.copy_(bn_b) 442 | ptr += num_b 443 | # Weight 444 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 445 | bn_layer.weight.data.copy_(bn_w) 446 | ptr += num_b 447 | # Running Mean 448 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 449 | bn_layer.running_mean.data.copy_(bn_rm) 450 | ptr += num_b 451 | # Running Var 452 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 453 | bn_layer.running_var.data.copy_(bn_rv) 454 | ptr += num_b 455 | #自己加的 456 | num_w = conv_layer.weight.numel() 457 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 458 | conv_layer.weight.data.copy_(conv_w) 459 | ptr += num_w 460 | else: 461 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 462 | num_b=255 463 | ptr += num_b 464 | num_w = int(self.module_defs[i-1]["filters"]) * 255 465 | ptr += num_w 466 | else: 467 | # Load conv. bias 468 | 469 | num_b = conv_layer.bias.numel() 470 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 471 | conv_layer.bias.data.copy_(conv_b) 472 | ptr += num_b 473 | # Load conv. weights 474 | num_w = conv_layer.weight.numel() 475 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 476 | conv_layer.weight.data.copy_(conv_w) 477 | ptr += num_w 478 | # 确保指针到达权重的最后一个位置 479 | assert ptr == len(weights) 480 | 481 | return cutoff 482 | 483 | def load_darknet_weights2(self, weights, cutoff=-1): 484 | # Parses and loads the weights stored in 'weights' 485 | 486 | # Establish cutoffs (load layers between 0 and cutoff. 
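load_darknet_weights above consumes a flat float32 stream in cfg order: for a conv with batch norm it reads BN bias, BN weight, running mean and running variance (each `filters` values long) and then the conv weights; for a conv without batch norm it reads the conv bias first. A hedged sketch of just the file framing and the pointer-style copy used above:

```python
import numpy as np
import torch

def read_darknet_stream(path):
    # Header: 3 x int32 (major, minor, revision) + 1 x int64 (images seen), then raw float32 weights.
    with open(path, 'rb') as f:
        version = np.fromfile(f, dtype=np.int32, count=3)
        seen = np.fromfile(f, dtype=np.int64, count=1)
        weights = np.fromfile(f, dtype=np.float32)
    return version, seen, weights

def copy_next(weights, ptr, param):
    # Copy the next param.numel() floats into an existing parameter/buffer and advance the pointer.
    n = param.numel()
    param.data.copy_(torch.from_numpy(weights[ptr:ptr + n]).view_as(param))
    return ptr + n

# Per convolutional layer the stream holds, in order:
#   with BN:    bn.bias, bn.weight, bn.running_mean, bn.running_var, conv.weight
#   without BN: conv.bias, conv.weight
```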
if cutoff = -1 all are loaded) 487 | file = Path(weights).name 488 | if file == 'darknet53.conv.74': 489 | cutoff = 75 490 | elif file == 'yolov3-tiny.conv.15': 491 | cutoff = 15 492 | 493 | # Read weights file 494 | with open(weights, 'rb') as f: 495 | # Read Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 496 | self.version = np.fromfile(f, dtype=np.int32, count=3) # (int32) version info: major, minor, revision 497 | self.seen = np.fromfile(f, dtype=np.int64, count=1) # (int64) number of images seen during training 498 | 499 | weights = np.fromfile(f, dtype=np.float32) # The rest are weights 500 | 501 | ptr = 0 502 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 503 | if mdef['type'] == 'quantize_convolutional': 504 | conv_layer = module[0] 505 | if mdef['batch_normalize']=='0' or mdef['batch_normalize']==0: 506 | # Load BN bias, weights, running mean and running variance 507 | bn_layer = module[1] 508 | 509 | num_b = conv_layer.beta.numel() 510 | 511 | # beta 512 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.beta) 513 | conv_layer.beta.data.copy_(bn_b) 514 | ptr += num_b 515 | 516 | # gama 517 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.gamma) 518 | conv_layer.gamma.data.copy_(bn_w) 519 | ptr += num_b 520 | 521 | # Running Mean 522 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.running_mean) 523 | conv_layer.running_mean.data.copy_(bn_rm) 524 | ptr += num_b 525 | # Running Var 526 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.running_var) 527 | conv_layer.running_var.data.copy_(bn_rv) 528 | ptr += num_b 529 | #自己加的 530 | num_w = conv_layer.weight.numel() 531 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 532 | conv_layer.weight.data.copy_(conv_w) 533 | ptr += num_w 534 | else: 535 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 536 | num_b=255 537 | ptr += num_b 538 | num_w = int(self.module_defs[i-1]["filters"]) * 255 539 | ptr += num_w 540 | else: 541 | # Load conv. bias 542 | 543 | num_b = conv_layer.bias.numel() 544 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 545 | conv_layer.bias.data.copy_(conv_b) 546 | ptr += num_b 547 | # Load conv. 
weights 548 | num_w = conv_layer.weight.numel() 549 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 550 | conv_layer.weight.data.copy_(conv_w) 551 | ptr += num_w 552 | elif mdef['type'] == 'convolutional': 553 | conv_layer = module[0] 554 | if mdef['batch_normalize']=='1': 555 | # Load BN bias, weights, running mean and running variance 556 | bn_layer = module[1] 557 | num_b = bn_layer.bias.numel() # Number of biases 558 | # Bias 559 | bn_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.bias) 560 | bn_layer.bias.data.copy_(bn_b) 561 | ptr += num_b 562 | # Weight 563 | bn_w = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.weight) 564 | bn_layer.weight.data.copy_(bn_w) 565 | ptr += num_b 566 | # Running Mean 567 | bn_rm = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_mean) 568 | bn_layer.running_mean.data.copy_(bn_rm) 569 | ptr += num_b 570 | # Running Var 571 | bn_rv = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(bn_layer.running_var) 572 | bn_layer.running_var.data.copy_(bn_rv) 573 | ptr += num_b 574 | #自己加的 575 | num_w = conv_layer.weight.numel() 576 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 577 | conv_layer.weight.data.copy_(conv_w) 578 | ptr += num_w 579 | else: 580 | if os.path.basename(file) == 'yolov3.weights' or os.path.basename(file) == 'yolov3-tiny.weights': 581 | num_b=255 582 | ptr += num_b 583 | num_w = int(self.module_defs[i-1]["filters"]) * 255 584 | ptr += num_w 585 | else: 586 | # Load conv. bias 587 | 588 | num_b = conv_layer.bias.numel() 589 | conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias) 590 | conv_layer.bias.data.copy_(conv_b) 591 | ptr += num_b 592 | # Load conv. weights 593 | num_w = conv_layer.weight.numel() 594 | conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight) 595 | conv_layer.weight.data.copy_(conv_w) 596 | ptr += num_w 597 | 598 | # 确保指针到达权重的最后一个位置 599 | assert ptr == len(weights) 600 | 601 | return cutoff 602 | 603 | def save_weights(self, path='model.weights', cutoff=-1): 604 | # Converts a PyTorch model to Darket format (*.pt to *.weights) 605 | # Note: Does not work if model.fuse() is applied 606 | with open(path, 'wb') as f: 607 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 608 | self.version.tofile(f) # (int32) version info: major, minor, revision 609 | self.seen.tofile(f) # (int64) number of images seen during training 610 | 611 | # Iterate through layers 612 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 613 | if mdef['type'] == 'convolutional' or mdef['type'] == 'quantize_convolutional': 614 | conv_layer = module[0] 615 | # If batch norm, load bn first 616 | if mdef['batch_normalize']=='1': 617 | bn_layer = module[1] 618 | bn_layer.bias.data.cpu().numpy().tofile(f) 619 | bn_layer.weight.data.cpu().numpy().tofile(f) 620 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 621 | bn_layer.running_var.data.cpu().numpy().tofile(f) 622 | # Load conv bias 623 | else: 624 | conv_layer.bias.data.cpu().numpy().tofile(f) 625 | # Load conv weights 626 | conv_layer.weight.data.cpu().numpy().tofile(f) 627 | elif mdef['type'] == 'RepvggBlock': 628 | conv_layer = module[0] 629 | conv_layer.bias.data.cpu().numpy().tofile(f) 630 | conv_layer.weight.data.cpu().numpy().tofile(f) 631 | 632 | 633 | def save_weights2(self, path='model.weights', cutoff=-1): 634 | # Converts a PyTorch model to Darket 
format (*.pt to *.weights) 635 | # Note: Does not work if model.fuse() is applied 636 | with open(path, 'wb') as f: 637 | # Write Header https://github.com/AlexeyAB/darknet/issues/2914#issuecomment-496675346 638 | self.version.tofile(f) # (int32) version info: major, minor, revision 639 | self.seen.tofile(f) # (int64) number of images seen during training 640 | 641 | # Iterate through layers 642 | for i, (mdef, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])): 643 | if mdef['type'] == 'quantize_convolutional': 644 | conv_layer = module[0] 645 | # If batch norm, load bn first 646 | if mdef['batch_normalize']=='0' or mdef['batch_normalize']==0: 647 | 648 | conv_layer.beta.data.cpu().numpy().tofile(f) 649 | conv_layer.gamma.data.cpu().numpy().tofile(f) 650 | conv_layer.running_mean.data.cpu().numpy().tofile(f) 651 | conv_layer.running_var.data.cpu().numpy().tofile(f) 652 | # Load conv bias 653 | else: 654 | conv_layer.bias.data.cpu().numpy().tofile(f) 655 | # Load conv weights 656 | conv_layer.weight.data.cpu().numpy().tofile(f) 657 | elif mdef['type'] == 'convolutional': 658 | conv_layer = module[0] 659 | # If batch norm, load bn first 660 | if mdef['batch_normalize']=='1': 661 | bn_layer = module[1] 662 | bn_layer.bias.data.cpu().numpy().tofile(f) 663 | bn_layer.weight.data.cpu().numpy().tofile(f) 664 | bn_layer.running_mean.data.cpu().numpy().tofile(f) 665 | bn_layer.running_var.data.cpu().numpy().tofile(f) 666 | # Load conv bias 667 | else: 668 | conv_layer.bias.data.cpu().numpy().tofile(f) 669 | # Load conv weights 670 | conv_layer.weight.data.cpu().numpy().tofile(f) 671 | 672 | 673 | 674 | def convert(cfg='cfg/yolov3-spp.cfg', weights='weights/yolov3-spp.weights'): 675 | # Converts between PyTorch and Darknet format per extension (i.e. 
*.weights convert to *.pt and vice versa) 676 | # from models import *; convert('cfg/yolov3-spp.cfg', 'weights/yolov3-spp.weights') 677 | 678 | # Initialize model 679 | model = Darknet(cfg) 680 | 681 | # Load weights and save 682 | if weights.endswith('.pt'): # if PyTorch format 683 | model.load_state_dict(torch.load(weights, map_location='cpu')['model']) 684 | save_weights(model, path='converted.weights', cutoff=-1) 685 | print("Success: converted '%s' to 'converted.weights'" % weights) 686 | 687 | elif weights.endswith('.weights'): # darknet format 688 | _ = load_darknet_weights(model, weights) 689 | 690 | chkpt = {'epoch': -1, 691 | 'best_fitness': None, 692 | 'training_results': None, 693 | 'model': model.state_dict(), 694 | 'optimizer': None} 695 | 696 | torch.save(chkpt, 'converted.pt') 697 | print("Success: converted '%s' to 'converted.pt'" % weights) 698 | 699 | else: 700 | print('Error: extension not supported.') 701 | 702 | 703 | def attempt_download(weights): 704 | # Attempt to download pretrained weights if not found locally 705 | 706 | msg = weights + ' missing, download from https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI' 707 | if weights and not os.path.isfile(weights): 708 | file = Path(weights).name 709 | 710 | if file == 'yolov3-spp.weights': 711 | gdrive_download(id='1oPCHKsM2JpM-zgyepQciGli9X0MTsJCO', name=weights) 712 | elif file == 'yolov3-spp.pt': 713 | gdrive_download(id='1vFlbJ_dXPvtwaLLOu-twnjK4exdFiQ73', name=weights) 714 | elif file == 'yolov3.pt': 715 | gdrive_download(id='11uy0ybbOXA2hc-NJkJbbbkDwNX1QZDlz', name=weights) 716 | elif file == 'yolov3-tiny.pt': 717 | gdrive_download(id='1qKSgejNeNczgNNiCn9ZF_o55GFk1DjY_', name=weights) 718 | elif file == 'darknet53.conv.74': 719 | gdrive_download(id='18xqvs_uwAqfTXp-LJCYLYNHBOcrwbrp0', name=weights) 720 | elif file == 'yolov3-tiny.conv.15': 721 | gdrive_download(id='140PnSedCsGGgu3rOD6Ez4oI6cdDzerLC', name=weights) 722 | 723 | else: 724 | try: # download from pjreddie.com 725 | url = 'https://pjreddie.com/media/files/' + file 726 | print('Downloading ' + url) 727 | os.system('curl -f ' + url + ' -o ' + weights) 728 | except IOError: 729 | print(msg) 730 | os.system('rm ' + weights) # remove partial downloads 731 | 732 | assert os.path.exists(weights), msg # download missing weights from Google Drive 733 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip3 install -U -r requirements.txt 2 | numpy 3 | opencv-python 4 | torch >= 1.2 5 | matplotlib 6 | pycocotools 7 | tqdm 8 | tb-nightly 9 | future 10 | Pillow 11 | 12 | # Equivalent conda commands ---------------------------------------------------- 13 | # conda update -n base -c defaults conda 14 | # conda install -yc anaconda future numpy opencv matplotlib tqdm pillow 15 | # conda install -yc conda-forge scikit-image tensorboard pycocotools 16 | # conda install -yc spyder-ide spyder-line-profiler 17 | # conda install -yc pytorch pytorch torchvision 18 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | 4 | from torch.utils.data import DataLoader 5 | 6 | from models import * 7 | from utils.datasets import * 8 | from utils.utils import * 9 | 10 | 11 | 12 | def test(cfg, 13 | data, 14 | weights=None, 15 | batch_size=64, 16 | img_size=416, 17 | 
iou_thres=0.5, 18 | conf_thres=0.001, 19 | nms_thres=0.5, 20 | save_json=False, 21 | model=None): 22 | # Initialize/load model and set device 23 | if model is None: 24 | device = torch_utils.select_device(opt.device) 25 | verbose = True 26 | 27 | # Initialize model 28 | model = Darknet(cfg, img_size,deploy=True).to(device) 29 | #print(model) 30 | # Load weights 31 | #本身有,被我去掉了 32 | attempt_download(weights) 33 | if weights.endswith('.pt'): # pytorch format 34 | print('.pth is reading') 35 | # model.load_state_dict(torch.load(weights, map_location=device)['model']) 36 | model.load_state_dict(torch.load(weights, map_location=device)) 37 | 38 | 39 | else: # darknet format 40 | print('darknet weights is reading') 41 | _ = load_darknet_weights(model, weights) 42 | 43 | if torch.cuda.device_count() > 1: 44 | model = nn.DataParallel(model) 45 | else: 46 | device = next(model.parameters()).device # get model device 47 | verbose = False 48 | 49 | # Configure run 50 | data = parse_data_cfg(data) 51 | nc = int(data['classes']) # number of classes 52 | test_path = data['valid'] # path to test images 53 | names = load_classes(data['names']) # class names 54 | 55 | # Dataloader 56 | dataset = LoadImagesAndLabels(test_path, img_size, batch_size) 57 | dataloader = DataLoader(dataset, 58 | batch_size=batch_size, 59 | num_workers=min([os.cpu_count(), batch_size, 16]), 60 | # num_workers=0, 61 | pin_memory=True, 62 | collate_fn=dataset.collate_fn) 63 | 64 | seen = 0 65 | model.eval() 66 | coco91class = coco80_to_coco91_class() 67 | s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1') 68 | p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0. 69 | loss = torch.zeros(3) 70 | jdict, stats, ap, ap_class = [], [], [], [] 71 | for batch_i, (imgs, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 72 | targets = targets.to(device) 73 | imgs = imgs.to(device) 74 | _, _, height, width = imgs.shape # batch size, channels, height, width 75 | 76 | # Plot images with bounding boxes 77 | if batch_i == 0 and not os.path.exists('test_batch0.jpg'): 78 | plot_images(imgs=imgs, targets=targets, paths=paths, fname='test_batch0.jpg') 79 | 80 | # Run model 81 | inf_out, train_out = model(imgs) # inference and training outputs 82 | 83 | # Compute loss 84 | if hasattr(model, 'hyp'): # if model has loss hyperparameters 85 | loss += compute_loss(train_out, targets, model)[1][:3].cpu() # GIoU, obj, cls 86 | 87 | # Run NMS 88 | output = non_max_suppression(inf_out, conf_thres=conf_thres, nms_thres=nms_thres) 89 | 90 | # Statistics per image 91 | for si, pred in enumerate(output): 92 | labels = targets[targets[:, 0] == si, 1:] 93 | nl = len(labels) 94 | tcls = labels[:, 0].tolist() if nl else [] # target class 95 | seen += 1 96 | 97 | if pred is None: 98 | if nl: 99 | stats.append(([], torch.Tensor(), torch.Tensor(), tcls)) 100 | continue 101 | 102 | # Append to text file 103 | # with open('test.txt', 'a') as file: 104 | # [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred] 105 | 106 | # Append to pycocotools JSON dictionary 107 | if save_json: 108 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
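In test(), the statistics loop that continues below marks a prediction as a true positive only when its class appears among the image's targets and its best IoU with a still-unmatched target of that class exceeds iou_thres; every other prediction counts as a false positive at its confidence, and ap_per_class turns the accumulated (correct, conf, cls) triples into per-class AP. A small sketch of the IoU computation that matching relies on (bbox_iou itself lives in utils/utils.py):

```python
import torch

def box_iou_xyxy(box, boxes):
    # IoU of one (4,) box against an (N, 4) set, all in xyxy pixel coordinates.
    x1 = torch.max(box[0], boxes[:, 0]); y1 = torch.max(box[1], boxes[:, 1])
    x2 = torch.min(box[2], boxes[:, 2]); y2 = torch.min(box[3], boxes[:, 3])
    inter = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter + 1e-16)

# Greedy matching for mAP: pick the target with the highest IoU; if IoU > iou_thres and that
# target has not been detected yet, the prediction is correct and the target is marked as used.
```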
109 | image_id = int(Path(paths[si]).stem.split('_')[-1]) 110 | box = pred[:, :4].clone() # xyxy 111 | scale_coords(imgs[si].shape[1:], box, shapes[si]) # to original shape 112 | box = xyxy2xywh(box) # xywh 113 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 114 | for di, d in enumerate(pred): 115 | jdict.append({'image_id': image_id, 116 | 'category_id': coco91class[int(d[6])], 117 | 'bbox': [floatn(x, 3) for x in box[di]], 118 | 'score': floatn(d[4], 5)}) 119 | 120 | # Clip boxes to image bounds 121 | clip_coords(pred, (height, width)) 122 | 123 | # Assign all predictions as incorrect 124 | correct = [0] * len(pred) 125 | if nl: 126 | detected = [] 127 | tcls_tensor = labels[:, 0] 128 | 129 | # target boxes 130 | tbox = xywh2xyxy(labels[:, 1:5]) 131 | tbox[:, [0, 2]] *= width 132 | tbox[:, [1, 3]] *= height 133 | 134 | # Search for correct predictions 135 | for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred): 136 | 137 | # Break if all targets already located in image 138 | if len(detected) == nl: 139 | break 140 | 141 | # Continue if predicted class not among image classes 142 | if pcls.item() not in tcls: 143 | continue 144 | 145 | # Best iou, index between pred and targets 146 | m = (pcls == tcls_tensor).nonzero().view(-1) 147 | iou, bi = bbox_iou(pbox, tbox[m]).max(0) 148 | 149 | # If iou > threshold and class is correct mark as correct 150 | if iou > iou_thres and m[bi] not in detected: # and pcls == tcls[bi]: 151 | correct[i] = 1 152 | detected.append(m[bi]) 153 | 154 | # Append statistics (correct, conf, pcls, tcls) 155 | stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls)) 156 | 157 | # Compute statistics 158 | stats = [np.concatenate(x, 0) for x in list(zip(*stats))] # to numpy 159 | if len(stats): 160 | p, r, ap, f1, ap_class = ap_per_class(*stats) 161 | mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean() 162 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 163 | else: 164 | nt = torch.zeros(1) 165 | 166 | # Print results 167 | pf = '%20s' + '%10.3g' * 6 # print format 168 | print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1)) 169 | 170 | # Print results per class 171 | if verbose and nc > 1 and len(stats): 172 | for i, c in enumerate(ap_class): 173 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i])) 174 | 175 | # Save JSON 176 | if save_json and map and len(jdict): 177 | try: 178 | imgIds = [int(Path(x).stem.split('_')[-1]) for x in dataset.img_files] 179 | with open('results.json', 'w') as file: 180 | json.dump(jdict, file) 181 | 182 | from pycocotools.coco import COCO 183 | from pycocotools.cocoeval import COCOeval 184 | 185 | # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 186 | cocoGt = COCO('../coco/annotations/instances_val2014.json') # initialize COCO ground truth api 187 | cocoDt = cocoGt.loadRes('results.json') # initialize COCO pred api 188 | 189 | cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') 190 | cocoEval.params.imgIds = imgIds # [:32] # only evaluate these images 191 | cocoEval.evaluate() 192 | cocoEval.accumulate() 193 | cocoEval.summarize() 194 | map = cocoEval.stats[1] # update mAP to pycocotools mAP 195 | except: 196 | print('WARNING: missing dependency pycocotools from requirements.txt. 
Can not compute official COCO mAP.') 197 | 198 | # Return results 199 | maps = np.zeros(nc) + map 200 | for i, c in enumerate(ap_class): 201 | maps[c] = ap[i] 202 | return (mp, mr, map, mf1, *(loss.cpu() / len(dataloader)).tolist()), maps 203 | 204 | 205 | if __name__ == '__main__': 206 | parser = argparse.ArgumentParser(prog='test.py') 207 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='cfg file path') 208 | parser.add_argument('--data', type=str, default='data/coco.data', help='coco.data file path') 209 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 210 | parser.add_argument('--batch-size', type=int, default=16, help='size of each image batch') 211 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 212 | parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') 213 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 214 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 215 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 216 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 217 | opt = parser.parse_args() 218 | print(opt) 219 | 220 | with torch.no_grad(): 221 | test(opt.cfg, 222 | opt.data, 223 | opt.weights, 224 | opt.batch_size, 225 | opt.img_size, 226 | opt.iou_thres, 227 | opt.conf_thres, 228 | opt.nms_thres, 229 | opt.save_json) 230 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch.distributed as dist 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_scheduler 6 | 7 | import test # import test.py to get mAP after each epoch 8 | from models import * 9 | from utils.datasets import * 10 | from utils.utils import * 11 | from utils.prune_utils import * 12 | 13 | from mttkinter import mtTkinter as tk 14 | 15 | mixed_precision = True 16 | try: # Mixed precision training https://github.com/NVIDIA/apex 17 | from apex import amp 18 | except: 19 | mixed_precision = False # not installed 20 | 21 | wdir = 'weights_repvgg/B1' + os.sep # weights dir 22 | # wdir = 'weights_iter/weights_step6' + os.sep 23 | last = wdir + 'last.pt' 24 | best = wdir + 'best.pt' 25 | results_file = 'results.txt' 26 | 27 | # Hyperparameters (j-series, 50.5 mAP yolov3-320) evolved by @ktian08 https://github.com/ultralytics/yolov3/issues/310 28 | hyp = {'giou': 1.582, # giou loss gain 29 | 'cls': 27.76, # cls loss gain (CE=~1.0, uCE=~20) 30 | 'cls_pw': 1.446, # cls BCELoss positive_weight 31 | 'obj': 21.35, # obj loss gain (*=80 for uBCE with 80 classes) 32 | 'obj_pw': 3.941, # obj BCELoss positive_weight 33 | 'iou_t': 0.2635, # iou training threshold 34 | 'lr0': 0.002324, # initial learning rate (SGD=1E-3, Adam=9E-5) 35 | 'lrf': -4., # final LambdaLR learning rate = lr0 * (10 ** lrf) 36 | 'momentum': 0.97, # SGD momentum 37 | 'weight_decay': 0.0004569, # optimizer weight decay 38 | 'fl_gamma': 0.5, # focal loss gamma 39 | 'hsv_h': 0.01, # image HSV-Hue augmentation (fraction) 40 | 'hsv_s': 0.5703, # image HSV-Saturation augmentation (fraction) 41 | 'hsv_v': 0.3174, # image HSV-Value augmentation (fraction) 42 | 'degrees': 1.113, # image 
rotation (+/- deg) 43 | 'translate': 0.06797, # image translation (+/- fraction) 44 | 'scale': 0.1059, # image scale (+/- gain) 45 | 'shear': 0.5768} # image shear (+/- deg) 46 | 47 | # Overwrite hyp with hyp*.txt (optional) 48 | f = glob.glob('hyp*.txt') 49 | if f: 50 | for k, v in zip(hyp.keys(), np.loadtxt(f[0])): 51 | hyp[k] = v 52 | 53 | 54 | def train(): 55 | cfg = opt.cfg 56 | data = opt.data 57 | img_size = opt.img_size 58 | epochs = 1 if opt.prebias else opt.epochs # 500200 batches at bs 64, 117263 images = 273 epochs 59 | batch_size = opt.batch_size 60 | accumulate = opt.accumulate # effective bs = batch_size * accumulate = 16 * 4 = 64 61 | weights = opt.weights # initial training weights 62 | 63 | if 'pw' not in opt.arc: # remove BCELoss positive weights 64 | hyp['cls_pw'] = 1. 65 | hyp['obj_pw'] = 1. 66 | 67 | # Initialize 68 | init_seeds() 69 | multi_scale = opt.multi_scale 70 | 71 | if multi_scale: 72 | img_sz_min = round(img_size / 32 / 1.5) + 1 73 | img_sz_max = round(img_size / 32 * 1.5) - 1 74 | img_size = img_sz_max * 32 # initiate with maximum multi_scale size 75 | print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size)) 76 | 77 | # Configure run 78 | data_dict = parse_data_cfg(data) 79 | train_path = data_dict['train'] 80 | nc = int(data_dict['classes']) # number of classes 81 | 82 | # Remove previous results 83 | for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): 84 | os.remove(f) 85 | 86 | # Initialize model 87 | model = Darknet(cfg, arc=opt.arc).to(device) 88 | 89 | # Optimizer 90 | pg0, pg1 = [], [] # optimizer parameter groups 91 | for k, v in dict(model.named_parameters()).items(): 92 | if 'Conv2d.weight' in k: 93 | pg1 += [v] # parameter group 1 (apply weight_decay) 94 | else: 95 | pg0 += [v] # parameter group 0 96 | 97 | if opt.adam: 98 | optimizer = optim.Adam(pg0, lr=hyp['lr0']) 99 | # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1) 100 | else: 101 | optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) 102 | optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay 103 | del pg0, pg1 104 | 105 | cutoff = -1 # backbone reaches to cutoff layer 106 | start_epoch = 0 107 | best_fitness = float('inf') 108 | attempt_download(weights) 109 | if weights.endswith('.pt'): # pytorch format 110 | # possible weights are 'last.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc. 
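The optimizer setup above puts convolution weights in their own parameter group so that weight decay touches only them, while BatchNorm parameters and biases train without decay; the same two groups feed either SGD or Adam. A condensed restatement of that grouping (model and hyp as defined in train()):

```python
import torch.optim as optim

pg_decay, pg_plain = [], []
for name, p in model.named_parameters():
    # Only parameters named '...Conv2d.weight' receive weight decay.
    (pg_decay if 'Conv2d.weight' in name else pg_plain).append(p)

optimizer = optim.SGD(pg_plain, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
optimizer.add_param_group({'params': pg_decay, 'weight_decay': hyp['weight_decay']})
```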
111 | if opt.bucket: 112 | os.system('gsutil cp gs://%s/last.pt %s' % (opt.bucket, last)) # download from bucket 113 | chkpt = torch.load(weights, map_location=device) 114 | 115 | # load model 116 | # if opt.transfer: 117 | 118 | # chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()} 119 | # model.load_state_dict(chkpt['model'], strict=False) 120 | 121 | new_dict = model.state_dict() 122 | new_dict.update(chkpt) 123 | model.load_state_dict(new_dict) 124 | # model.load_state_dict(chkpt, strict=True) 125 | 126 | # else: 127 | # model.load_state_dict(chkpt['model']) 128 | 129 | # load optimizer 130 | 131 | # if chkpt['optimizer'] is not None: 132 | # optimizer.load_state_dict(chkpt['optimizer']) 133 | # best_fitness = chkpt['best_fitness'] 134 | 135 | # # load results 136 | # if chkpt.get('training_results') is not None: 137 | # with open(results_file, 'w') as file: 138 | # file.write(chkpt['training_results']) # write results.txt 139 | 140 | # start_epoch = chkpt['epoch'] + 1 141 | # del chkpt 142 | 143 | elif weights.endswith('.pth'): 144 | pass 145 | 146 | elif len(weights) > 0: # darknet format 147 | # possible weights are 'yolov3.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc. 148 | cutoff = load_darknet_weights(model, weights) 149 | 150 | 151 | 152 | 153 | if opt.transfer or opt.prebias: # transfer learning edge (yolo) layers 154 | nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters']) # yolo layer size (i.e. 255) 155 | 156 | if opt.prebias: 157 | for p in optimizer.param_groups: 158 | # lower param count allows more aggressive training settings: i.e. SGD ~0.1 lr0, ~0.9 momentum 159 | p['lr'] *= 100 # lr gain 160 | if p.get('momentum') is not None: # for SGD but not Adam 161 | p['momentum'] *= 0.9 162 | 163 | for p in model.parameters(): 164 | if opt.prebias and p.numel() == nf: # train (yolo biases) 165 | p.requires_grad = True 166 | elif opt.transfer and p.shape[0] == nf: # train (yolo biases+weights) 167 | p.requires_grad = True 168 | else: # freeze layer 169 | p.requires_grad = False 170 | 171 | # Scheduler https://github.com/ultralytics/yolov3/issues/238 172 | # lf = lambda x: 1 - x / epochs # linear ramp to zero 173 | # lf = lambda x: 10 ** (hyp['lrf'] * x / epochs) # exp ramp 174 | # lf = lambda x: 1 - 10 ** (hyp['lrf'] * (1 - x / epochs)) # inverse exp ramp 175 | # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) 176 | # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=range(59, 70, 1), gamma=0.8) # gradual fall to 0.1*lr0 177 | scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(opt.epochs * x) for x in [0.8, 0.9]], gamma=0.1) 178 | scheduler.last_epoch = start_epoch - 1 179 | 180 | # # Plot lr schedule 181 | # y = [] 182 | # for _ in range(epochs): 183 | # scheduler.step() 184 | # y.append(optimizer.param_groups[0]['lr']) 185 | # plt.plot(y, label='LambdaLR') 186 | # plt.xlabel('epoch') 187 | # plt.ylabel('LR') 188 | # plt.tight_layout() 189 | # plt.savefig('LR.png', dpi=300) 190 | 191 | # Mixed precision training https://github.com/NVIDIA/apex 192 | if mixed_precision: 193 | model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0) 194 | 195 | # Initialize distributed training 196 | if torch.cuda.device_count() > 1: 197 | dist.init_process_group(backend='nccl', # 'distributed backend' 198 | init_method='tcp://127.0.0.1:9999', # distributed training init method 199 | world_size=1, # number of nodes for distributed training 200 | rank=0) # distributed 
training node rank 201 | model = torch.nn.parallel.DistributedDataParallel(model) 202 | model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level 203 | 204 | 205 | 206 | #获得要剪枝的层 207 | 208 | if hasattr(model, 'module'): 209 | print('muti-gpus sparse') 210 | if opt.prune==1: 211 | print('shortcut sparse training') 212 | _,_,prune_idx,_,_=parse_module_defs2(model.module.module_defs) 213 | elif opt.prune==0: 214 | print('normal sparse training ') 215 | _,_,prune_idx= parse_module_defs(model.module.module_defs) 216 | elif opt.prune==2: 217 | print('tiny yolo normal sparse traing') 218 | _,_,prune_idx= parse_module_defs3(model.module.module_defs) 219 | 220 | else: 221 | print('single-gpu sparse') 222 | if opt.prune==1: 223 | print('shortcut sparse training') 224 | _,_,prune_idx,_,_=parse_module_defs2(model.module_defs) 225 | elif opt.prune==0: 226 | print('normal sparse training') 227 | _,_,prune_idx= parse_module_defs(model.module_defs) 228 | elif opt.prune==2: 229 | print('tiny yolo normal sparse traing') 230 | _,_,prune_idx= parse_module_defs3(model.module_defs) 231 | 232 | 233 | # Dataset 234 | dataset = LoadImagesAndLabels(train_path, 235 | img_size, 236 | batch_size, 237 | augment=True, 238 | hyp=hyp, # augmentation hyperparameters 239 | rect=opt.rect, # rectangular training 240 | image_weights=opt.img_weights, 241 | cache_labels=True if epochs > 10 else False, 242 | cache_images=False if opt.prebias else opt.cache_images) 243 | 244 | # Dataloader 245 | dataloader = torch.utils.data.DataLoader(dataset, 246 | batch_size=batch_size, 247 | num_workers=min([os.cpu_count(), batch_size, 16]), 248 | shuffle=not opt.rect, # Shuffle=True unless rectangular training is used 249 | pin_memory=True, 250 | collate_fn=dataset.collate_fn) 251 | 252 | 253 | 254 | # Start training 255 | model.nc = nc # attach number of classes to model 256 | model.arc = opt.arc # attach yolo architecture 257 | model.hyp = hyp # attach hyperparameters to model 258 | # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights 259 | torch_utils.model_info(model, report='summary') # 'full' or 'summary' 260 | nb = len(dataloader) 261 | maps = np.zeros(nc) # mAP per class 262 | results = (0, 0, 0, 0, 0, 0, 0) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' 263 | t0 = time.time() 264 | print('Starting %s for %g epochs...' 
% ('prebias' if opt.prebias else 'training', epochs)) 265 | for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------ 266 | model.train() 267 | print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) 268 | #稀疏化标志 269 | sr_flag = get_sr_flag(epoch, opt.sr) 270 | 271 | # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional) 272 | freeze_backbone = False 273 | if freeze_backbone and epoch < 2: 274 | for name, p in model.named_parameters(): 275 | if int(name.split('.')[1]) < cutoff: # if layer < 75 276 | p.requires_grad = False if epoch == 0 else True 277 | 278 | # Update image weights (optional) 279 | if dataset.image_weights: 280 | w = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights 281 | image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) 282 | dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx 283 | 284 | mloss = torch.zeros(4).to(device) # mean losses 285 | pbar = tqdm(enumerate(dataloader), total=nb) # progress bar 286 | for i, (imgs, targets, paths, _) in pbar: # batch ------------------------------------------------------------- 287 | ni = i + nb * epoch # number integrated batches (since train start) 288 | imgs = imgs.to(device) 289 | targets = targets.to(device) 290 | 291 | # Multi-Scale training 292 | if multi_scale: 293 | if ni / accumulate % 10 == 0: #  adjust (67% - 150%) every 10 batches 294 | img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32 295 | sf = img_size / max(imgs.shape[2:]) # scale factor 296 | if sf != 1: 297 | ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]] # new shape (stretched to 32-multiple) 298 | imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) 299 | 300 | # Plot images with bounding boxes 301 | if ni == 0: 302 | fname = 'train_batch%g.jpg' % i 303 | plot_images(imgs=imgs, targets=targets, paths=paths, fname=fname) 304 | if tb_writer: 305 | tb_writer.add_image(fname, cv2.imread(fname)[:, :, ::-1], dataformats='HWC') 306 | 307 | # Hyperparameter burn-in 308 | # n_burn = nb - 1 # min(nb // 5 + 1, 1000) # number of burn-in batches 309 | # if ni <= n_burn: 310 | # for m in model.named_modules(): 311 | # if m[0].endswith('BatchNorm2d'): 312 | # m[1].momentum = 1 - i / n_burn * 0.99 # BatchNorm2d momentum falls from 1 - 0.01 313 | # g = (i / n_burn) ** 4 # gain rises from 0 - 1 314 | # for x in optimizer.param_groups: 315 | # x['lr'] = hyp['lr0'] * g 316 | # x['weight_decay'] = hyp['weight_decay'] * g 317 | 318 | # Run model 319 | pred = model(imgs) 320 | 321 | # Compute loss 322 | loss, loss_items = compute_loss(pred, targets, model) 323 | if not torch.isfinite(loss): 324 | print('WARNING: non-finite loss, ending training ', loss_items) 325 | return results 326 | 327 | # Scale loss by nominal batch_size of 64 328 | loss *= batch_size / 64 329 | 330 | # Compute gradient 331 | if mixed_precision: 332 | with amp.scale_loss(loss, optimizer) as scaled_loss: 333 | scaled_loss.backward() 334 | else: 335 | loss.backward() 336 | 337 | #对要剪枝层的γ参数稀疏化 338 | if hasattr(model, 'module'): 339 | BNOptimizer.updateBN(sr_flag, model.module.module_list, opt.s, prune_idx) 340 | else: 341 | BNOptimizer.updateBN(sr_flag, model.module_list, opt.s, prune_idx) 342 | 343 | # Accumulate gradient for x batches before optimizing 344 | if ni % accumulate == 0: 345 | optimizer.step() 346 | optimizer.zero_grad() 347 | 348 | # Print batch 
results 349 | mloss = (mloss * i + loss_items) / (i + 1) # update mean losses 350 | mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0 # (GB) 351 | s = ('%10s' * 2 + '%10.3g' * 6) % ( 352 | '%g/%g' % (epoch, epochs - 1), '%.3gG' % mem, *mloss, len(targets), img_size) 353 | pbar.set_description(s) 354 | 355 | # end batch ------------------------------------------------------------------------------------------------ 356 | 357 | # Update scheduler 358 | scheduler.step() 359 | 360 | # Process epoch results 361 | final_epoch = epoch + 1 == epochs 362 | if opt.prebias: 363 | print_model_biases(model) 364 | else: 365 | # Calculate mAP (always test final epoch, skip first 10 if opt.nosave) 366 | if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch: 367 | with torch.no_grad(): 368 | results, maps = test.test(cfg, 369 | data, 370 | batch_size=batch_size, 371 | img_size=opt.img_size, 372 | model=model, 373 | # conf_thres=0.001 if final_epoch and epoch > 0 else 0.1, # 0.1 for speed 374 | conf_thres=0.001, 375 | save_json=final_epoch and epoch > 0 and 'coco.data' in data) 376 | 377 | # Write epoch results 378 | with open(results_file, 'a') as f: 379 | f.write(s + '%10.3g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) 380 | 381 | # Write Tensorboard results 382 | if tb_writer: 383 | x = list(mloss) + list(results) 384 | titles = ['GIoU', 'Objectness', 'Classification', 'Train loss', 385 | 'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'] 386 | for xi, title in zip(x, titles): 387 | tb_writer.add_scalar(title, xi, epoch) 388 | 389 | # Update best mAP 390 | # fitness = sum(results[4:]) # total loss 391 | # if fitness < best_fitness: 392 | # best_fitness = fitness 393 | fi = fitness(np.array(results).reshape(1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] 394 | if fi > best_fitness: 395 | best_fitness = fi 396 | 397 | # Save training results 398 | save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias 399 | if save: 400 | with open(results_file, 'r') as f: 401 | # Create checkpoint 402 | chkpt = {'epoch': epoch, 403 | 'best_fitness': best_fitness, 404 | 'training_results': f.read(), 405 | 'model': model.module.state_dict() if type( 406 | model) is nn.parallel.DistributedDataParallel else model.state_dict(), 407 | 'optimizer': None if final_epoch else optimizer.state_dict()} 408 | 409 | # Save last checkpoint 410 | torch.save(chkpt, last) 411 | if opt.bucket and not opt.prebias: 412 | os.system('gsutil cp %s gs://%s' % (last, opt.bucket)) # upload to bucket 413 | 414 | # Save best checkpoint (this epoch set a new best fitness; comparing against the fitness() function here was a bug) 415 | if best_fitness == fi: 416 | torch.save(chkpt, best) 417 | 418 | # Save backup every 10 epochs (optional) 419 | if epoch > 0 and epoch % 10 == 0: 420 | torch.save(chkpt, wdir + 'backup%g.pt' % epoch) 421 | 422 | # Delete checkpoint 423 | del chkpt 424 | 425 | # end epoch ---------------------------------------------------------------------------------------------------- 426 | 427 | # end training 428 | if len(opt.name): 429 | os.rename('results.txt', 'results_%s.txt' % opt.name) 430 | os.rename(wdir + 'best.pt', wdir + 'best_%s.pt' % opt.name) 431 | plot_results() # save as results.png 432 | print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) 433 | dist.destroy_process_group() if torch.cuda.device_count() > 1 else None 434 | torch.cuda.empty_cache() 435 | 436 | # save to cloud 437 | # os.system(gsutil cp results.txt gs://...)
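# Note on the sparsity step: `BNOptimizer.updateBN`, called in the batch loop above, applies the
# channel-sparsity regularization (network-slimming style): when sr_flag is set it adds an L1
# subgradient penalty of strength `opt.s` to the BatchNorm scale factors (γ) of the prunable layers
# listed in `prune_idx`. A minimal sketch of such an update, assuming each prunable entry of
# `module_list` is a Conv-BN-LeakyReLU nn.Sequential with the BatchNorm2d at index 1 (the actual
# implementation shipped with this repo's pruning utilities may differ):
#
#     class BNOptimizer:
#         @staticmethod
#         def updateBN(sr_flag, module_list, s, prune_idx):
#             if sr_flag:
#                 for idx in prune_idx:
#                     bn_module = module_list[idx][1]  # BatchNorm2d of the Conv-BN-LeakyReLU block
#                     # L1 subgradient: push γ towards zero with strength s
#                     bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data))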
438 | # os.system(gsutil cp weights/best.pt gs://...) 439 | 440 | return results 441 | 442 | 443 | def prebias(): 444 | # trains output bias layers for 1 epoch and creates new backbone 445 | if opt.prebias: 446 | train() # transfer-learn yolo biases for 1 epoch 447 | create_backbone(last) # saved results as backbone.pt 448 | opt.weights = wdir + 'backbone.pt' # assign backbone 449 | opt.prebias = False # disable prebias 450 | 451 | 452 | if __name__ == '__main__': 453 | parser = argparse.ArgumentParser() 454 | parser.add_argument('--epochs', type=int, default=273) # 500200 batches at bs 16, 117263 images = 273 epochs 455 | parser.add_argument('--batch-size', type=int, default=32) # effective bs = batch_size * accumulate = 16 * 4 = 64 456 | parser.add_argument('--accumulate', type=int, default=2, help='batches to accumulate before optimizing') 457 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-hand.cfg', help='cfg file path') 458 | parser.add_argument('--data', type=str, default='data/oxfordhand.data', help='*.data file path') 459 | parser.add_argument('--multi-scale', action='store_true', help='adjust (67% - 150%) img_size every 10 batches') 460 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 461 | parser.add_argument('--rect', action='store_true', help='rectangular training') 462 | parser.add_argument('--resume', action='store_true', help='resume training from last.pt') 463 | parser.add_argument('--transfer', action='store_true', help='transfer learning') 464 | parser.add_argument('--nosave', action='store_true', help='only save final checkpoint') 465 | parser.add_argument('--notest', action='store_true', help='only test final epoch') 466 | parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters') 467 | parser.add_argument('--bucket', type=str, default='', help='gsutil bucket') 468 | parser.add_argument('--img-weights', action='store_true', help='select training images by weight') 469 | parser.add_argument('--cache-images', action='store_true', help='cache images for faster training') 470 | parser.add_argument('--weights', type=str, default='weights/yolov3.weights', help='initial weights') # i.e. weights/darknet.53.conv.74 471 | parser.add_argument('--arc', type=str, default='default', help='yolo architecture') # defaultpw, uCE, uBCE 472 | parser.add_argument('--prebias', action='store_true', help='transfer-learn yolo biases prior to training') 473 | parser.add_argument('--name', default='', help='renames results.txt to results_name.txt if supplied') 474 | parser.add_argument('--device', default='', help='device id (i.e. 
0 or 0,1) or cpu') 475 | parser.add_argument('--adam', action='store_true', help='use adam optimizer') 476 | parser.add_argument('--var', type=float, help='debug variable') 477 | parser.add_argument('--sparsity-regularization', '-sr', dest='sr', action='store_true', 478 | help='train with channel sparsity regularization') 479 | parser.add_argument('--s', type=float, default=0.001, help='scale sparse rate') 480 | parser.add_argument('--prune', type=int, default=0, help='0:nomal prune or regular prune 1:shortcut prune 2:tiny prune') 481 | opt = parser.parse_args() 482 | opt.weights = last if opt.resume else opt.weights 483 | print(opt) 484 | device = torch_utils.select_device(opt.device, apex=mixed_precision) 485 | 486 | tb_writer = None 487 | if not opt.evolve: # Train normally 488 | try: 489 | # Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/ 490 | from torch.utils.tensorboard import SummaryWriter 491 | 492 | tb_writer = SummaryWriter() 493 | except: 494 | pass 495 | 496 | prebias() # optional 497 | train() # train normally 498 | 499 | # Evolve hyperparameters (optional) 500 | else: 501 | opt.notest = True # only test final epoch 502 | opt.nosave = True # only save final checkpoint 503 | if opt.bucket: 504 | os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists 505 | 506 | for _ in range(1): # generations to evolve 507 | if os.path.exists('evolve.txt'): # if evolve.txt exists: select best hyps and mutate 508 | # Select parent(s) 509 | x = np.loadtxt('evolve.txt', ndmin=2) 510 | parent = 'weighted' # parent selection method: 'single' or 'weighted' 511 | if parent == 'single' or len(x) == 1: 512 | x = x[fitness(x).argmax()] 513 | elif parent == 'weighted': # weighted combination 514 | n = min(10, x.shape[0]) # number to merge 515 | x = x[np.argsort(-fitness(x))][:n] # top n mutations 516 | w = fitness(x) - fitness(x).min() # weights 517 | x = (x[:n] * w.reshape(n, 1)).sum(0) / w.sum() # new parent 518 | for i, k in enumerate(hyp.keys()): 519 | hyp[k] = x[i + 7] 520 | 521 | # Mutate 522 | np.random.seed(int(time.time())) 523 | s = [.2, .2, .2, .2, .2, .2, .2, .0, .02, .2, .2, .2, .2, .2, .2, .2, .2, .2] # sigmas 524 | for i, k in enumerate(hyp.keys()): 525 | x = (np.random.randn(1) * s[i] + 1) ** 2.0 # plt.hist(x.ravel(), 300) 526 | hyp[k] *= float(x) # vary by sigmas 527 | 528 | # Clip to limits 529 | keys = ['lr0', 'iou_t', 'momentum', 'weight_decay', 'hsv_s', 'hsv_v', 'translate', 'scale', 'fl_gamma'] 530 | limits = [(1e-5, 1e-2), (0.00, 0.70), (0.60, 0.98), (0, 0.001), (0, .9), (0, .9), (0, .9), (0, .9), (0, 3)] 531 | for k, v in zip(keys, limits): 532 | hyp[k] = np.clip(hyp[k], v[0], v[1]) 533 | 534 | # Train mutation 535 | prebias() 536 | results = train() 537 | 538 | # Write mutation results 539 | print_mutation(hyp, results, opt.bucket) 540 | 541 | # Plot results 542 | # plot_evolution_results(hyp) 543 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Cydia2018/YOLOv3-RepVGG-backbone/0bd5670acafc0e0e5e86b452ab76c5032c16eb44/utils/__init__.py -------------------------------------------------------------------------------- /utils/adabound.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.optim import Optimizer 5 | 6 | 7 | class AdaBound(Optimizer): 8 | """Implements 
AdaBound algorithm. 9 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 10 | Arguments: 11 | params (iterable): iterable of parameters to optimize or dicts defining 12 | parameter groups 13 | lr (float, optional): Adam learning rate (default: 1e-3) 14 | betas (Tuple[float, float], optional): coefficients used for computing 15 | running averages of gradient and its square (default: (0.9, 0.999)) 16 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 17 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 18 | eps (float, optional): term added to the denominator to improve 19 | numerical stability (default: 1e-8) 20 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 21 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 22 | .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 23 | https://openreview.net/forum?id=Bkg3g2R9FX 24 | """ 25 | 26 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 27 | eps=1e-8, weight_decay=0, amsbound=False): 28 | if not 0.0 <= lr: 29 | raise ValueError("Invalid learning rate: {}".format(lr)) 30 | if not 0.0 <= eps: 31 | raise ValueError("Invalid epsilon value: {}".format(eps)) 32 | if not 0.0 <= betas[0] < 1.0: 33 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 34 | if not 0.0 <= betas[1] < 1.0: 35 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 36 | if not 0.0 <= final_lr: 37 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 38 | if not 0.0 <= gamma < 1.0: 39 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 40 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 41 | weight_decay=weight_decay, amsbound=amsbound) 42 | super(AdaBound, self).__init__(params, defaults) 43 | 44 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 45 | 46 | def __setstate__(self, state): 47 | super(AdaBound, self).__setstate__(state) 48 | for group in self.param_groups: 49 | group.setdefault('amsbound', False) 50 | 51 | def step(self, closure=None): 52 | """Performs a single optimization step. 53 | Arguments: 54 | closure (callable, optional): A closure that reevaluates the model 55 | and returns the loss. 56 | """ 57 | loss = None 58 | if closure is not None: 59 | loss = closure() 60 | 61 | for group, base_lr in zip(self.param_groups, self.base_lrs): 62 | for p in group['params']: 63 | if p.grad is None: 64 | continue 65 | grad = p.grad.data 66 | if grad.is_sparse: 67 | raise RuntimeError( 68 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 69 | amsbound = group['amsbound'] 70 | 71 | state = self.state[p] 72 | 73 | # State initialization 74 | if len(state) == 0: 75 | state['step'] = 0 76 | # Exponential moving average of gradient values 77 | state['exp_avg'] = torch.zeros_like(p.data) 78 | # Exponential moving average of squared gradient values 79 | state['exp_avg_sq'] = torch.zeros_like(p.data) 80 | if amsbound: 81 | # Maintains max of all exp. moving avg. of sq. grad. 
values 82 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 83 | 84 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 85 | if amsbound: 86 | max_exp_avg_sq = state['max_exp_avg_sq'] 87 | beta1, beta2 = group['betas'] 88 | 89 | state['step'] += 1 90 | 91 | if group['weight_decay'] != 0: 92 | grad = grad.add(group['weight_decay'], p.data) 93 | 94 | # Decay the first and second moment running average coefficient 95 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 96 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 97 | if amsbound: 98 | # Maintains the maximum of all 2nd moment running avg. till now 99 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 100 | # Use the max. for normalizing running avg. of gradient 101 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 102 | else: 103 | denom = exp_avg_sq.sqrt().add_(group['eps']) 104 | 105 | bias_correction1 = 1 - beta1 ** state['step'] 106 | bias_correction2 = 1 - beta2 ** state['step'] 107 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 108 | 109 | # Applies bounds on actual learning rate 110 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 111 | final_lr = group['final_lr'] * group['lr'] / base_lr 112 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 113 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 114 | step_size = torch.full_like(denom, step_size) 115 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 116 | 117 | p.data.add_(-step_size) 118 | 119 | return loss 120 | 121 | 122 | class AdaBoundW(Optimizer): 123 | """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101) 124 | It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_. 125 | Arguments: 126 | params (iterable): iterable of parameters to optimize or dicts defining 127 | parameter groups 128 | lr (float, optional): Adam learning rate (default: 1e-3) 129 | betas (Tuple[float, float], optional): coefficients used for computing 130 | running averages of gradient and its square (default: (0.9, 0.999)) 131 | final_lr (float, optional): final (SGD) learning rate (default: 0.1) 132 | gamma (float, optional): convergence speed of the bound functions (default: 1e-3) 133 | eps (float, optional): term added to the denominator to improve 134 | numerical stability (default: 1e-8) 135 | weight_decay (float, optional): weight decay (L2 penalty) (default: 0) 136 | amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm 137 | .. 
Adaptive Gradient Methods with Dynamic Bound of Learning Rate: 138 | https://openreview.net/forum?id=Bkg3g2R9FX 139 | """ 140 | 141 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3, 142 | eps=1e-8, weight_decay=0, amsbound=False): 143 | if not 0.0 <= lr: 144 | raise ValueError("Invalid learning rate: {}".format(lr)) 145 | if not 0.0 <= eps: 146 | raise ValueError("Invalid epsilon value: {}".format(eps)) 147 | if not 0.0 <= betas[0] < 1.0: 148 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 149 | if not 0.0 <= betas[1] < 1.0: 150 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 151 | if not 0.0 <= final_lr: 152 | raise ValueError("Invalid final learning rate: {}".format(final_lr)) 153 | if not 0.0 <= gamma < 1.0: 154 | raise ValueError("Invalid gamma parameter: {}".format(gamma)) 155 | defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps, 156 | weight_decay=weight_decay, amsbound=amsbound) 157 | super(AdaBoundW, self).__init__(params, defaults) 158 | 159 | self.base_lrs = list(map(lambda group: group['lr'], self.param_groups)) 160 | 161 | def __setstate__(self, state): 162 | super(AdaBoundW, self).__setstate__(state) 163 | for group in self.param_groups: 164 | group.setdefault('amsbound', False) 165 | 166 | def step(self, closure=None): 167 | """Performs a single optimization step. 168 | Arguments: 169 | closure (callable, optional): A closure that reevaluates the model 170 | and returns the loss. 171 | """ 172 | loss = None 173 | if closure is not None: 174 | loss = closure() 175 | 176 | for group, base_lr in zip(self.param_groups, self.base_lrs): 177 | for p in group['params']: 178 | if p.grad is None: 179 | continue 180 | grad = p.grad.data 181 | if grad.is_sparse: 182 | raise RuntimeError( 183 | 'Adam does not support sparse gradients, please consider SparseAdam instead') 184 | amsbound = group['amsbound'] 185 | 186 | state = self.state[p] 187 | 188 | # State initialization 189 | if len(state) == 0: 190 | state['step'] = 0 191 | # Exponential moving average of gradient values 192 | state['exp_avg'] = torch.zeros_like(p.data) 193 | # Exponential moving average of squared gradient values 194 | state['exp_avg_sq'] = torch.zeros_like(p.data) 195 | if amsbound: 196 | # Maintains max of all exp. moving avg. of sq. grad. values 197 | state['max_exp_avg_sq'] = torch.zeros_like(p.data) 198 | 199 | exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq'] 200 | if amsbound: 201 | max_exp_avg_sq = state['max_exp_avg_sq'] 202 | beta1, beta2 = group['betas'] 203 | 204 | state['step'] += 1 205 | 206 | # Decay the first and second moment running average coefficient 207 | exp_avg.mul_(beta1).add_(1 - beta1, grad) 208 | exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad) 209 | if amsbound: 210 | # Maintains the maximum of all 2nd moment running avg. till now 211 | torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq) 212 | # Use the max. for normalizing running avg. 
of gradient 213 | denom = max_exp_avg_sq.sqrt().add_(group['eps']) 214 | else: 215 | denom = exp_avg_sq.sqrt().add_(group['eps']) 216 | 217 | bias_correction1 = 1 - beta1 ** state['step'] 218 | bias_correction2 = 1 - beta2 ** state['step'] 219 | step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1 220 | 221 | # Applies bounds on actual learning rate 222 | # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay 223 | final_lr = group['final_lr'] * group['lr'] / base_lr 224 | lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1)) 225 | upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step'])) 226 | step_size = torch.full_like(denom, step_size) 227 | step_size.div_(denom).clamp_(lower_bound, upper_bound).mul_(exp_avg) 228 | 229 | if group['weight_decay'] != 0: 230 | decayed_weights = torch.mul(p.data, group['weight_decay']) 231 | p.data.add_(-step_size) 232 | p.data.sub_(decayed_weights) 233 | else: 234 | p.data.add_(-step_size) 235 | 236 | return loss 237 | -------------------------------------------------------------------------------- /utils/datasets.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import math 3 | import os 4 | import random 5 | import shutil 6 | import time 7 | from pathlib import Path 8 | from threading import Thread 9 | 10 | import cv2 11 | import numpy as np 12 | import torch 13 | from PIL import Image, ExifTags 14 | from torch.utils.data import Dataset 15 | from tqdm import tqdm 16 | 17 | from utils.utils import xyxy2xywh, xywh2xyxy 18 | 19 | img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif'] 20 | vid_formats = ['.mov', '.avi', '.mp4'] 21 | 22 | # Get orientation exif tag 23 | for orientation in ExifTags.TAGS.keys(): 24 | if ExifTags.TAGS[orientation] == 'Orientation': 25 | break 26 | 27 | 28 | def exif_size(img): 29 | # Returns exif-corrected PIL size 30 | s = img.size # (width, height) 31 | try: 32 | rotation = dict(img._getexif().items())[orientation] 33 | if rotation == 6: # rotation 270 34 | s = (s[1], s[0]) 35 | elif rotation == 8: # rotation 90 36 | s = (s[1], s[0]) 37 | except: 38 | pass 39 | 40 | return s 41 | 42 | 43 | class LoadImages: # for inference 44 | def __init__(self, path, img_size=416, half=False): 45 | path = str(Path(path)) # os-agnostic 46 | files = [] 47 | if os.path.isdir(path): 48 | files = sorted(glob.glob(os.path.join(path, '*.*'))) 49 | elif os.path.isfile(path): 50 | files = [path] 51 | 52 | images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats] 53 | videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats] 54 | nI, nV = len(images), len(videos) 55 | 56 | self.img_size = img_size 57 | self.files = images + videos 58 | self.nF = nI + nV # number of files 59 | self.video_flag = [False] * nI + [True] * nV 60 | self.mode = 'images' 61 | self.half = half # half precision fp16 images 62 | if any(videos): 63 | self.new_video(videos[0]) # new video 64 | else: 65 | self.cap = None 66 | assert self.nF > 0, 'No images or videos found in ' + path 67 | 68 | def __iter__(self): 69 | self.count = 0 70 | return self 71 | 72 | def __next__(self): 73 | if self.count == self.nF: 74 | raise StopIteration 75 | path = self.files[self.count] 76 | 77 | if self.video_flag[self.count]: 78 | # Read video 79 | self.mode = 'video' 80 | ret_val, img0 = self.cap.read() 81 | if not ret_val: 82 | self.count += 1 83 | self.cap.release() 84 | if self.count == self.nF: # last video 85 | raise 
StopIteration 86 | else: 87 | path = self.files[self.count] 88 | self.new_video(path) 89 | ret_val, img0 = self.cap.read() 90 | 91 | self.frame += 1 92 | print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nF, self.frame, self.nframes, path), end='') 93 | 94 | else: 95 | # Read image 96 | self.count += 1 97 | img0 = cv2.imread(path) # BGR 98 | assert img0 is not None, 'Image Not Found ' + path 99 | print('image %g/%g %s: ' % (self.count, self.nF, path), end='') 100 | 101 | # Padded resize 102 | img = letterbox(img0, new_shape=self.img_size)[0] 103 | 104 | # Normalize RGB 105 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 106 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 107 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 108 | 109 | # cv2.imwrite(path + '.letterbox.jpg', 255 * img.transpose((1, 2, 0))[:, :, ::-1]) # save letterbox image 110 | return path, img, img0, self.cap 111 | 112 | def new_video(self, path): 113 | self.frame = 0 114 | self.cap = cv2.VideoCapture(path) 115 | self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 116 | 117 | def __len__(self): 118 | return self.nF # number of files 119 | 120 | 121 | class LoadWebcam: # for inference 122 | def __init__(self, pipe=0, img_size=416, half=False): 123 | self.img_size = img_size 124 | self.half = half # half precision fp16 images 125 | 126 | if pipe == '0': 127 | pipe = 0 # local camera 128 | # pipe = 'rtsp://192.168.1.64/1' # IP camera 129 | # pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login 130 | # pipe = 'rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa' # IP traffic camera 131 | # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera 132 | 133 | # https://answers.opencv.org/question/215996/changing-gstreamer-pipeline-to-opencv-in-pythonsolved/ 134 | # pipe = '"rtspsrc location="rtsp://username:password@192.168.1.64/1" latency=10 ! appsink' # GStreamer 135 | 136 | # https://answers.opencv.org/question/200787/video-acceleration-gstremer-pipeline-in-videocapture/ 137 | # https://stackoverflow.com/questions/54095699/install-gstreamer-support-for-opencv-python-package # install help 138 | # pipe = "rtspsrc location=rtsp://root:root@192.168.0.91:554/axis-media/media.amp?videocodec=h264&resolution=3840x2160 protocols=GST_RTSP_LOWER_TRANS_TCP ! rtph264depay ! queue ! vaapih264dec ! videoconvert ! 
appsink" # GStreamer 139 | 140 | self.pipe = pipe 141 | self.cap = cv2.VideoCapture(pipe) # video capture object 142 | self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size 143 | 144 | def __iter__(self): 145 | self.count = -1 146 | return self 147 | 148 | def __next__(self): 149 | self.count += 1 150 | if cv2.waitKey(1) == ord('q'): # q to quit 151 | self.cap.release() 152 | cv2.destroyAllWindows() 153 | raise StopIteration 154 | 155 | # Read frame 156 | if self.pipe == 0: # local camera 157 | ret_val, img0 = self.cap.read() 158 | img0 = cv2.flip(img0, 1) # flip left-right 159 | else: # IP camera 160 | n = 0 161 | while True: 162 | n += 1 163 | self.cap.grab() 164 | if n % 30 == 0: # skip frames 165 | ret_val, img0 = self.cap.retrieve() 166 | if ret_val: 167 | break 168 | 169 | # Print 170 | assert ret_val, 'Camera Error %s' % self.pipe 171 | img_path = 'webcam.jpg' 172 | print('webcam %g: ' % self.count, end='') 173 | 174 | # Padded resize 175 | img = letterbox(img0, new_shape=self.img_size)[0] 176 | 177 | # Normalize RGB 178 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 179 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 180 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 181 | 182 | return img_path, img, img0, None 183 | 184 | def __len__(self): 185 | return 0 186 | 187 | 188 | class LoadStreams: # multiple IP or RTSP cameras 189 | def __init__(self, sources='streams.txt', img_size=416, half=False): 190 | self.mode = 'images' 191 | self.img_size = img_size 192 | self.half = half # half precision fp16 images 193 | 194 | if os.path.isfile(sources): 195 | with open(sources, 'r') as f: 196 | sources = [x.strip() for x in f.read().splitlines() if len(x.strip())] 197 | else: 198 | sources = [sources] 199 | 200 | n = len(sources) 201 | self.imgs = [None] * n 202 | self.sources = sources 203 | for i, s in enumerate(sources): 204 | # Start the thread to read frames from the video stream 205 | print('%g/%g: %s... ' % (i + 1, n, s), end='') 206 | cap = cv2.VideoCapture(0 if s == '0' else s) 207 | assert cap.isOpened(), 'Failed to open %s' % s 208 | w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 209 | h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 210 | fps = cap.get(cv2.CAP_PROP_FPS) % 100 211 | _, self.imgs[i] = cap.read() # guarantee first frame 212 | thread = Thread(target=self.update, args=([i, cap]), daemon=True) 213 | print(' success (%gx%g at %.2f FPS).' 
% (w, h, fps)) 214 | thread.start() 215 | print('') # newline 216 | 217 | def update(self, index, cap): 218 | # Read next stream frame in a daemon thread 219 | n = 0 220 | while cap.isOpened(): 221 | n += 1 222 | # _, self.imgs[index] = cap.read() 223 | cap.grab() 224 | if n == 4: # read every 4th frame 225 | _, self.imgs[index] = cap.retrieve() 226 | n = 0 227 | time.sleep(0.01) # wait time 228 | 229 | def __iter__(self): 230 | self.count = -1 231 | return self 232 | 233 | def __next__(self): 234 | self.count += 1 235 | img0 = self.imgs.copy() 236 | if cv2.waitKey(1) == ord('q'): # q to quit 237 | cv2.destroyAllWindows() 238 | raise StopIteration 239 | 240 | # Letterbox 241 | img = [letterbox(x, new_shape=self.img_size, interp=cv2.INTER_LINEAR)[0] for x in img0] 242 | 243 | # Stack 244 | img = np.stack(img, 0) 245 | 246 | # Normalize RGB 247 | img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB 248 | img = np.ascontiguousarray(img, dtype=np.float16 if self.half else np.float32) # uint8 to fp16/fp32 249 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 250 | 251 | return self.sources, img, img0, None 252 | 253 | def __len__(self): 254 | return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years 255 | 256 | 257 | class LoadImagesAndLabels(Dataset): # for training/testing 258 | def __init__(self, path, img_size=416, batch_size=16, augment=False, hyp=None, rect=True, image_weights=False, 259 | cache_labels=False, cache_images=False): 260 | path = str(Path(path)) # os-agnostic 261 | with open(path, 'r') as f: 262 | self.img_files = [x.replace('/', os.sep) for x in f.read().splitlines() # os-agnostic 263 | if os.path.splitext(x)[-1].lower() in img_formats] 264 | 265 | n = len(self.img_files) 266 | bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index 267 | nb = bi[-1] + 1 # number of batches 268 | assert n > 0, 'No images found in %s' % path 269 | 270 | self.n = n 271 | self.batch = bi # batch index of image 272 | self.img_size = img_size 273 | self.augment = augment 274 | self.hyp = hyp 275 | self.image_weights = image_weights 276 | self.rect = False if image_weights else rect 277 | 278 | # Define labels 279 | self.label_files = [x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt') 280 | for x in self.img_files] 281 | 282 | # Rectangular Training https://github.com/ultralytics/yolov3/issues/232 283 | if self.rect: 284 | # Read image shapes 285 | sp = 'data' + os.sep + path.replace('.txt', '.shapes').split(os.sep)[-1] # shapefile path 286 | try: 287 | with open(sp, 'r') as f: # read existing shapefile 288 | s = [x.split() for x in f.read().splitlines()] 289 | assert len(s) == n, 'Shapefile out of sync' 290 | except: 291 | s = [exif_size(Image.open(f)) for f in tqdm(self.img_files, desc='Reading image shapes')] 292 | np.savetxt(sp, s, fmt='%g') # overwrites existing (if any) 293 | 294 | # Sort by aspect ratio 295 | s = np.array(s, dtype=np.float64) 296 | ar = s[:, 1] / s[:, 0] # aspect ratio 297 | i = ar.argsort() 298 | self.img_files = [self.img_files[i] for i in i] 299 | self.label_files = [self.label_files[i] for i in i] 300 | self.shapes = s[i] 301 | ar = ar[i] 302 | 303 | # Set training image shapes 304 | shapes = [[1, 1]] * nb 305 | for i in range(nb): 306 | ari = ar[bi == i] 307 | mini, maxi = ari.min(), ari.max() 308 | if maxi < 1: 309 | shapes[i] = [maxi, 1] 310 | elif mini > 1: 311 | shapes[i] = [1, 1 / mini] 312 | 313 | self.batch_shapes = np.ceil(np.array(shapes) * img_size / 32.).astype(np.int) * 32 314 | 315 | # Preload labels (required for weighted CE 
training) 316 | self.imgs = [None] * n 317 | self.labels = [None] * n 318 | if cache_labels or image_weights: # cache labels for faster training 319 | self.labels = [np.zeros((0, 5))] * n 320 | extract_bounding_boxes = False 321 | create_datasubset = False 322 | pbar = tqdm(self.label_files, desc='Reading labels') 323 | nm, nf, ne, ns = 0, 0, 0, 0 # number missing, number found, number empty, number datasubset 324 | for i, file in enumerate(pbar): 325 | try: 326 | with open(file, 'r') as f: 327 | l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 328 | except: 329 | nm += 1 # print('missing labels for image %s' % self.img_files[i]) # file missing 330 | continue 331 | 332 | if l.shape[0]: 333 | assert l.shape[1] == 5, '> 5 label columns: %s' % file 334 | assert (l >= 0).all(), 'negative labels: %s' % file 335 | assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels: %s' % file 336 | self.labels[i] = l 337 | nf += 1 # file found 338 | 339 | # Create subdataset (a smaller dataset) 340 | if create_datasubset and ns < 1E4: 341 | if ns == 0: 342 | create_folder(path='./datasubset') 343 | os.makedirs('./datasubset/images') 344 | exclude_classes = 43 345 | if exclude_classes not in l[:, 0]: 346 | ns += 1 347 | # shutil.copy(src=self.img_files[i], dst='./datasubset/images/') # copy image 348 | with open('./datasubset/images.txt', 'a') as f: 349 | f.write(self.img_files[i] + '\n') 350 | 351 | # Extract object detection boxes for a second stage classifier 352 | if extract_bounding_boxes: 353 | p = Path(self.img_files[i]) 354 | img = cv2.imread(str(p)) 355 | h, w, _ = img.shape 356 | for j, x in enumerate(l): 357 | f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent, os.sep, os.sep, x[0], j, p.name) 358 | if not os.path.exists(Path(f).parent): 359 | os.makedirs(Path(f).parent) # make new output folder 360 | 361 | b = x[1:] * np.array([w, h, w, h]) # box 362 | b[2:] = b[2:].max() # rectangle to square 363 | b[2:] = b[2:] * 1.3 + 30 # pad 364 | b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int) 365 | 366 | b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image 367 | b[[1, 3]] = np.clip(b[[1, 3]], 0, h) 368 | assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), 'Failure extracting classifier boxes' 369 | else: 370 | ne += 1 # file empty 371 | 372 | pbar.desc = 'Reading labels (%g found, %g missing, %g empty for %g images)' % (nf, nm, ne, n) 373 | assert nf > 0, 'No labels found. Recommend correcting image and label paths.' 
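# The label files follow the darknet/YOLO convention enforced by the asserts above: one .txt per image,
# one row per object, "class x_center y_center width height", with box coordinates normalized to [0, 1].
# A hypothetical single-class (hand) example and how it parses into the (n, 5) float32 array cached in
# self.labels (file name and values are illustrative only):
#
#     # 000001.txt
#     #   0 0.716 0.395 0.216 0.174
#     #   0 0.287 0.601 0.121 0.098
#     with open('000001.txt', 'r') as f:
#         l = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32)  # l.shape == (2, 5)
#
# __getitem__ later converts these normalized xywh rows to pixel xyxy using the letterbox ratio and padding.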
374 | 375 | # Cache images into memory for faster training (~5GB) 376 | if cache_images and augment: # if training 377 | for i in tqdm(range(min(len(self.img_files), 10000)), desc='Reading images'): # max 10k images 378 | img_path = self.img_files[i] 379 | img = cv2.imread(img_path) # BGR 380 | assert img is not None, 'Image Not Found ' + img_path 381 | r = self.img_size / max(img.shape) # size ratio 382 | if self.augment and r < 1: # if training (NOT testing), downsize to inference shape 383 | h, w, _ = img.shape 384 | img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # or INTER_AREA 385 | self.imgs[i] = img 386 | 387 | # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3 388 | detect_corrupted_images = False 389 | if detect_corrupted_images: 390 | from skimage import io # conda install -c conda-forge scikit-image 391 | for file in tqdm(self.img_files, desc='Detecting corrupted images'): 392 | try: 393 | _ = io.imread(file) 394 | except: 395 | print('Corrupted image detected: %s' % file) 396 | 397 | def __len__(self): 398 | return len(self.img_files) 399 | 400 | # def __iter__(self): 401 | # self.count = -1 402 | # print('ran dataset iter') 403 | # #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF) 404 | # return self 405 | 406 | def __getitem__(self, index): 407 | if self.image_weights: 408 | index = self.indices[index] 409 | 410 | img_path = self.img_files[index] 411 | label_path = self.label_files[index] 412 | 413 | mosaic = True and self.augment # load 4 images at a time into a mosaic (only during training) 414 | if mosaic: 415 | # Load mosaic 416 | img, labels = load_mosaic(self, index) 417 | h, w, _ = img.shape 418 | 419 | else: 420 | # Load image 421 | img = load_image(self, index) 422 | 423 | # Letterbox 424 | h, w, _ = img.shape 425 | if self.rect: 426 | img, ratio, padw, padh = letterbox(img, self.batch_shapes[self.batch[index]], mode='rect') 427 | else: 428 | img, ratio, padw, padh = letterbox(img, self.img_size, mode='square') 429 | 430 | # Load labels 431 | labels = [] 432 | if os.path.isfile(label_path): 433 | x = self.labels[index] 434 | if x is None: # labels not preloaded 435 | with open(label_path, 'r') as f: 436 | x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 437 | 438 | if x.size > 0: 439 | # Normalized xywh to pixel xyxy format 440 | labels = x.copy() 441 | labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + padw 442 | labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + padh 443 | labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + padw 444 | labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + padh 445 | 446 | if self.augment: 447 | # Augment colorspace 448 | augment_hsv(img, hgain=self.hyp['hsv_h'], sgain=self.hyp['hsv_s'], vgain=self.hyp['hsv_v']) 449 | 450 | # Augment imagespace 451 | g = 0.0 if mosaic else 1.0 # do not augment mosaics 452 | hyp = self.hyp 453 | img, labels = random_affine(img, labels, 454 | degrees=hyp['degrees'] * g, 455 | translate=hyp['translate'] * g, 456 | scale=hyp['scale'] * g, 457 | shear=hyp['shear'] * g) 458 | 459 | # Apply cutouts 460 | # if random.random() < 0.9: 461 | # labels = cutout(img, labels) 462 | 463 | nL = len(labels) # number of labels 464 | if nL: 465 | # convert xyxy to xywh 466 | labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) 467 | 468 | # Normalize coordinates 0 - 1 469 | labels[:, [2, 4]] /= img.shape[0] # height 470 | labels[:, [1, 3]] /= img.shape[1] 
# width 471 | 472 | if self.augment: 473 | # random left-right flip 474 | lr_flip = True 475 | if lr_flip and random.random() < 0.5: 476 | img = np.fliplr(img) 477 | if nL: 478 | labels[:, 1] = 1 - labels[:, 1] 479 | 480 | # random up-down flip 481 | ud_flip = False 482 | if ud_flip and random.random() < 0.5: 483 | img = np.flipud(img) 484 | if nL: 485 | labels[:, 2] = 1 - labels[:, 2] 486 | 487 | labels_out = torch.zeros((nL, 6)) 488 | if nL: 489 | labels_out[:, 1:] = torch.from_numpy(labels) 490 | 491 | # Normalize 492 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 493 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 494 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 495 | 496 | return torch.from_numpy(img), labels_out, img_path, (h, w) 497 | 498 | @staticmethod 499 | def collate_fn(batch): 500 | img, label, path, hw = list(zip(*batch)) # transposed 501 | for i, l in enumerate(label): 502 | l[:, 0] = i # add target image index for build_targets() 503 | return torch.stack(img, 0), torch.cat(label, 0), path, hw 504 | 505 | 506 | def load_image(self, index): 507 | # loads 1 image from dataset 508 | img = self.imgs[index] 509 | if img is None: 510 | img_path = self.img_files[index] 511 | img = cv2.imread(img_path) # BGR 512 | assert img is not None, 'Image Not Found ' + img_path 513 | r = self.img_size / max(img.shape) # size ratio 514 | if self.augment and r < 1.0: # if training (NOT testing), downsize to inference shape 515 | h, w, _ = img.shape 516 | img = cv2.resize(img, (int(w * r), int(h * r)), interpolation=cv2.INTER_LINEAR) # _LINEAR fastest 517 | return img 518 | 519 | 520 | def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): 521 | x = (np.random.uniform(-1, 1, 3) * np.array([hgain, sgain, vgain]) + 1).astype(np.float32) # random gains 522 | img_hsv = (cv2.cvtColor(img, cv2.COLOR_BGR2HSV) * x.reshape((1, 1, 3))).clip(None, 255).astype(np.uint8) 523 | cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed 524 | 525 | 526 | # def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5): # original version 527 | # # SV augmentation by 50% 528 | # img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) # hue, sat, val 529 | # 530 | # S = img_hsv[:, :, 1].astype(np.float32) # saturation 531 | # V = img_hsv[:, :, 2].astype(np.float32) # value 532 | # 533 | # a = random.uniform(-1, 1) * sgain + 1 534 | # b = random.uniform(-1, 1) * vgain + 1 535 | # S *= a 536 | # V *= b 537 | # 538 | # img_hsv[:, :, 1] = S if a < 1 else S.clip(None, 255) 539 | # img_hsv[:, :, 2] = V if b < 1 else V.clip(None, 255) 540 | # cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed 541 | 542 | 543 | def load_mosaic(self, index): 544 | # loads images in a mosaic 545 | 546 | labels4 = [] 547 | s = self.img_size 548 | xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)] # mosaic center x, y 549 | img4 = np.zeros((s * 2, s * 2, 3), dtype=np.uint8) + 128 # base image with 4 tiles 550 | indices = [index] + [random.randint(0, len(self.labels) - 1) for _ in range(3)] # 3 additional image indices 551 | for i, index in enumerate(indices): 552 | # Load image 553 | img = load_image(self, index) 554 | h, w, _ = img.shape 555 | 556 | # place img in img4 557 | if i == 0: # top left 558 | x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) 559 | x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) 560 | elif i == 1: # top right 561 | x1a, y1a, x2a, y2a = xc, max(yc - h, 
0), min(xc + w, s * 2), yc 562 | x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h 563 | elif i == 2: # bottom left 564 | x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) 565 | x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h) 566 | elif i == 3: # bottom right 567 | x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) 568 | x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) 569 | 570 | img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] 571 | padw = x1a - x1b 572 | padh = y1a - y1b 573 | 574 | # Load labels 575 | label_path = self.label_files[index] 576 | if os.path.isfile(label_path): 577 | x = self.labels[index] 578 | if x is None: # labels not preloaded 579 | with open(label_path, 'r') as f: 580 | x = np.array([x.split() for x in f.read().splitlines()], dtype=np.float32) 581 | 582 | if x.size > 0: 583 | # Normalized xywh to pixel xyxy format 584 | labels = x.copy() 585 | labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw 586 | labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh 587 | labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw 588 | labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh 589 | 590 | labels4.append(labels) 591 | if len(labels4): 592 | labels4 = np.concatenate(labels4, 0) 593 | 594 | # hyp = self.hyp 595 | # img4, labels4 = random_affine(img4, labels4, 596 | # degrees=hyp['degrees'], 597 | # translate=hyp['translate'], 598 | # scale=hyp['scale'], 599 | # shear=hyp['shear']) 600 | 601 | # Center crop 602 | a = s // 2 603 | img4 = img4[a:a + s, a:a + s] 604 | if len(labels4): 605 | labels4[:, 1:] -= a 606 | 607 | return img4, labels4 608 | 609 | 610 | def letterbox(img, new_shape=416, color=(128, 128, 128), mode='auto', interp=cv2.INTER_AREA): 611 | # Resize a rectangular image to a 32 pixel multiple rectangle 612 | # https://github.com/ultralytics/yolov3/issues/232 613 | shape = img.shape[:2] # current shape [height, width] 614 | 615 | if isinstance(new_shape, int): 616 | r = float(new_shape) / max(shape) # ratio = new / old 617 | else: 618 | r = max(new_shape) / max(shape) 619 | ratio = r, r # width, height ratios 620 | new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r))) 621 | 622 | # Compute padding https://github.com/ultralytics/yolov3/issues/232 623 | if mode is 'auto': # minimum rectangle 624 | dw = np.mod(new_shape - new_unpad[0], 32) / 2 # width padding 625 | dh = np.mod(new_shape - new_unpad[1], 32) / 2 # height padding 626 | elif mode is 'square': # square 627 | dw = (new_shape - new_unpad[0]) / 2 # width padding 628 | dh = (new_shape - new_unpad[1]) / 2 # height padding 629 | elif mode is 'rect': # square 630 | dw = (new_shape[1] - new_unpad[0]) / 2 # width padding 631 | dh = (new_shape[0] - new_unpad[1]) / 2 # height padding 632 | elif mode is 'scaleFill': 633 | dw, dh = 0.0, 0.0 634 | new_unpad = (new_shape, new_shape) 635 | ratio = new_shape / shape[1], new_shape / shape[0] # width, height ratios 636 | 637 | if shape[::-1] != new_unpad: # resize 638 | img = cv2.resize(img, new_unpad, interpolation=interp) # INTER_AREA is better, INTER_LINEAR is faster 639 | top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1)) 640 | left, right = int(round(dw - 0.1)), int(round(dw + 0.1)) 641 | img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border 642 | return img, ratio, dw, dh 643 | 644 | 645 | def random_affine(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10): 646 | # 
torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) 647 | # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4 648 | 649 | if targets is None: # targets = [cls, xyxy] 650 | targets = [] 651 | border = 0 # width of added border (optional) 652 | height = img.shape[0] + border * 2 653 | width = img.shape[1] + border * 2 654 | 655 | # Rotation and Scale 656 | R = np.eye(3) 657 | a = random.uniform(-degrees, degrees) 658 | # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations 659 | s = random.uniform(1 - scale, 1 + scale) 660 | R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s) 661 | 662 | # Translation 663 | T = np.eye(3) 664 | T[0, 2] = random.uniform(-translate, translate) * img.shape[0] + border # x translation (pixels) 665 | T[1, 2] = random.uniform(-translate, translate) * img.shape[1] + border # y translation (pixels) 666 | 667 | # Shear 668 | S = np.eye(3) 669 | S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) 670 | S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) 671 | 672 | # Combined rotation matrix 673 | M = S @ T @ R # ORDER IS IMPORTANT HERE!! 674 | changed = (border != 0) or (M != np.eye(3)).any() 675 | if changed: 676 | img = cv2.warpAffine(img, M[:2], dsize=(width, height), flags=cv2.INTER_AREA, borderValue=(128, 128, 128)) 677 | 678 | # Transform label coordinates 679 | n = len(targets) 680 | if n: 681 | # warp points 682 | xy = np.ones((n * 4, 3)) 683 | xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1 684 | xy = (xy @ M.T)[:, :2].reshape(n, 8) 685 | 686 | # create new boxes 687 | x = xy[:, [0, 2, 4, 6]] 688 | y = xy[:, [1, 3, 5, 7]] 689 | xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T 690 | 691 | # # apply angle-based reduction of bounding boxes 692 | # radians = a * math.pi / 180 693 | # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5 694 | # x = (xy[:, 2] + xy[:, 0]) / 2 695 | # y = (xy[:, 3] + xy[:, 1]) / 2 696 | # w = (xy[:, 2] - xy[:, 0]) * reduction 697 | # h = (xy[:, 3] - xy[:, 1]) * reduction 698 | # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T 699 | 700 | # reject warped points outside of image 701 | xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) 702 | xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) 703 | w = xy[:, 2] - xy[:, 0] 704 | h = xy[:, 3] - xy[:, 1] 705 | area = w * h 706 | area0 = (targets[:, 3] - targets[:, 1]) * (targets[:, 4] - targets[:, 2]) 707 | ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16)) 708 | i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10) 709 | 710 | targets = targets[i] 711 | targets[:, 1:5] = xy[i] 712 | 713 | return img, targets 714 | 715 | 716 | def cutout(image, labels): 717 | # https://arxiv.org/abs/1708.04552 718 | # https://github.com/hysts/pytorch_cutout/blob/master/dataloader.py 719 | # https://towardsdatascience.com/when-conventional-wisdom-fails-revisiting-data-augmentation-for-self-driving-cars-4831998c5509 720 | h, w = image.shape[:2] 721 | 722 | def bbox_ioa(box1, box2, x1y1x2y2=True): 723 | # Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. 
boxes are x1y1x2y2 724 | box2 = box2.transpose() 725 | 726 | # Get the coordinates of bounding boxes 727 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 728 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 729 | 730 | # Intersection area 731 | inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \ 732 | (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0) 733 | 734 | # box2 area 735 | box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16 736 | 737 | # Intersection over box2 area 738 | return inter_area / box2_area 739 | 740 | # create random masks 741 | scales = [0.5] * 1 # + [0.25] * 4 + [0.125] * 16 + [0.0625] * 64 + [0.03125] * 256 # image size fraction 742 | for s in scales: 743 | mask_h = random.randint(1, int(h * s)) 744 | mask_w = random.randint(1, int(w * s)) 745 | 746 | # box 747 | xmin = max(0, random.randint(0, w) - mask_w // 2) 748 | ymin = max(0, random.randint(0, h) - mask_h // 2) 749 | xmax = min(w, xmin + mask_w) 750 | ymax = min(h, ymin + mask_h) 751 | 752 | # apply random color mask 753 | mask_color = [random.randint(0, 255) for _ in range(3)] 754 | image[ymin:ymax, xmin:xmax] = mask_color 755 | 756 | # return unobscured labels 757 | if len(labels) and s > 0.03: 758 | box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32) 759 | ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area 760 | labels = labels[ioa < 0.90] # remove >90% obscured labels 761 | 762 | return labels 763 | 764 | 765 | def convert_images2bmp(): 766 | # cv2.imread() jpg at 230 img/s, *.bmp at 400 img/s 767 | for path in ['../coco/images/val2014/', '../coco/images/train2014/']: 768 | folder = os.sep + Path(path).name 769 | output = path.replace(folder, folder + 'bmp') 770 | if os.path.exists(output): 771 | shutil.rmtree(output) # delete output folder 772 | os.makedirs(output) # make new output folder 773 | 774 | for f in tqdm(glob.glob('%s*.jpg' % path)): 775 | save_name = f.replace('.jpg', '.bmp').replace(folder, folder + 'bmp') 776 | cv2.imwrite(save_name, cv2.imread(f)) 777 | 778 | for label_path in ['../coco/trainvalno5k.txt', '../coco/5k.txt']: 779 | with open(label_path, 'r') as file: 780 | lines = file.read() 781 | lines = lines.replace('2014/', '2014bmp/').replace('.jpg', '.bmp').replace( 782 | '/Users/glennjocher/PycharmProjects/', '../') 783 | with open(label_path.replace('5k', '5k_bmp'), 'w') as file: 784 | file.write(lines) 785 | 786 | 787 | def create_folder(path='./new_folder'): 788 | # Create folder 789 | if os.path.exists(path): 790 | shutil.rmtree(path) # delete output folder 791 | os.makedirs(path) # make new output folder 792 | -------------------------------------------------------------------------------- /utils/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf sample_data yolov3 darknet apex coco cocoapi knife knifec 5 | git clone https://github.com/ultralytics/yolov3 6 | # git clone https://github.com/AlexeyAB/darknet && cd darknet && make GPU=1 CUDNN=1 CUDNN_HALF=1 OPENCV=0 && wget -c https://pjreddie.com/media/files/darknet53.conv.74 && cd .. 7 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex 8 | # git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo conda install -y -c conda-forge scikit-image tensorboard pycocotools 10 | python3 -c " 11 | from yolov3.utils.google_utils import gdrive_download 12 | gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" 13 | sudo shutdown 14 | 15 | # Re-clone 16 | rm -rf yolov3 # Warning: remove existing 17 | git clone https://github.com/ultralytics/yolov3 && cd yolov3 # master 18 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch 19 | python3 train.py --img-size 320 --weights weights/darknet53.conv.74 --epochs 27 --batch-size 64 --accumulate 1 20 | 21 | # Train 22 | python3 train.py 23 | 24 | # Resume 25 | python3 train.py --resume 26 | 27 | # Detect 28 | python3 detect.py 29 | 30 | # Test 31 | python3 test.py --save-json 32 | 33 | # Evolve 34 | for i in {0..500} 35 | do 36 | python3 train.py --data data/coco.data --img-size 320 --epochs 1 --batch-size 64 --accumulate 1 --evolve --bucket yolov4 37 | done 38 | 39 | # Git pull 40 | git pull https://github.com/ultralytics/yolov3 # master 41 | git pull https://github.com/ultralytics/yolov3 test # branch 42 | 43 | # Test Darknet training 44 | python3 test.py --weights ../darknet/backup/yolov3.backup 45 | 46 | # Copy last.pt TO bucket 47 | gsutil cp yolov3/weights/last1gpu.pt gs://ultralytics 48 | 49 | # Copy last.pt FROM bucket 50 | gsutil cp gs://ultralytics/last.pt yolov3/weights/last.pt 51 | wget https://storage.googleapis.com/ultralytics/yolov3/last_v1_0.pt -O weights/last_v1_0.pt 52 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 53 | 54 | # Reproduce tutorials 55 | rm results*.txt # WARNING: removes existing results 56 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results0r_1img.txt 57 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results0r_10img.txt 58 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results0r_100img.txt 59 | # python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 60 | python3 -c "from utils import utils; utils.plot_results()" 61 | # gsutil cp results*.txt gs://ultralytics 62 | gsutil cp results.png gs://ultralytics 63 | sudo shutdown 64 | 65 | # Reproduce mAP 66 | python3 test.py --save-json --img-size 608 67 | python3 test.py --save-json --img-size 416 68 | python3 test.py --save-json --img-size 320 69 | sudo shutdown 70 | 71 | # Benchmark script 72 | git clone https://github.com/ultralytics/yolov3 # clone our repo 73 | git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex # install nvidia apex 74 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO','coco.zip')" # download coco dataset (20GB) 75 | cd yolov3 && clear && python3 train.py --epochs 1 # run benchmark (~30 min) 76 | 77 | # Unit tests 78 | python3 detect.py # detect 2 persons, 1 tie 79 | python3 test.py --data data/coco_32img.data # test mAP = 0.8 80 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 5 epochs 81 | python3 train.py --data data/coco_1cls.data --epochs 5 --nosave # train 5 epochs 82 | python3 train.py --data data/coco_1img.data --epochs 5 --nosave # train 5 epochs 83 | 84 | # AlexyAB Darknet 85 | gsutil cp -r gs://sm6/supermarket2 . 
# dataset from bucket 86 | rm -rf darknet && git clone https://github.com/AlexeyAB/darknet && cd darknet && wget -c https://pjreddie.com/media/files/darknet53.conv.74 # sudo apt install libopencv-dev && make 87 | ./darknet detector calc_anchors data/coco_img64.data -num_of_clusters 9 -width 320 -height 320 # kmeans anchor calculation 88 | ./darknet detector train ../supermarket2/supermarket2.data ../yolo_v3_spp_pan_scale.cfg darknet53.conv.74 -map -dont_show # train spp 89 | ./darknet detector train ../yolov3/data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp coco 90 | 91 | ./darknet detector train data/coco.data ../yolov3-spp.cfg darknet53.conv.74 -map -dont_show # train spp 92 | gsutil cp -r backup/*5000.weights gs://sm6/weights 93 | sudo shutdown 94 | 95 | 96 | ./darknet detector train ../supermarket2/supermarket2.data ../yolov3-tiny-sm2-1cls.cfg yolov3-tiny.conv.15 -map -dont_show # train tiny 97 | ./darknet detector train ../supermarket2/supermarket2.data cfg/yolov3-spp-sm2-1cls.cfg backup/yolov3-spp-sm2-1cls_last.weights # resume 98 | python3 train.py --data ../supermarket2/supermarket2.data --cfg ../yolov3-spp-sm2-1cls.cfg --epochs 100 --num-workers 8 --img-size 320 --nosave # train ultralytics 99 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls_5000.weights --cfg cfg/yolov3-spp-sm2-1cls.cfg # test 100 | gsutil cp -r backup/*.weights gs://sm6/weights # weights to bucket 101 | 102 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls_5000.weights --cfg ../yolov3-spp-sm2-1cls.cfg --img-size 320 --conf-thres 0.2 # test 103 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_125_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_125.cfg --img-size 320 --conf-thres 0.2 # test 104 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_150_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_150.cfg --img-size 320 --conf-thres 0.2 # test 105 | python3 test.py --data ../supermarket2/supermarket2.data --weights weights/yolov3-spp-sm2-1cls-scalexy_200_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_200.cfg --img-size 320 --conf-thres 0.2 # test 106 | python3 test.py --data ../supermarket2/supermarket2.data --weights ../darknet/backup/yolov3-spp-sm2-1cls-scalexy_variable_5000.weights --cfg ../yolov3-spp-sm2-1cls-scalexy_variable.cfg --img-size 320 --conf-thres 0.2 # test 107 | 108 | python3 train.py --img-size 320 --epochs 27 --batch-size 64 --accumulate 1 --nosave --notest && python3 test.py --weights weights/last.pt --img-size 320 --save-json && sudo shutdown 109 | 110 | # Debug/Development 111 | python3 train.py --data data/coco.data --img-size 320 --single-scale --batch-size 64 --accumulate 1 --epochs 1 --evolve --giou 112 | python3 test.py --weights weights/last.pt --cfg cfg/yolov3-spp.cfg --img-size 320 113 | 114 | gsutil cp evolve.txt gs://ultralytics 115 | sudo shutdown 116 | 117 | #Docker 118 | sudo docker kill $(sudo docker ps -q) 119 | sudo docker pull ultralytics/yolov3:v1 120 | sudo nvidia-docker run -it --ipc=host --mount type=bind,source="$(pwd)"/coco,target=/usr/src/coco ultralytics/yolov3:v1 121 | 122 | clear 123 | while true 124 | do 125 | python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e --device 1 126 | done 127 | 128 | python3 train.py --data 
data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --epochs 1 --adam --device 1 --prebias 129 | while true; do python3 train.py --data data/coco.data --img-size 320 --batch-size 64 --accumulate 1 --evolve --epochs 1 --adam --bucket yolov4/adamdefaultpw_coco_1e; done 130 | -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | [os.system(x) for x in s] # run commands 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s "https://drive.google.com/uc?export=download&id=%s"' % (name, id) 31 | os.system(s) 32 | 33 | # Unzip if archive 34 | if name.endswith('.zip'): 35 | print('unzipping... 
', end='') 36 | os.system('unzip -q %s' % name) # unzip 37 | os.remove(name) # remove zip to free space 38 | 39 | print('Done (%.1fs)' % (time.time() - t)) 40 | 41 | 42 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 43 | # Uploads a file to a bucket 44 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 45 | 46 | storage_client = storage.Client() 47 | bucket = storage_client.get_bucket(bucket_name) 48 | blob = bucket.blob(destination_blob_name) 49 | 50 | blob.upload_from_filename(source_file_name) 51 | 52 | print('File {} uploaded to {}.'.format( 53 | source_file_name, 54 | destination_blob_name)) 55 | 56 | 57 | def download_blob(bucket_name, source_blob_name, destination_file_name): 58 | # Downloads a blob from a bucket 59 | storage_client = storage.Client() 60 | bucket = storage_client.get_bucket(bucket_name) 61 | blob = bucket.blob(source_blob_name) 62 | 63 | blob.download_to_filename(destination_file_name) 64 | 65 | print('Blob {} downloaded to {}.'.format( 66 | source_blob_name, 67 | destination_file_name)) 68 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def parse_model_cfg(path): 5 | # Parses the yolo-v3 layer configuration file and returns module definitions 6 | file = open(path, 'r') 7 | lines = file.read().split('\n') 8 | lines = [x for x in lines if x and not x.startswith('#')] 9 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 10 | mdefs = [] # module definitions 11 | for line in lines: 12 | if line.startswith('['): # This marks the start of a new block 13 | mdefs.append({}) 14 | mdefs[-1]['type'] = line[1:-1].rstrip() 15 | if mdefs[-1]['type'] == 'convolutional': 16 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 17 | else: 18 | key, val = line.split("=") 19 | key = key.rstrip() 20 | 21 | if 'anchors' in key: 22 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 23 | else: 24 | mdefs[-1][key] = val.strip() 25 | 26 | return mdefs 27 | 28 | 29 | def parse_data_cfg(path): 30 | # Parses the data configuration file 31 | options = dict() 32 | with open(path, 'r') as fp: 33 | lines = fp.readlines() 34 | 35 | for line in lines: 36 | line = line.strip() 37 | if line == '' or line.startswith('#'): 38 | continue 39 | key, val = line.split('=') 40 | options[key.strip()] = val.strip() 41 | 42 | return options 43 | -------------------------------------------------------------------------------- /utils/prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | from scipy.spatial import distance 7 | 8 | 9 | def get_sr_flag(epoch, sr): 10 | # return epoch >= 5 and sr 11 | return sr 12 | 13 | def parse_module_defs3(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | 26 | ignore_idx.add(18) 27 | 28 | 29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 30 | 31 | return CBL_idx, Conv_idx, prune_idx 32 | 33 | def 
parse_module_defs2(module_defs): 34 | 35 | CBL_idx = [] 36 | Conv_idx = [] 37 | shortcut_idx=dict() 38 | shortcut_all=set() 39 | for i, module_def in enumerate(module_defs): 40 | if module_def['type'] == 'convolutional': 41 | if module_def['batch_normalize'] == '1': 42 | CBL_idx.append(i) 43 | else: 44 | Conv_idx.append(i) 45 | 46 | ignore_idx = set() 47 | for i, module_def in enumerate(module_defs): 48 | if module_def['type'] == 'shortcut': 49 | identity_idx = (i + int(module_def['from'])) 50 | if module_defs[identity_idx]['type'] == 'convolutional': 51 | 52 | #ignore_idx.add(identity_idx) 53 | shortcut_idx[i-1]=identity_idx 54 | shortcut_all.add(identity_idx) 55 | elif module_defs[identity_idx]['type'] == 'shortcut': 56 | 57 | #ignore_idx.add(identity_idx - 1) 58 | shortcut_idx[i-1]=identity_idx-1 59 | shortcut_all.add(identity_idx-1) 60 | shortcut_all.add(i-1) 61 | # do not prune the conv layers before the upsample layers 62 | ignore_idx.add(84) 63 | ignore_idx.add(96) 64 | 65 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 66 | 67 | return CBL_idx, Conv_idx, prune_idx,shortcut_idx,shortcut_all 68 | 69 | def parse_module_defs_rep(module_defs): 70 | 71 | CBL_idx = [] 72 | Conv_idx = [] 73 | rep_idx = [] 74 | shortcut_idx=dict() 75 | shortcut_all=set() 76 | for i, module_def in enumerate(module_defs): 77 | if module_def['type'] == 'RepvggBlock': 78 | CBL_idx.append(i*2) 79 | rep_idx.append(i*2) 80 | 81 | for i, module_def in enumerate(module_defs): 82 | if module_def['type'] == 'convolutional': 83 | if module_def['batch_normalize'] == '1': 84 | CBL_idx.append(i+28) 85 | else: 86 | Conv_idx.append(i+28) 87 | 88 | ignore_idx = set() 89 | for i, module_def in enumerate(module_defs): 90 | if module_def['type'] == 'shortcut': 91 | identity_idx = (i + int(module_def['from'])) 92 | if module_defs[identity_idx]['type'] == 'convolutional': 93 | 94 | #ignore_idx.add(identity_idx) 95 | shortcut_idx[i-1]=identity_idx 96 | shortcut_all.add(identity_idx) 97 | elif module_defs[identity_idx]['type'] == 'shortcut': 98 | 99 | #ignore_idx.add(identity_idx - 1) 100 | shortcut_idx[i-1]=identity_idx-1 101 | shortcut_all.add(identity_idx-1) 102 | shortcut_all.add(i-1) 103 | # do not prune the conv layers before the upsample layers 104 | ignore_idx.add(38+28-1) 105 | ignore_idx.add(50+28-1) 106 | 107 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 108 | 109 | return CBL_idx, Conv_idx, prune_idx, rep_idx, shortcut_idx,shortcut_all 110 | 111 | 112 | def parse_module_defs(module_defs): 113 | 114 | CBL_idx = [] 115 | Conv_idx = [] 116 | for i, module_def in enumerate(module_defs): 117 | if module_def['type'] == 'convolutional': 118 | if module_def['batch_normalize'] == '1': 119 | CBL_idx.append(i) 120 | else: 121 | Conv_idx.append(i) 122 | ignore_idx = set() 123 | for i, module_def in enumerate(module_defs): 124 | if module_def['type'] == 'shortcut': 125 | ignore_idx.add(i-1) 126 | identity_idx = (i + int(module_def['from'])) 127 | if module_defs[identity_idx]['type'] == 'convolutional': 128 | ignore_idx.add(identity_idx) 129 | elif module_defs[identity_idx]['type'] == 'shortcut': 130 | ignore_idx.add(identity_idx - 1) 131 | # do not prune the conv layers before the upsample layers 132 | ignore_idx.add(84) 133 | ignore_idx.add(96) 134 | 135 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 136 | 137 | return CBL_idx, Conv_idx, prune_idx 138 | 139 | 140 | def gather_bn_weights(module_list, prune_idx): 141 | 142 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 143 | 144 | bn_weights = torch.zeros(sum(size_list)) 145 | index = 0 146 | for idx, size in zip(prune_idx, 
size_list): 147 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 148 | index += size 149 | 150 | return bn_weights 151 | 152 | 153 | def write_cfg(cfg_file, module_defs): 154 | 155 | with open(cfg_file, 'w') as f: 156 | for module_def in module_defs: 157 | f.write(f"[{module_def['type']}]\n") 158 | for key, value in module_def.items(): 159 | if key != 'type': 160 | f.write(f"{key}={value}\n") 161 | f.write("\n") 162 | return cfg_file 163 | 164 | 165 | class BNOptimizer(): 166 | 167 | @staticmethod 168 | def updateBN(sr_flag, module_list, s, prune_idx): 169 | if sr_flag: 170 | for idx in prune_idx: 171 | # Sequential(Conv, BN, LeakyReLU) 172 | bn_module = module_list[idx][1] 173 | bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 174 | 175 | 176 | def obtain_quantiles(bn_weights, num_quantile=5): 177 | 178 | sorted_bn_weights, i = torch.sort(bn_weights) 179 | total = sorted_bn_weights.shape[0] 180 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 181 | print("\nBN weights quantile:") 182 | quantile_table = [ 183 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 184 | ["%.3f" % quantile for quantile in quantiles] 185 | ] 186 | print(AsciiTable(quantile_table).table) 187 | 188 | return quantiles 189 | 190 | 191 | def get_input_mask(module_defs, idx, CBLidx2mask): 192 | 193 | if idx == 0: 194 | return np.ones(3) 195 | 196 | if idx == 56: 197 | return CBLidx2mask[idx - 2] 198 | 199 | if module_defs[idx-28-1]['type'] == 'convolutional': 200 | return CBLidx2mask[idx - 1] 201 | elif module_defs[idx-28-1]['type'] == 'shortcut': 202 | return CBLidx2mask[idx - 2] 203 | elif module_defs[idx-28-1]['type'] == 'route': 204 | # print('idx:') 205 | # print(idx) 206 | route_in_idxs = [] 207 | for layer_i in module_defs[idx-28-1]['layers'].split(","): 208 | if int(layer_i) < 0: 209 | route_in_idxs.append(idx-1 + int(layer_i)) 210 | else: 211 | route_in_idxs.append(int(layer_i)*2) 212 | # print('route_in_idxs:') 213 | # print(route_in_idxs) 214 | if len(route_in_idxs) == 1: 215 | return CBLidx2mask[route_in_idxs[0]] 216 | elif len(route_in_idxs) == 2: 217 | # return np.concatenate([CBLidx2mask[in_idx-1] for in_idx in route_in_idxs]) 218 | return np.concatenate([CBLidx2mask[route_in_idxs[0]-1],CBLidx2mask[route_in_idxs[1]]]) 219 | else: 220 | print("Something wrong with route module!") 221 | raise Exception 222 | 223 | def get_rep_input_mask(module_defs, idx, CBLidx2mask): 224 | 225 | if idx == 0: 226 | return np.ones(3) 227 | 228 | if module_defs[int(idx/2) - 1]['type'] == 'RepvggBlock': 229 | return CBLidx2mask[idx - 2] 230 | 231 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 232 | 233 | for idx in CBL_idx: 234 | compact_CBL = compact_model.module_list[idx] 235 | loose_CBL = loose_model.module_list[idx] 236 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 237 | 238 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 239 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 240 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 241 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 242 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 243 | 244 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 245 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 246 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 247 | tmp = 
loose_conv.weight.data[:, in_channel_idx, :, :].clone() 248 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 249 | 250 | for idx in Conv_idx: 251 | compact_conv = compact_model.module_list[idx][0] 252 | loose_conv = loose_model.module_list[idx][0] 253 | 254 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 255 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 256 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 257 | compact_conv.bias.data = loose_conv.bias.data.clone() 258 | 259 | def init_weights_from_loose_model_rep(compact_model, loose_model, CBL_idx, Conv_idx, rep_idx, CBLidx2mask): 260 | 261 | # print(compact_model.module_list) 262 | # print('~~~~~~~~~~~~~~~~~~~~~~~~~') 263 | # print(loose_model.module_list) 264 | 265 | for idx in CBL_idx: 266 | if idx in rep_idx: 267 | compact_CBL = compact_model.module_list[idx] 268 | loose_CBL = loose_model.module_list[idx] 269 | # print(compact_CBL) 270 | # print(loose_CBL) 271 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 272 | 273 | input_mask = get_rep_input_mask(loose_model.module_defs, idx, CBLidx2mask) 274 | # print(input_mask) 275 | # try: 276 | # in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 277 | # except: 278 | # print(idx) 279 | # print(input_mask) 280 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 281 | # if idx==0: 282 | # print(in_channel_idx) 283 | # print('------------') 284 | # print(out_channel_idx) 285 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 286 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 287 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 288 | # iden = compact_conv.weight.data==loose_conv.weight.data 289 | # print(iden.sum()) 290 | else: 291 | compact_CBL = compact_model.module_list[idx] 292 | loose_CBL = loose_model.module_list[idx] 293 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 294 | 295 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 296 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 297 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 298 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 299 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 300 | 301 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 302 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 303 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 304 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 305 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 306 | # print('idx: '+str(idx)) 307 | # print(len(in_channel_idx)) 308 | # print(len(out_channel_idx)) 309 | # iden = compact_conv.weight.data==loose_conv.weight.data 310 | # print(iden.sum()) 311 | # iden2 = compact_bn.weight.data==loose_bn.weight.data 312 | # print(iden2.sum()) 313 | # print('-----------') 314 | 315 | for idx in Conv_idx: 316 | compact_conv = compact_model.module_list[idx][0] 317 | loose_conv = loose_model.module_list[idx][0] 318 | 319 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 320 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 321 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 322 | compact_conv.bias.data = loose_conv.bias.data.clone() 323 | 324 | 325 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 326 | 327 | 
pruned_model = deepcopy(model) 328 | for idx in prune_idx: 329 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 330 | bn_module = pruned_model.module_list[idx][1] 331 | 332 | bn_module.weight.data.mul_(mask) 333 | 334 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 335 | 336 | # conv layers before the two upsample layers 337 | next_idx_list = [idx + 1] 338 | if idx == 79: 339 | next_idx_list.append(84) 340 | elif idx == 91: 341 | next_idx_list.append(96) 342 | 343 | for next_idx in next_idx_list: 344 | next_conv = pruned_model.module_list[next_idx][0] 345 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 346 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 347 | if next_idx in CBL_idx: 348 | next_bn = pruned_model.module_list[next_idx][1] 349 | next_bn.running_mean.data.sub_(offset) 350 | else: 351 | # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 352 | next_conv.bias.data.add_(offset) 353 | 354 | bn_module.bias.data.mul_(mask) 355 | 356 | return pruned_model 357 | 358 | 359 | def prune_rep_model_keep_size(model, prune_idx, CBL_idx, rep_idx, CBLidx2mask): 360 | 361 | pruned_model = deepcopy(model) 362 | # for idx in prune_idx: 363 | # if idx in rep_idx: 364 | # # mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 365 | # # conv_module = pruned_model.module_list[idx][0] 366 | # # conv_module.weight.data = conv_module.weight.data.permute(1, 2, 3, 0).mul(mask).float().permute(3, 0, 1, 2) 367 | # # next_idx_list = [idx + 2] 368 | # pass 369 | # else: 370 | # mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 371 | # bn_module = pruned_model.module_list[idx][1] 372 | 373 | # bn_module.weight.data.mul_(mask) 374 | 375 | # activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 376 | 377 | # # conv layers before the two upsample layers 378 | # next_idx_list = [idx + 1] 379 | # if idx == 60: 380 | # next_idx_list.append(65) 381 | # elif idx == 72: 382 | # next_idx_list.append(77) 383 | 384 | # for next_idx in next_idx_list: 385 | # next_conv = pruned_model.module_list[next_idx][0] 386 | # conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 387 | # offset = conv_sum.matmul(activation.float().reshape(-1, 1)).reshape(-1) 388 | # if next_idx in CBL_idx: 389 | # next_bn = pruned_model.module_list[next_idx][1] 390 | # next_bn.running_mean.data.sub_(offset) 391 | # else: 392 | # # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 393 | # next_conv.bias.data.add_(offset) 394 | 395 | # bn_module.bias.data.mul_(mask) 396 | 397 | return pruned_model 398 | 399 | 400 | def obtain_bn_mask(bn_module, thre): 401 | 402 | thre = thre.cuda() 403 | mask = bn_module.weight.data.abs().ge(thre).float() 404 | 405 | return mask 406 | 407 | def obtain_l1_mask(bn_module, random_rate): 408 | 409 | w_copy = bn_module.weight.data.abs().clone() 410 | w_copy = torch.sum(w_copy, dim=(1,2,3)) 411 | length = w_copy.cpu().numpy().shape[0] 412 | num_retain = int(length*(1-random_rate)) 413 | _,y = torch.topk(w_copy,num_retain) 414 | 415 | mask = np.zeros(length) 416 | mask[y.cpu()] = 1 417 | 418 | return mask 419 | 420 | def obtain_l1_mask2(bn_module, random_rate): 421 | 422 | w_copy = bn_module.weight.data.abs().clone() 423 | w_copy = torch.sum(w_copy, dim=(1,2,3)) 424 | length = w_copy.cpu().numpy().shape[0] 425 | num_retain = int(length*random_rate) 426 | if num_retain==0: 427 | num_retain=1 428 | _,y = torch.topk(w_copy,num_retain) 429 | 430 | mask = np.zeros(length) 431 | mask[y.cpu()] = 1 432 | 433 | return mask 434 | 435 | def obtain_rep_mask(conv_module, distance_rate): 436 | length = conv_module.weight.data.size()[0] 437 | 
codebook = np.ones(length) 438 | weight_torch = conv_module.weight.data.abs().clone() 439 | 440 | similar_pruned_num = int(weight_torch.size()[0] * distance_rate) 441 | weight_vec = weight_torch.view(weight_torch.size()[0], -1) 442 | # norm1 = torch.norm(weight_vec, 1, 1) 443 | # norm1_np = norm1.cpu().numpy() 444 | norm2 = torch.norm(weight_vec, 2, 1) 445 | norm2_np = norm2.cpu().numpy() 446 | filter_small_index = [] 447 | filter_large_index = [] 448 | filter_large_index = norm2_np.argsort() 449 | 450 | indices = torch.LongTensor(filter_large_index).cuda() 451 | weight_vec_after_norm = torch.index_select(weight_vec, 0, indices).cpu().numpy() 452 | # for euclidean distance 453 | similar_matrix = distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'euclidean') 454 | # for cos similarity 455 | # similar_matrix = 1 - distance.cdist(weight_vec_after_norm, weight_vec_after_norm, 'cosine') 456 | similar_sum = np.sum(np.abs(similar_matrix), axis=0) 457 | 458 | # for distance similar: get the filter index with largest similarity == small distance 459 | similar_large_index = similar_sum.argsort()[similar_pruned_num:] 460 | similar_small_index = similar_sum.argsort()[: similar_pruned_num] 461 | similar_index_for_filter = [filter_large_index[i] for i in similar_small_index] 462 | 463 | # kernel_length = weight_torch.size()[1] * weight_torch.size()[2] * weight_torch.size()[3] 464 | # for x in range(0, len(similar_index_for_filter)): 465 | # codebook[ 466 | # similar_index_for_filter[x] * kernel_length: (similar_index_for_filter[x] + 1) * kernel_length] = 0 467 | 468 | mask = np.ones(length) 469 | # mask[similar_index_for_filter] = 0 470 | 471 | return mask -------------------------------------------------------------------------------- /utils/tiny_prune_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from terminaltables import AsciiTable 3 | from copy import deepcopy 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def get_sr_flag(epoch, sr): 9 | # return epoch >= 5 and sr 10 | return sr 11 | 12 | 13 | def parse_module_defs(module_defs): 14 | 15 | CBL_idx = [] 16 | Conv_idx = [] 17 | for i, module_def in enumerate(module_defs): 18 | if module_def['type'] == 'convolutional': 19 | if module_def['batch_normalize'] == '1': 20 | CBL_idx.append(i) 21 | else: 22 | Conv_idx.append(i) 23 | 24 | ignore_idx = set() 25 | 26 | ignore_idx.add(18) 27 | 28 | 29 | prune_idx = [idx for idx in CBL_idx if idx not in ignore_idx] 30 | 31 | return CBL_idx, Conv_idx, prune_idx 32 | 33 | 34 | def gather_bn_weights(module_list, prune_idx): 35 | 36 | size_list = [module_list[idx][1].weight.data.shape[0] for idx in prune_idx] 37 | 38 | bn_weights = torch.zeros(sum(size_list)) 39 | index = 0 40 | for idx, size in zip(prune_idx, size_list): 41 | bn_weights[index:(index + size)] = module_list[idx][1].weight.data.abs().clone() 42 | index += size 43 | 44 | return bn_weights 45 | 46 | 47 | def write_cfg(cfg_file, module_defs): 48 | 49 | with open(cfg_file, 'w') as f: 50 | for module_def in module_defs: 51 | f.write(f"[{module_def['type']}]\n") 52 | for key, value in module_def.items(): 53 | if key != 'type': 54 | f.write(f"{key}={value}\n") 55 | f.write("\n") 56 | return cfg_file 57 | 58 | 59 | class BNOptimizer(): 60 | 61 | @staticmethod 62 | def updateBN(sr_flag, module_list, s, prune_idx): 63 | if sr_flag: 64 | for idx in prune_idx: 65 | # Sequential(Conv, BN, LeakyReLU) 66 | bn_module = module_list[idx][1] 67 | 
bn_module.weight.grad.data.add_(s * torch.sign(bn_module.weight.data)) # L1 68 | 69 | 70 | def obtain_quantiles(bn_weights, num_quantile=5): 71 | 72 | sorted_bn_weights, i = torch.sort(bn_weights) 73 | total = sorted_bn_weights.shape[0] 74 | quantiles = sorted_bn_weights.tolist()[-1::-total//num_quantile][::-1] 75 | print("\nBN weights quantile:") 76 | quantile_table = [ 77 | [f'{i}/{num_quantile}' for i in range(1, num_quantile+1)], 78 | ["%.3f" % quantile for quantile in quantiles] 79 | ] 80 | print(AsciiTable(quantile_table).table) 81 | 82 | return quantiles 83 | 84 | 85 | def get_input_mask(module_defs, idx, CBLidx2mask): 86 | 87 | if idx == 0: 88 | return np.ones(3) 89 | 90 | if module_defs[idx - 1]['type'] == 'convolutional': 91 | return CBLidx2mask[idx - 1] 92 | elif module_defs[idx - 1]['type'] == 'shortcut': 93 | return CBLidx2mask[idx - 2] 94 | elif module_defs[idx - 1]['type'] == 'route': 95 | route_in_idxs = [] 96 | for layer_i in module_defs[idx - 1]['layers'].split(","): 97 | if int(layer_i) < 0: 98 | route_in_idxs.append(idx - 1 + int(layer_i)) 99 | else: 100 | route_in_idxs.append(int(layer_i)) 101 | if len(route_in_idxs) == 1: 102 | return CBLidx2mask[route_in_idxs[0]] 103 | elif len(route_in_idxs) == 2: 104 | return np.concatenate([CBLidx2mask[in_idx - 1] for in_idx in route_in_idxs]) 105 | else: 106 | print("Something wrong with route module!") 107 | raise Exception 108 | 109 | 110 | def init_weights_from_loose_model(compact_model, loose_model, CBL_idx, Conv_idx, CBLidx2mask): 111 | 112 | for idx in CBL_idx: 113 | compact_CBL = compact_model.module_list[idx] 114 | loose_CBL = loose_model.module_list[idx] 115 | out_channel_idx = np.argwhere(CBLidx2mask[idx])[:, 0].tolist() 116 | 117 | compact_bn, loose_bn = compact_CBL[1], loose_CBL[1] 118 | compact_bn.weight.data = loose_bn.weight.data[out_channel_idx].clone() 119 | compact_bn.bias.data = loose_bn.bias.data[out_channel_idx].clone() 120 | compact_bn.running_mean.data = loose_bn.running_mean.data[out_channel_idx].clone() 121 | compact_bn.running_var.data = loose_bn.running_var.data[out_channel_idx].clone() 122 | 123 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 124 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 125 | compact_conv, loose_conv = compact_CBL[0], loose_CBL[0] 126 | tmp = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 127 | compact_conv.weight.data = tmp[out_channel_idx, :, :, :].clone() 128 | 129 | for idx in Conv_idx: 130 | compact_conv = compact_model.module_list[idx][0] 131 | loose_conv = loose_model.module_list[idx][0] 132 | 133 | input_mask = get_input_mask(loose_model.module_defs, idx, CBLidx2mask) 134 | in_channel_idx = np.argwhere(input_mask)[:, 0].tolist() 135 | compact_conv.weight.data = loose_conv.weight.data[:, in_channel_idx, :, :].clone() 136 | compact_conv.bias.data = loose_conv.bias.data.clone() 137 | 138 | 139 | def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask): 140 | 141 | pruned_model = deepcopy(model) 142 | for idx in prune_idx: 143 | mask = torch.from_numpy(CBLidx2mask[idx]).cuda() 144 | bn_module = pruned_model.module_list[idx][1] 145 | 146 | bn_module.weight.data.mul_(mask) 147 | 148 | activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1) 149 | 150 | 151 | if idx<12: 152 | next_idx_list = [idx + 2] 153 | else: 154 | next_idx_list = [idx + 1] 155 | 156 | 157 | #next_idx_list = [idx + 1] 158 | if idx == 13: 159 | next_idx_list.append(18) 160 | 161 | 162 | 163 | for next_idx in next_idx_list: 164 | next_conv = 
pruned_model.module_list[next_idx][0] 165 | conv_sum = next_conv.weight.data.sum(dim=(2, 3)) 166 | offset = conv_sum.matmul(activation.reshape(-1, 1)).reshape(-1) 167 | 168 | if next_idx in CBL_idx: 169 | next_bn = pruned_model.module_list[next_idx][1] 170 | next_bn.running_mean.data.sub_(offset) 171 | else: 172 | # note: a convolutional layer followed by BN has no bias term, while one without BN does use a bias 173 | next_conv.bias.data.add_(offset) 174 | 175 | 176 | bn_module.bias.data.mul_(mask) 177 | 178 | return pruned_model 179 | 180 | 181 | def obtain_bn_mask(bn_module, thre): 182 | 183 | thre = thre.cuda() 184 | mask = bn_module.weight.data.abs().ge(thre).float() 185 | 186 | return mask 187 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availability 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 29 | cuda_str = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 30 | for i in range(0, ng): 31 | if i == 1: 32 | cuda_str = ' ' * len(cuda_str) 33 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 34 | (cuda_str, i, x[i].name, x[i].total_memory / c)) 35 | else: 36 | print('Using CPU') 37 | 38 | print('') # skip a line 39 | return torch.device('cuda:0' if cuda else 'cpu') 40 | 41 | 42 | def fuse_conv_and_bn(conv, bn): 43 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 44 | with torch.no_grad(): 45 | # init 46 | fusedconv = torch.nn.Conv2d(conv.in_channels, 47 | conv.out_channels, 48 | kernel_size=conv.kernel_size, 49 | stride=conv.stride, 50 | padding=conv.padding, 51 | bias=True) 52 | 53 | # prepare filters 54 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 55 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 56 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 57 | 58 | # prepare spatial bias 59 | if conv.bias is not None: 60 | b_conv = conv.bias 61 | else: 62 | b_conv = torch.zeros(conv.weight.size(0)).cuda() 63 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 64 | fusedconv.bias.copy_(b_conv + b_bn) 65 | 66 | return fusedconv 67 | 68 | 69 | def model_info(model, report='summary'): 70 | # Plots a line-by-line description of a PyTorch model 71 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 72 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 73 | if report == 'full': 74 | print('%5s %40s %9s %12s 
%20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 75 | for i, (name, p) in enumerate(model.named_parameters()): 76 | name = name.replace('module_list.', '') 77 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 78 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 79 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 80 | 81 | 82 | def load_classifier(name='resnet101', n=2): 83 | # Loads a pretrained model reshaped to n-class output 84 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 85 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 86 | 87 | # Display model properties 88 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 89 | print(x + ' =', eval(x)) 90 | 91 | # Reshape output to n classes 92 | filters = model.last_linear.weight.shape[1] 93 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 94 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 95 | model.last_linear.out_features = n 96 | return model 97 | -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | wget -c https://pjreddie.com/media/files/yolov3.weights 8 | wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | --------------------------------------------------------------------------------
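
Usage note (not one of the repository files above): the helpers in utils/prune_utils.py implement a network-slimming style pipeline — gather_bn_weights collects the |gamma| values of every prunable BN layer, a global threshold turns them into per-layer channel masks via obtain_bn_mask, prune_model_keep_size folds the bias contribution of the removed channels into the downstream layers, and write_cfg together with init_weights_from_loose_model rebuild a compact model from the surviving channels. The sketch below only illustrates that intended flow; it is not the repository's own prune script. Assumptions: Darknet comes from models.py, the .pt checkpoint stores its state dict under a 'model' key, `percent`, `sketch_prune`, and the output cfg path are illustrative names, and the cfg rebuild is schematic (a complete script would also prepend the [net] hyperparameter block). Note also that index bookkeeping inside the helpers (ignore_idx, the idx-28 offsets in get_input_mask, layers 84/96) is tied to specific cfgs.

# Minimal sketch of the pruning flow, under the assumptions stated above.
import numpy as np
import torch

from models import Darknet  # assumption: models.py exposes Darknet(cfg)
from utils.prune_utils import (parse_module_defs, gather_bn_weights, obtain_bn_mask,
                               prune_model_keep_size, init_weights_from_loose_model,
                               write_cfg)


def sketch_prune(cfg='cfg/yolov3-hand.cfg', weights='weights/last.pt', percent=0.5):
    model = Darknet(cfg).cuda()
    model.load_state_dict(torch.load(weights)['model'])  # assumed checkpoint layout

    CBL_idx, Conv_idx, prune_idx = parse_module_defs(model.module_defs)

    # Global threshold over the |gamma| of every prunable BN layer.
    bn_weights = gather_bn_weights(model.module_list, prune_idx)
    thre = torch.sort(bn_weights)[0][int(len(bn_weights) * percent)]

    # Per-layer channel masks; layers excluded from pruning keep all channels.
    CBLidx2mask = {}
    for idx in CBL_idx:
        bn_module = model.module_list[idx][1]
        if idx in prune_idx:
            CBLidx2mask[idx] = obtain_bn_mask(bn_module, thre).cpu().numpy()
        else:
            CBLidx2mask[idx] = np.ones(bn_module.weight.data.shape[0], dtype=np.float32)

    # Zero the pruned gammas and fold the removed channels' bias activations into
    # the following layers, so the full-size model already mimics the pruned one.
    pruned_model = prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask)

    # Rebuild a compact cfg (schematic: only the filters= counts are updated here)
    # and copy the surviving weights across.
    compact_defs = [dict(d) for d in pruned_model.module_defs]
    for idx in CBL_idx:
        compact_defs[idx]['filters'] = str(int(CBLidx2mask[idx].sum()))
    compact_cfg = write_cfg('cfg/prune_yolov3-hand.cfg', compact_defs)

    compact_model = Darknet(compact_cfg).cuda()
    init_weights_from_loose_model(compact_model, pruned_model, CBL_idx, Conv_idx, CBLidx2mask)
    return compact_model

The same flow applies to utils/tiny_prune_utils.py and to the RepVGG-aware variants (parse_module_defs_rep, init_weights_from_loose_model_rep, prune_rep_model_keep_size), which differ mainly in how layer indices and channel masks are derived.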