├── .gitignore ├── 4.cfg ├── 4prn.cfg ├── LICENSE ├── README.md ├── coco.data ├── coco.names ├── configs └── deep_sort.yaml ├── deep_sort ├── README.md ├── __init__.py ├── deep │ ├── __init__.py │ ├── checkpoint │ │ └── .gitkeep │ ├── evaluate.py │ ├── feature_extractor.py │ ├── model.py │ ├── original_model.py │ ├── test.py │ ├── train.jpg │ └── train.py ├── deep_sort.py └── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py ├── detector ├── v4darknet.py └── v4detector.py ├── eval_tracker.py ├── requirements.txt ├── scripts ├── yolov3_deepsort.sh └── yolov3_tiny_deepsort.sh ├── tracker.py ├── utils ├── __init__.py ├── draw.py ├── evaluation.py ├── io.py ├── log.py └── parser.py └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Folders 2 | __pycache__/ 3 | build/ 4 | *.egg-info 5 | 6 | 7 | # Files 8 | *.weights 9 | *.t7 10 | *.mp4 11 | *.avi 12 | *.so 13 | *.txt 14 | .idea/ 15 | *.weights 16 | -------------------------------------------------------------------------------- /4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=1 7 | subdivisions=1 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | 30 | [convolutional] 31 | batch_normalize=1 32 | filters=32 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=mish 37 | 38 | # Downsample 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=3 44 | stride=2 45 
| pad=1 46 | activation=mish 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=1 52 | stride=1 53 | pad=1 54 | activation=mish 55 | 56 | [route] 57 | layers = -2 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=mish 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=32 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=mish 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=mish 82 | 83 | [shortcut] 84 | from=-3 85 | activation=linear 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=1 91 | stride=1 92 | pad=1 93 | activation=mish 94 | 95 | [route] 96 | layers = -1,-7 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=64 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=mish 105 | 106 | # Downsample 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=128 111 | size=3 112 | stride=2 113 | pad=1 114 | activation=mish 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=64 119 | size=1 120 | stride=1 121 | pad=1 122 | activation=mish 123 | 124 | [route] 125 | layers = -2 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=64 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=mish 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=64 138 | size=1 139 | stride=1 140 | pad=1 141 | activation=mish 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=64 146 | size=3 147 | stride=1 148 | pad=1 149 | activation=mish 150 | 151 | [shortcut] 152 | from=-3 153 | activation=linear 154 | 155 | [convolutional] 156 | batch_normalize=1 157 | filters=64 158 | size=1 159 | stride=1 160 | pad=1 161 | activation=mish 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=64 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=mish 170 | 171 | [shortcut] 172 | from=-3 173 | 
activation=linear 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=64 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=mish 182 | 183 | [route] 184 | layers = -1,-10 185 | 186 | [convolutional] 187 | batch_normalize=1 188 | filters=128 189 | size=1 190 | stride=1 191 | pad=1 192 | activation=mish 193 | 194 | # Downsample 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=256 199 | size=3 200 | stride=2 201 | pad=1 202 | activation=mish 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=mish 211 | 212 | [route] 213 | layers = -2 214 | 215 | [convolutional] 216 | batch_normalize=1 217 | filters=128 218 | size=1 219 | stride=1 220 | pad=1 221 | activation=mish 222 | 223 | [convolutional] 224 | batch_normalize=1 225 | filters=128 226 | size=1 227 | stride=1 228 | pad=1 229 | activation=mish 230 | 231 | [convolutional] 232 | batch_normalize=1 233 | filters=128 234 | size=3 235 | stride=1 236 | pad=1 237 | activation=mish 238 | 239 | [shortcut] 240 | from=-3 241 | activation=linear 242 | 243 | [convolutional] 244 | batch_normalize=1 245 | filters=128 246 | size=1 247 | stride=1 248 | pad=1 249 | activation=mish 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=128 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=mish 258 | 259 | [shortcut] 260 | from=-3 261 | activation=linear 262 | 263 | [convolutional] 264 | batch_normalize=1 265 | filters=128 266 | size=1 267 | stride=1 268 | pad=1 269 | activation=mish 270 | 271 | [convolutional] 272 | batch_normalize=1 273 | filters=128 274 | size=3 275 | stride=1 276 | pad=1 277 | activation=mish 278 | 279 | [shortcut] 280 | from=-3 281 | activation=linear 282 | 283 | [convolutional] 284 | batch_normalize=1 285 | filters=128 286 | size=1 287 | stride=1 288 | pad=1 289 | activation=mish 290 | 291 | [convolutional] 292 | batch_normalize=1 293 | filters=128 294 | size=3 295 | stride=1 296 | pad=1 
297 | activation=mish 298 | 299 | [shortcut] 300 | from=-3 301 | activation=linear 302 | 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=128 307 | size=1 308 | stride=1 309 | pad=1 310 | activation=mish 311 | 312 | [convolutional] 313 | batch_normalize=1 314 | filters=128 315 | size=3 316 | stride=1 317 | pad=1 318 | activation=mish 319 | 320 | [shortcut] 321 | from=-3 322 | activation=linear 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=128 327 | size=1 328 | stride=1 329 | pad=1 330 | activation=mish 331 | 332 | [convolutional] 333 | batch_normalize=1 334 | filters=128 335 | size=3 336 | stride=1 337 | pad=1 338 | activation=mish 339 | 340 | [shortcut] 341 | from=-3 342 | activation=linear 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=128 347 | size=1 348 | stride=1 349 | pad=1 350 | activation=mish 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=3 356 | stride=1 357 | pad=1 358 | activation=mish 359 | 360 | [shortcut] 361 | from=-3 362 | activation=linear 363 | 364 | [convolutional] 365 | batch_normalize=1 366 | filters=128 367 | size=1 368 | stride=1 369 | pad=1 370 | activation=mish 371 | 372 | [convolutional] 373 | batch_normalize=1 374 | filters=128 375 | size=3 376 | stride=1 377 | pad=1 378 | activation=mish 379 | 380 | [shortcut] 381 | from=-3 382 | activation=linear 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=mish 391 | 392 | [route] 393 | layers = -1,-28 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=256 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=mish 402 | 403 | # Downsample 404 | 405 | [convolutional] 406 | batch_normalize=1 407 | filters=512 408 | size=3 409 | stride=2 410 | pad=1 411 | activation=mish 412 | 413 | [convolutional] 414 | batch_normalize=1 415 | filters=256 416 | size=1 417 | stride=1 418 | pad=1 419 | activation=mish 420 | 421 | 
[route] 422 | layers = -2 423 | 424 | [convolutional] 425 | batch_normalize=1 426 | filters=256 427 | size=1 428 | stride=1 429 | pad=1 430 | activation=mish 431 | 432 | [convolutional] 433 | batch_normalize=1 434 | filters=256 435 | size=1 436 | stride=1 437 | pad=1 438 | activation=mish 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=3 444 | stride=1 445 | pad=1 446 | activation=mish 447 | 448 | [shortcut] 449 | from=-3 450 | activation=linear 451 | 452 | 453 | [convolutional] 454 | batch_normalize=1 455 | filters=256 456 | size=1 457 | stride=1 458 | pad=1 459 | activation=mish 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=256 464 | size=3 465 | stride=1 466 | pad=1 467 | activation=mish 468 | 469 | [shortcut] 470 | from=-3 471 | activation=linear 472 | 473 | 474 | [convolutional] 475 | batch_normalize=1 476 | filters=256 477 | size=1 478 | stride=1 479 | pad=1 480 | activation=mish 481 | 482 | [convolutional] 483 | batch_normalize=1 484 | filters=256 485 | size=3 486 | stride=1 487 | pad=1 488 | activation=mish 489 | 490 | [shortcut] 491 | from=-3 492 | activation=linear 493 | 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=256 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=mish 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=256 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=mish 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | 516 | [convolutional] 517 | batch_normalize=1 518 | filters=256 519 | size=1 520 | stride=1 521 | pad=1 522 | activation=mish 523 | 524 | [convolutional] 525 | batch_normalize=1 526 | filters=256 527 | size=3 528 | stride=1 529 | pad=1 530 | activation=mish 531 | 532 | [shortcut] 533 | from=-3 534 | activation=linear 535 | 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=256 540 | size=1 541 | stride=1 542 | pad=1 543 | activation=mish 544 | 545 | [convolutional] 546 | batch_normalize=1 
547 | filters=256 548 | size=3 549 | stride=1 550 | pad=1 551 | activation=mish 552 | 553 | [shortcut] 554 | from=-3 555 | activation=linear 556 | 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=256 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=mish 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=256 569 | size=3 570 | stride=1 571 | pad=1 572 | activation=mish 573 | 574 | [shortcut] 575 | from=-3 576 | activation=linear 577 | 578 | [convolutional] 579 | batch_normalize=1 580 | filters=256 581 | size=1 582 | stride=1 583 | pad=1 584 | activation=mish 585 | 586 | [convolutional] 587 | batch_normalize=1 588 | filters=256 589 | size=3 590 | stride=1 591 | pad=1 592 | activation=mish 593 | 594 | [shortcut] 595 | from=-3 596 | activation=linear 597 | 598 | [convolutional] 599 | batch_normalize=1 600 | filters=256 601 | size=1 602 | stride=1 603 | pad=1 604 | activation=mish 605 | 606 | [route] 607 | layers = -1,-28 608 | 609 | [convolutional] 610 | batch_normalize=1 611 | filters=512 612 | size=1 613 | stride=1 614 | pad=1 615 | activation=mish 616 | 617 | # Downsample 618 | 619 | [convolutional] 620 | batch_normalize=1 621 | filters=1024 622 | size=3 623 | stride=2 624 | pad=1 625 | activation=mish 626 | 627 | [convolutional] 628 | batch_normalize=1 629 | filters=512 630 | size=1 631 | stride=1 632 | pad=1 633 | activation=mish 634 | 635 | [route] 636 | layers = -2 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=512 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=mish 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=512 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=mish 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=512 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=mish 661 | 662 | [shortcut] 663 | from=-3 664 | activation=linear 665 | 666 | [convolutional] 667 | batch_normalize=1 668 | filters=512 669 | size=1 670 | stride=1 671 | pad=1 
672 | activation=mish 673 | 674 | [convolutional] 675 | batch_normalize=1 676 | filters=512 677 | size=3 678 | stride=1 679 | pad=1 680 | activation=mish 681 | 682 | [shortcut] 683 | from=-3 684 | activation=linear 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=512 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=mish 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | filters=512 697 | size=3 698 | stride=1 699 | pad=1 700 | activation=mish 701 | 702 | [shortcut] 703 | from=-3 704 | activation=linear 705 | 706 | [convolutional] 707 | batch_normalize=1 708 | filters=512 709 | size=1 710 | stride=1 711 | pad=1 712 | activation=mish 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=512 717 | size=3 718 | stride=1 719 | pad=1 720 | activation=mish 721 | 722 | [shortcut] 723 | from=-3 724 | activation=linear 725 | 726 | [convolutional] 727 | batch_normalize=1 728 | filters=512 729 | size=1 730 | stride=1 731 | pad=1 732 | activation=mish 733 | 734 | [route] 735 | layers = -1,-16 736 | 737 | [convolutional] 738 | batch_normalize=1 739 | filters=1024 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=mish 744 | 745 | ########################## 746 | 747 | [convolutional] 748 | batch_normalize=1 749 | filters=512 750 | size=1 751 | stride=1 752 | pad=1 753 | activation=leaky 754 | 755 | [convolutional] 756 | batch_normalize=1 757 | size=3 758 | stride=1 759 | pad=1 760 | filters=1024 761 | activation=leaky 762 | 763 | [convolutional] 764 | batch_normalize=1 765 | filters=512 766 | size=1 767 | stride=1 768 | pad=1 769 | activation=leaky 770 | 771 | ### SPP ### 772 | [maxpool] 773 | stride=1 774 | size=5 775 | 776 | [route] 777 | layers=-2 778 | 779 | [maxpool] 780 | stride=1 781 | size=9 782 | 783 | [route] 784 | layers=-4 785 | 786 | [maxpool] 787 | stride=1 788 | size=13 789 | 790 | [route] 791 | layers=-1,-3,-5,-6 792 | ### End SPP ### 793 | 794 | [convolutional] 795 | batch_normalize=1 796 | filters=512 797 | 
size=1 798 | stride=1 799 | pad=1 800 | activation=leaky 801 | 802 | [convolutional] 803 | batch_normalize=1 804 | size=3 805 | stride=1 806 | pad=1 807 | filters=1024 808 | activation=leaky 809 | 810 | [convolutional] 811 | batch_normalize=1 812 | filters=512 813 | size=1 814 | stride=1 815 | pad=1 816 | activation=leaky 817 | 818 | [convolutional] 819 | batch_normalize=1 820 | filters=256 821 | size=1 822 | stride=1 823 | pad=1 824 | activation=leaky 825 | 826 | [upsample] 827 | stride=2 828 | 829 | [route] 830 | layers = 85 831 | 832 | [convolutional] 833 | batch_normalize=1 834 | filters=256 835 | size=1 836 | stride=1 837 | pad=1 838 | activation=leaky 839 | 840 | [route] 841 | layers = -1, -3 842 | 843 | [convolutional] 844 | batch_normalize=1 845 | filters=256 846 | size=1 847 | stride=1 848 | pad=1 849 | activation=leaky 850 | 851 | [convolutional] 852 | batch_normalize=1 853 | size=3 854 | stride=1 855 | pad=1 856 | filters=512 857 | activation=leaky 858 | 859 | [convolutional] 860 | batch_normalize=1 861 | filters=256 862 | size=1 863 | stride=1 864 | pad=1 865 | activation=leaky 866 | 867 | [convolutional] 868 | batch_normalize=1 869 | size=3 870 | stride=1 871 | pad=1 872 | filters=512 873 | activation=leaky 874 | 875 | [convolutional] 876 | batch_normalize=1 877 | filters=256 878 | size=1 879 | stride=1 880 | pad=1 881 | activation=leaky 882 | 883 | [convolutional] 884 | batch_normalize=1 885 | filters=128 886 | size=1 887 | stride=1 888 | pad=1 889 | activation=leaky 890 | 891 | [upsample] 892 | stride=2 893 | 894 | [route] 895 | layers = 54 896 | 897 | [convolutional] 898 | batch_normalize=1 899 | filters=128 900 | size=1 901 | stride=1 902 | pad=1 903 | activation=leaky 904 | 905 | [route] 906 | layers = -1, -3 907 | 908 | [convolutional] 909 | batch_normalize=1 910 | filters=128 911 | size=1 912 | stride=1 913 | pad=1 914 | activation=leaky 915 | 916 | [convolutional] 917 | batch_normalize=1 918 | size=3 919 | stride=1 920 | pad=1 921 | filters=256 
922 | activation=leaky 923 | 924 | [convolutional] 925 | batch_normalize=1 926 | filters=128 927 | size=1 928 | stride=1 929 | pad=1 930 | activation=leaky 931 | 932 | [convolutional] 933 | batch_normalize=1 934 | size=3 935 | stride=1 936 | pad=1 937 | filters=256 938 | activation=leaky 939 | 940 | [convolutional] 941 | batch_normalize=1 942 | filters=128 943 | size=1 944 | stride=1 945 | pad=1 946 | activation=leaky 947 | 948 | ########################## 949 | 950 | [convolutional] 951 | batch_normalize=1 952 | size=3 953 | stride=1 954 | pad=1 955 | filters=256 956 | activation=leaky 957 | 958 | [convolutional] 959 | size=1 960 | stride=1 961 | pad=1 962 | filters=255 963 | activation=linear 964 | 965 | 966 | [yolo] 967 | mask = 0,1,2 968 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 969 | classes=80 970 | num=9 971 | jitter=.3 972 | ignore_thresh = .7 973 | truth_thresh = 1 974 | scale_x_y = 1.2 975 | iou_thresh=0.213 976 | cls_normalizer=1.0 977 | iou_normalizer=0.07 978 | iou_loss=ciou 979 | nms_kind=greedynms 980 | beta_nms=0.6 981 | 982 | 983 | [route] 984 | layers = -4 985 | 986 | [convolutional] 987 | batch_normalize=1 988 | size=3 989 | stride=2 990 | pad=1 991 | filters=256 992 | activation=leaky 993 | 994 | [route] 995 | layers = -1, -16 996 | 997 | [convolutional] 998 | batch_normalize=1 999 | filters=256 1000 | size=1 1001 | stride=1 1002 | pad=1 1003 | activation=leaky 1004 | 1005 | [convolutional] 1006 | batch_normalize=1 1007 | size=3 1008 | stride=1 1009 | pad=1 1010 | filters=512 1011 | activation=leaky 1012 | 1013 | [convolutional] 1014 | batch_normalize=1 1015 | filters=256 1016 | size=1 1017 | stride=1 1018 | pad=1 1019 | activation=leaky 1020 | 1021 | [convolutional] 1022 | batch_normalize=1 1023 | size=3 1024 | stride=1 1025 | pad=1 1026 | filters=512 1027 | activation=leaky 1028 | 1029 | [convolutional] 1030 | batch_normalize=1 1031 | filters=256 1032 | size=1 1033 | stride=1 1034 | pad=1 1035 | 
activation=leaky 1036 | 1037 | [convolutional] 1038 | batch_normalize=1 1039 | size=3 1040 | stride=1 1041 | pad=1 1042 | filters=512 1043 | activation=leaky 1044 | 1045 | [convolutional] 1046 | size=1 1047 | stride=1 1048 | pad=1 1049 | filters=255 1050 | activation=linear 1051 | 1052 | 1053 | [yolo] 1054 | mask = 3,4,5 1055 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1056 | classes=80 1057 | num=9 1058 | jitter=.3 1059 | ignore_thresh = .7 1060 | truth_thresh = 1 1061 | scale_x_y = 1.1 1062 | iou_thresh=0.213 1063 | cls_normalizer=1.0 1064 | iou_normalizer=0.07 1065 | iou_loss=ciou 1066 | nms_kind=greedynms 1067 | beta_nms=0.6 1068 | 1069 | 1070 | [route] 1071 | layers = -4 1072 | 1073 | [convolutional] 1074 | batch_normalize=1 1075 | size=3 1076 | stride=2 1077 | pad=1 1078 | filters=512 1079 | activation=leaky 1080 | 1081 | [route] 1082 | layers = -1, -37 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=512 1087 | size=1 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [convolutional] 1093 | batch_normalize=1 1094 | size=3 1095 | stride=1 1096 | pad=1 1097 | filters=1024 1098 | activation=leaky 1099 | 1100 | [convolutional] 1101 | batch_normalize=1 1102 | filters=512 1103 | size=1 1104 | stride=1 1105 | pad=1 1106 | activation=leaky 1107 | 1108 | [convolutional] 1109 | batch_normalize=1 1110 | size=3 1111 | stride=1 1112 | pad=1 1113 | filters=1024 1114 | activation=leaky 1115 | 1116 | [convolutional] 1117 | batch_normalize=1 1118 | filters=512 1119 | size=1 1120 | stride=1 1121 | pad=1 1122 | activation=leaky 1123 | 1124 | [convolutional] 1125 | batch_normalize=1 1126 | size=3 1127 | stride=1 1128 | pad=1 1129 | filters=1024 1130 | activation=leaky 1131 | 1132 | [convolutional] 1133 | size=1 1134 | stride=1 1135 | pad=1 1136 | filters=255 1137 | activation=linear 1138 | 1139 | 1140 | [yolo] 1141 | mask = 6,7,8 1142 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 
142, 110, 192, 243, 459, 401 1143 | classes=80 1144 | num=9 1145 | jitter=.3 1146 | ignore_thresh = .7 1147 | truth_thresh = 1 1148 | random=1 1149 | scale_x_y = 1.05 1150 | iou_thresh=0.213 1151 | cls_normalizer=1.0 1152 | iou_normalizer=0.07 1153 | iou_loss=ciou 1154 | nms_kind=greedynms 1155 | beta_nms=0.6 1156 | 1157 | -------------------------------------------------------------------------------- /4prn.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=512 100 | size=3 101 | 
stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [shortcut] 106 | activation=leaky 107 | from=-3 108 | 109 | ########### 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=256 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [shortcut] 128 | activation=leaky 129 | from=-2 130 | 131 | [convolutional] 132 | size=1 133 | stride=1 134 | pad=1 135 | filters=255 136 | activation=linear 137 | 138 | 139 | 140 | [yolo] 141 | mask = 3,4,5 142 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 143 | classes=80 144 | num=6 145 | jitter=.3 146 | ignore_thresh = .7 147 | truth_thresh = 1 148 | random=1 149 | 150 | [route] 151 | layers = -4 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=128 156 | size=1 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [upsample] 162 | stride=2 163 | 164 | [shortcut] 165 | activation=leaky 166 | from=8 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=128 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [shortcut] 177 | activation=leaky 178 | from=-3 179 | 180 | [shortcut] 181 | activation=leaky 182 | from=8 183 | 184 | [convolutional] 185 | size=1 186 | stride=1 187 | pad=1 188 | filters=255 189 | activation=linear 190 | 191 | [yolo] 192 | mask = 1,2,3 193 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 194 | classes=80 195 | num=6 196 | jitter=.3 197 | ignore_thresh = .7 198 | truth_thresh = 1 199 | random=1 200 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 derek285 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yolov4 + Deep Sort with PyTorch 2 | 3 | ## remember build your own libdarknet.so and put under folder yolov4_deep_sort_pytorch/ 4 | ## ref : https://github.com/ZQPei/deep_sort_pytorch 5 | 6 | ## Quick Start 7 | 0. Check all dependencies installed 8 | ```bash 9 | pip3 install -r requirements.txt 10 | ``` 11 | for user in china, you can specify pypi source to accelerate install like: 12 | ```bash 13 | pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 14 | ``` 15 | 16 | 1. Clone this repository 17 | ``` 18 | git clone https://github.com/derek285/yolov4_deep_sort_pytorch.git 19 | ``` 20 | 21 | 2. Download YOLOv4 parameters 22 | ``` 23 | wget yolov4.weights and cfg 24 | //并且重命名为4.cfg 和 4.weights 这个hard code有点山寨了:detector/v4detector.py : 25 | configPath = "4.cfg" 26 | weightPath = "4.weights" 27 | ``` 28 | 29 | 3. 
Download deepsort parameters ckpt.t7 30 | ``` 31 | cd deep_sort/deep/checkpoint 32 | # download ckpt.t7 from 33 | https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder 34 | cd ../../../ 35 | ``` 36 | 37 | Notice: 38 | If compiling failed, the simplist way is to **Upgrade your pytorch >= 1.1 and torchvision >= 0.3" and you can avoid the troublesome compiling problems which are most likely caused by either `gcc version too low` or `libraries missing`. 39 | 40 | 4. Run demo 41 | ``` 42 | usage: python tracker.py VIDEO_PATH 43 | [--help] 44 | [--frame_interval FRAME_INTERVAL] 45 | [--config_detection CONFIG_DETECTION] 46 | [--config_deepsort CONFIG_DEEPSORT] 47 | [--display] 48 | [--display_width DISPLAY_WIDTH] 49 | [--display_height DISPLAY_HEIGHT] 50 | [--save_path SAVE_PATH] 51 | [--cpu] 52 | 53 | 54 | # yolov4 + deepsort on video file 55 | python3 tracker.py VIDEO_PATH 56 | # yolov4 + deepsort on webcam 57 | python3 tracker.py /dev/video0 --camera 0 58 | 59 | # todo 60 | ``` 61 | 1. get class_id return 62 | 2. 
fix depends on libdarknet.so 63 | ``` 64 | 65 | ## References 66 | - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402) 67 | - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort) 68 | - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) 69 | - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/) 70 | - code: [ZQPei/deep_sort_pytorch](https://github.com/ZQPei/deep_sort_pytorch) 71 | - code:[AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) 72 | -------------------------------------------------------------------------------- /coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | names = coco.names 3 | eval=coco 4 | 5 | -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | 
-------------------------------------------------------------------------------- /configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | -------------------------------------------------------------------------------- /deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | logger = logging.getLogger("root.tracker") 16 | logger.info("Loading weights from {}... Done!".format(model_path)) 17 | self.net.to(self.device) 18 | self.size = (64, 128) 19 | self.norm = transforms.Compose([ 20 | transforms.ToTensor(), 21 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 22 | ]) 23 | 24 | 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. 
normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 39 | return im_batch 40 | 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | 56 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | 
return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | -------------------------------------------------------------------------------- /deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 
import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | 
self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 
| torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict, strict=False) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir",default='data',type=str) 15 | parser.add_argument("--no-cuda",action="store_true") 16 | parser.add_argument("--gpu-id",default=0,type=int) 17 | parser.add_argument("--lr",default=0.1, type=float) 18 | parser.add_argument("--interval",'-i',default=20,type=int) 19 | parser.add_argument('--resume', '-r',action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 24 | if torch.cuda.is_available() and not args.no_cuda: 25 | cudnn.benchmark = True 26 | 27 | # data loading 28 | root = args.data_dir 29 | train_dir = os.path.join(root,"train") 30 | test_dir = os.path.join(root,"test") 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.RandomCrop((128,64),padding=4), 33 | torchvision.transforms.RandomHorizontalFlip(), 34 | torchvision.transforms.ToTensor(), 35 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 36 | ]) 37 | transform_test = torchvision.transforms.Compose([ 38 | torchvision.transforms.Resize((128,64)), 39 | torchvision.transforms.ToTensor(), 40 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 41 | ]) 42 | trainloader = torch.utils.data.DataLoader( 43 | torchvision.datasets.ImageFolder(train_dir, 
transform=transform_train), 44 | batch_size=64,shuffle=True 45 | ) 46 | testloader = torch.utils.data.DataLoader( 47 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 48 | batch_size=64,shuffle=True 49 | ) 50 | num_classes = len(trainloader.dataset.classes) 51 | 52 | # net definition 53 | start_epoch = 0 54 | net = Net(num_classes=num_classes) 55 | if args.resume: 56 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 57 | print('Loading from checkpoint/ckpt.t7') 58 | checkpoint = torch.load("./checkpoint/ckpt.t7") 59 | # import ipdb; ipdb.set_trace() 60 | net_dict = checkpoint['net_dict'] 61 | net.load_state_dict(net_dict) 62 | best_acc = checkpoint['acc'] 63 | start_epoch = checkpoint['epoch'] 64 | net.to(device) 65 | 66 | # loss and optimizer 67 | criterion = torch.nn.CrossEntropyLoss() 68 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 69 | best_acc = 0. 70 | 71 | # train function for each epoch 72 | def train(epoch): 73 | print("\nEpoch : %d"%(epoch+1)) 74 | net.train() 75 | training_loss = 0. 76 | train_loss = 0. 77 | correct = 0 78 | total = 0 79 | interval = args.interval 80 | start = time.time() 81 | for idx, (inputs, labels) in enumerate(trainloader): 82 | # forward 83 | inputs,labels = inputs.to(device),labels.to(device) 84 | outputs = net(inputs) 85 | loss = criterion(outputs, labels) 86 | 87 | # backward 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | # accumurating 93 | training_loss += loss.item() 94 | train_loss += loss.item() 95 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 96 | total += labels.size(0) 97 | 98 | # print 99 | if (idx+1)%interval == 0: 100 | end = time.time() 101 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 102 | 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total 103 | )) 104 | training_loss = 0. 
105 | start = time.time() 106 | 107 | return train_loss/len(trainloader), 1.- correct/total 108 | 109 | def test(epoch): 110 | global best_acc 111 | net.eval() 112 | test_loss = 0. 113 | correct = 0 114 | total = 0 115 | start = time.time() 116 | with torch.no_grad(): 117 | for idx, (inputs, labels) in enumerate(testloader): 118 | inputs, labels = inputs.to(device), labels.to(device) 119 | outputs = net(inputs) 120 | loss = criterion(outputs, labels) 121 | 122 | test_loss += loss.item() 123 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 124 | total += labels.size(0) 125 | 126 | print("Testing ...") 127 | end = time.time() 128 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 129 | 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total 130 | )) 131 | 132 | # saving checkpoint 133 | acc = 100.*correct/total 134 | if acc > best_acc: 135 | best_acc = acc 136 | print("Saving parameters to checkpoint/ckpt.t7") 137 | checkpoint = { 138 | 'net_dict':net.state_dict(), 139 | 'acc':acc, 140 | 'epoch':epoch, 141 | } 142 | if not os.path.isdir('checkpoint'): 143 | os.mkdir('checkpoint') 144 | torch.save(checkpoint, './checkpoint/ckpt.t7') 145 | 146 | return test_loss/len(testloader), 1.- correct/total 147 | 148 | # plot figure 149 | x_epoch = [] 150 | record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]} 151 | fig = plt.figure() 152 | ax0 = fig.add_subplot(121, title="loss") 153 | ax1 = fig.add_subplot(122, title="top1err") 154 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 155 | global record 156 | record['train_loss'].append(train_loss) 157 | record['train_err'].append(train_err) 158 | record['test_loss'].append(test_loss) 159 | record['test_err'].append(test_err) 160 | 161 | x_epoch.append(epoch) 162 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 163 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 164 | 
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 165 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 166 | if epoch == 0: 167 | ax0.legend() 168 | ax1.legend() 169 | fig.savefig("train.jpg") 170 | 171 | # lr decay 172 | def lr_decay(): 173 | global optimizer 174 | for params in optimizer.param_groups: 175 | params['lr'] *= 0.1 176 | lr = params['lr'] 177 | print("Learning rate adjusted to {}".format(lr)) 178 | 179 | def main(): 180 | for epoch in range(start_epoch, start_epoch+40): 181 | train_loss, train_err = train(epoch) 182 | test_loss, test_err = test(epoch) 183 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 184 | if (epoch+1)%20==0: 185 | lr_decay() 186 | 187 | 188 | if __name__ == '__main__': 189 | main() -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from .sort.tracker import Tracker 9 | 10 | 11 | __all__ = ['DeepSort'] 12 | 13 | 14 | class DeepSort(object): 15 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | 19 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 20 | 21 | max_cosine_distance = max_dist 22 | nn_budget = 100 23 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 24 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 25 | 26 | def update(self, bbox_xywh, confidences, ori_img, cls_ids): 27 | self.height, self.width = 
ori_img.shape[:2] 28 | # generate detections 29 | features = self._get_features(bbox_xywh, ori_img) 30 | if len(features) == 0: 31 | return np.array([]) 32 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 33 | 34 | # print("len(features): ", len(features)) 35 | # print("len(confidences): ", len(confidences)) 36 | # print("len(bbox_tlwh): ", len(bbox_tlwh)) 37 | detections = [Detection(bbox_tlwh[i], conf, features[i], cls_ids[i]) for i, conf in enumerate(confidences) if conf > self.min_confidence] 38 | 39 | # run on non-maximum supression 40 | boxes = np.array([d.tlwh for d in detections]) 41 | scores = np.array([d.confidence for d in detections]) 42 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 43 | detections = [detections[i] for i in indices] 44 | # for dt in detections: 45 | # print("detections", dt.clsid) 46 | 47 | # update tracker 48 | self.tracker.predict() 49 | self.tracker.update(detections) 50 | 51 | # output bbox identities 52 | outputs = [] 53 | for track in self.tracker.tracks: 54 | if not track.is_confirmed() or track.time_since_update > 1: 55 | continue 56 | box = track.to_tlwh() 57 | x1,y1,x2,y2 = self._tlwh_to_xyxy(box) 58 | track_id = track.track_id 59 | # cls_id = track.cls_id 60 | # outputs.append(np.array([x1,y1,x2,y2,track_id, cls_id], dtype=np.int)) 61 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 62 | if len(outputs) > 0: 63 | outputs = np.stack(outputs,axis=0) 64 | return outputs 65 | 66 | 67 | """ 68 | TODO: 69 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 70 | Thanks JieChen91@github.com for reporting this bug! 71 | """ 72 | @staticmethod 73 | def _xywh_to_tlwh(bbox_xywh): 74 | if isinstance(bbox_xywh, np.ndarray): 75 | bbox_tlwh = bbox_xywh.copy() 76 | elif isinstance(bbox_xywh, torch.Tensor): 77 | bbox_tlwh = bbox_xywh.clone() 78 | bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2. 79 | bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2. 
80 | return bbox_tlwh 81 | 82 | 83 | def _xywh_to_xyxy(self, bbox_xywh): 84 | x,y,w,h = bbox_xywh 85 | x1 = max(int(x-w/2),0) 86 | x2 = min(int(x+w/2),self.width-1) 87 | y1 = max(int(y-h/2),0) 88 | y2 = min(int(y+h/2),self.height-1) 89 | return x1,y1,x2,y2 90 | 91 | def _tlwh_to_xyxy(self, bbox_tlwh): 92 | """ 93 | TODO: 94 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 95 | Thanks JieChen91@github.com for reporting this bug! 96 | """ 97 | x,y,w,h = bbox_tlwh 98 | x1 = max(int(x),0) 99 | x2 = min(int(x+w),self.width-1) 100 | y1 = max(int(y),0) 101 | y2 = min(int(y+h),self.height-1) 102 | return x1,y1,x2,y2 103 | 104 | def _xyxy_to_tlwh(self, bbox_xyxy): 105 | x1,y1,x2,y2 = bbox_xyxy 106 | 107 | t = x1 108 | l = y1 109 | w = int(x2-x1) 110 | h = int(y2-y1) 111 | return t,l,w,h 112 | 113 | def _get_features(self, bbox_xywh, ori_img): 114 | im_crops = [] 115 | for box in bbox_xywh: 116 | x1,y1,x2,y2 = self._xywh_to_xyxy(box) 117 | im = ori_img[y1:y2,x1:x2] 118 | # if len(im) == 0: 119 | # continue 120 | im_crops.append(im) 121 | if im_crops: 122 | features = self.extractor(im_crops) 123 | else: 124 | features = np.array([]) 125 | return features 126 | 127 | 128 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 
13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature, cls_id): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | self.clsid = int(cls_id) 34 | 35 | def to_tlbr(self): 36 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 37 | `(top left, bottom right)`. 38 | """ 39 | ret = self.tlwh.copy() 40 | ret[2:] += ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Convert bounding box to format `(center x, center y, aspect ratio, 45 | height)`, where the aspect ratio is `width / height`. 46 | """ 47 | ret = self.tlwh.copy() 48 | ret[:2] += ret[2:] / 2 49 | ret[2] /= ret[3] 50 | return ret 51 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. 
A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. 
Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 
163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 
209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 
28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 
55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 
100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 
151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 
42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 
111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 
165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 
29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, cls_id, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | self.cls_id = cls_id 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 
92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detections, detection_idx): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | 140 | self.mean, self.covariance = kf.update( 141 | self.mean, self.covariance, detections[detection_idx].to_xyah()) 142 | self.features.append(detections[detection_idx].feature) 143 | 144 | self.hits += 1 145 | self.time_since_update = 0 146 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 147 | self.state = TrackState.Confirmed 148 | # self.cls_id = detection_idx 149 | 150 | def mark_missed(self): 151 | """Mark this track as missed (no association at the current time step). 152 | """ 153 | if self.state == TrackState.Tentative: 154 | self.state = TrackState.Deleted 155 | elif self.time_since_update > self._max_age: 156 | self.state = TrackState.Deleted 157 | 158 | def is_tentative(self): 159 | """Returns True if this track is tentative (unconfirmed). 
160 | """ 161 | return self.state == TrackState.Tentative 162 | 163 | def is_confirmed(self): 164 | """Returns True if this track is confirmed.""" 165 | return self.state == TrackState.Confirmed 166 | 167 | def is_deleted(self): 168 | """Returns True if this track is dead and should be deleted.""" 169 | return self.state == TrackState.Deleted 170 | -------------------------------------------------------------------------------- /deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 
37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections, detection_idx) 75 | # self.tracks[track_idx].cls_id = detection_idx 76 | 77 | for track_idx in unmatched_tracks: 78 | self.tracks[track_idx].mark_missed() 79 | for detection_idx in unmatched_detections: 80 | self._initiate_track(detections, detection_idx) 81 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 82 | 83 | # Update distance metric. 
84 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 85 | features, targets = [], [] 86 | for track in self.tracks: 87 | if not track.is_confirmed(): 88 | continue 89 | features += track.features 90 | targets += [track.track_id for _ in track.features] 91 | track.features = [] 92 | self.metric.partial_fit( 93 | np.asarray(features), np.asarray(targets), active_targets) 94 | 95 | def _match(self, detections): 96 | 97 | def gated_metric(tracks, dets, track_indices, detection_indices): 98 | features = np.array([dets[i].feature for i in detection_indices]) 99 | targets = np.array([tracks[i].track_id for i in track_indices]) 100 | cost_matrix = self.metric.distance(features, targets) 101 | cost_matrix = linear_assignment.gate_cost_matrix( 102 | self.kf, cost_matrix, tracks, dets, track_indices, 103 | detection_indices) 104 | 105 | return cost_matrix 106 | 107 | # Split track set into confirmed and unconfirmed tracks. 108 | confirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 110 | unconfirmed_tracks = [ 111 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 112 | 113 | # Associate confirmed tracks using appearance features. 114 | matches_a, unmatched_tracks_a, unmatched_detections = \ 115 | linear_assignment.matching_cascade( 116 | gated_metric, self.metric.matching_threshold, self.max_age, 117 | self.tracks, detections, confirmed_tracks) 118 | 119 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
120 | iou_track_candidates = unconfirmed_tracks + [ 121 | k for k in unmatched_tracks_a if 122 | self.tracks[k].time_since_update == 1] 123 | unmatched_tracks_a = [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update != 1] 126 | matches_b, unmatched_tracks_b, unmatched_detections = \ 127 | linear_assignment.min_cost_matching( 128 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 129 | detections, iou_track_candidates, unmatched_detections) 130 | 131 | matches = matches_a + matches_b 132 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 133 | return matches, unmatched_tracks, unmatched_detections 134 | 135 | def _initiate_track(self, detection, detection_idx): 136 | mean, covariance = self.kf.initiate(detection[detection_idx].to_xyah()) 137 | self.tracks.append(Track( 138 | mean, covariance, self._next_id, self.n_init, self.max_age, detection_idx, 139 | detection[detection_idx].feature)) 140 | self._next_id += 1 141 | -------------------------------------------------------------------------------- /detector/v4darknet.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | """ 3 | Python 3 wrapper for identifying objects in images 4 | 5 | Requires DLL compilation 6 | 7 | Both the GPU and no-GPU version should be compiled; the no-GPU version should be renamed "yolo_cpp_dll_nogpu.dll". 8 | 9 | On a GPU system, you can force CPU evaluation by any of: 10 | 11 | - Set global variable DARKNET_FORCE_CPU to True 12 | - Set environment variable CUDA_VISIBLE_DEVICES to -1 13 | - Set environment variable "FORCE_CPU" to "true" 14 | 15 | 16 | To use, either run performDetect() after import, or modify the end of this file. 17 | 18 | See the docstring of performDetect() for parameters. 
19 | 20 | Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`) 21 | 22 | 23 | Original *nix 2.7: https://github.com/pjreddie/darknet/blob/0f110834f4e18b30d5f101bf8f1724c34b7b83db/python/darknet.py 24 | Windows Python 2.7 version: https://github.com/AlexeyAB/darknet/blob/fc496d52bf22a0bb257300d3c79be9cd80e722cb/build/darknet/x64/darknet.py 25 | 26 | @author: Philip Kahn 27 | @date: 20180503 28 | """ 29 | #pylint: disable=R, W0401, W0614, W0703 30 | from ctypes import * 31 | import math 32 | import random 33 | import os 34 | import torch 35 | 36 | def sample(probs): 37 | s = sum(probs) 38 | probs = [a/s for a in probs] 39 | r = random.uniform(0, 1) 40 | for i in range(len(probs)): 41 | r = r - probs[i] 42 | if r <= 0: 43 | return i 44 | return len(probs)-1 45 | 46 | def c_array(ctype, values): 47 | arr = (ctype*len(values))() 48 | arr[:] = values 49 | return arr 50 | 51 | class BOX(Structure): 52 | _fields_ = [("x", c_float), 53 | ("y", c_float), 54 | ("w", c_float), 55 | ("h", c_float)] 56 | 57 | class DETECTION(Structure): 58 | _fields_ = [("bbox", BOX), 59 | ("classes", c_int), 60 | ("prob", POINTER(c_float)), 61 | ("mask", POINTER(c_float)), 62 | ("objectness", c_float), 63 | ("sort_class", c_int), 64 | ("uc", POINTER(c_float)), 65 | ("points", c_int)] 66 | 67 | class DETNUMPAIR(Structure): 68 | _fields_ = [("num", c_int), 69 | ("dets", POINTER(DETECTION))] 70 | 71 | class IMAGE(Structure): 72 | _fields_ = [("w", c_int), 73 | ("h", c_int), 74 | ("c", c_int), 75 | ("data", POINTER(c_float))] 76 | 77 | class METADATA(Structure): 78 | _fields_ = [("classes", c_int), 79 | ("names", POINTER(c_char_p))] 80 | 81 | 82 | 83 | #lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) 84 | #lib = CDLL("libdarknet.so", RTLD_GLOBAL) 85 | hasGPU = True 86 | if os.name == "nt": 87 | cwd = os.path.dirname(__file__) 88 | os.environ['PATH'] = cwd + ';' + os.environ['PATH'] 89 | winGPUdll = 
os.path.join(cwd, "yolo_cpp_dll.dll") 90 | winNoGPUdll = os.path.join(cwd, "yolo_cpp_dll_nogpu.dll") 91 | envKeys = list() 92 | for k, v in os.environ.items(): 93 | envKeys.append(k) 94 | try: 95 | try: 96 | tmp = os.environ["FORCE_CPU"].lower() 97 | if tmp in ["1", "true", "yes", "on"]: 98 | raise ValueError("ForceCPU") 99 | else: 100 | print("Flag value '"+tmp+"' not forcing CPU mode") 101 | except KeyError: 102 | # We never set the flag 103 | if 'CUDA_VISIBLE_DEVICES' in envKeys: 104 | if int(os.environ['CUDA_VISIBLE_DEVICES']) < 0: 105 | raise ValueError("ForceCPU") 106 | try: 107 | global DARKNET_FORCE_CPU 108 | if DARKNET_FORCE_CPU: 109 | raise ValueError("ForceCPU") 110 | except NameError: 111 | pass 112 | # print(os.environ.keys()) 113 | # print("FORCE_CPU flag undefined, proceeding with GPU") 114 | if not os.path.exists(winGPUdll): 115 | raise ValueError("NoDLL") 116 | lib = CDLL(winGPUdll, RTLD_GLOBAL) 117 | except (KeyError, ValueError): 118 | hasGPU = False 119 | if os.path.exists(winNoGPUdll): 120 | lib = CDLL(winNoGPUdll, RTLD_GLOBAL) 121 | print("Notice: CPU-only mode") 122 | else: 123 | # Try the other way, in case no_gpu was 124 | # compile but not renamed 125 | lib = CDLL(winGPUdll, RTLD_GLOBAL) 126 | print("Environment variables indicated a CPU run, but we didn't find `"+winNoGPUdll+"`. 
Trying a GPU run anyway.") 127 | else: 128 | lib = CDLL("./libdarknet.so", RTLD_GLOBAL) 129 | lib.network_width.argtypes = [c_void_p] 130 | lib.network_width.restype = c_int 131 | lib.network_height.argtypes = [c_void_p] 132 | lib.network_height.restype = c_int 133 | 134 | copy_image_from_bytes = lib.copy_image_from_bytes 135 | copy_image_from_bytes.argtypes = [IMAGE,c_char_p] 136 | 137 | def network_width(net): 138 | return lib.network_width(net) 139 | 140 | def network_height(net): 141 | return lib.network_height(net) 142 | 143 | predict = lib.network_predict_ptr 144 | predict.argtypes = [c_void_p, POINTER(c_float)] 145 | predict.restype = POINTER(c_float) 146 | 147 | if hasGPU: 148 | set_gpu = lib.cuda_set_device 149 | set_gpu.argtypes = [c_int] 150 | 151 | init_cpu = lib.init_cpu 152 | 153 | make_image = lib.make_image 154 | make_image.argtypes = [c_int, c_int, c_int] 155 | make_image.restype = IMAGE 156 | 157 | get_network_boxes = lib.get_network_boxes 158 | get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int), c_int] 159 | get_network_boxes.restype = POINTER(DETECTION) 160 | 161 | make_network_boxes = lib.make_network_boxes 162 | make_network_boxes.argtypes = [c_void_p] 163 | make_network_boxes.restype = POINTER(DETECTION) 164 | 165 | free_detections = lib.free_detections 166 | free_detections.argtypes = [POINTER(DETECTION), c_int] 167 | 168 | free_batch_detections = lib.free_batch_detections 169 | free_batch_detections.argtypes = [POINTER(DETNUMPAIR), c_int] 170 | 171 | free_ptrs = lib.free_ptrs 172 | free_ptrs.argtypes = [POINTER(c_void_p), c_int] 173 | 174 | network_predict = lib.network_predict_ptr 175 | network_predict.argtypes = [c_void_p, POINTER(c_float)] 176 | 177 | reset_rnn = lib.reset_rnn 178 | reset_rnn.argtypes = [c_void_p] 179 | 180 | load_net = lib.load_network 181 | load_net.argtypes = [c_char_p, c_char_p, c_int] 182 | load_net.restype = c_void_p 183 | 184 | load_net_custom = 
lib.load_network_custom 185 | load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int] 186 | load_net_custom.restype = c_void_p 187 | 188 | do_nms_obj = lib.do_nms_obj 189 | do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 190 | 191 | do_nms_sort = lib.do_nms_sort 192 | do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 193 | 194 | free_image = lib.free_image 195 | free_image.argtypes = [IMAGE] 196 | 197 | letterbox_image = lib.letterbox_image 198 | letterbox_image.argtypes = [IMAGE, c_int, c_int] 199 | letterbox_image.restype = IMAGE 200 | 201 | load_meta = lib.get_metadata 202 | lib.get_metadata.argtypes = [c_char_p] 203 | lib.get_metadata.restype = METADATA 204 | 205 | load_image = lib.load_image_color 206 | load_image.argtypes = [c_char_p, c_int, c_int] 207 | load_image.restype = IMAGE 208 | 209 | rgbgr_image = lib.rgbgr_image 210 | rgbgr_image.argtypes = [IMAGE] 211 | 212 | predict_image = lib.network_predict_image 213 | predict_image.argtypes = [c_void_p, IMAGE] 214 | predict_image.restype = POINTER(c_float) 215 | 216 | predict_image_letterbox = lib.network_predict_image_letterbox 217 | predict_image_letterbox.argtypes = [c_void_p, IMAGE] 218 | predict_image_letterbox.restype = POINTER(c_float) 219 | 220 | network_predict_batch = lib.network_predict_batch 221 | network_predict_batch.argtypes = [c_void_p, IMAGE, c_int, c_int, c_int, 222 | c_float, c_float, POINTER(c_int), c_int, c_int] 223 | network_predict_batch.restype = POINTER(DETNUMPAIR) 224 | 225 | def array_to_image(arr): 226 | import numpy as np 227 | # need to return old values to avoid python freeing memory 228 | arr = arr.transpose(2,0,1) 229 | c = arr.shape[0] 230 | h = arr.shape[1] 231 | w = arr.shape[2] 232 | arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0 233 | data = arr.ctypes.data_as(POINTER(c_float)) 234 | im = IMAGE(w,h,c,data) 235 | return im, arr 236 | 237 | def classify(net, meta, im): 238 | out = predict_image(net, im) 239 | res = [] 
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """
    Run detection on an image file path.

    Parameters
    ----------
    net : darknet network handle (from load_net_custom)
    meta : METADATA handle (from load_meta)
    image : bytes path to the image file (darknet loads it itself)
    thresh : float, detection confidence threshold
    hier_thresh : float, hierarchical threshold
    nms : float, NMS overlap threshold (0/None disables NMS)
    debug : bool, print progress messages

    Returns the (bbox_xywh, cls_conf, cls_ids) numpy triple from detect_image.
    """
    # pylint: disable= C0321
    im = load_image(image, 0, 0)
    if debug:
        print("Loaded image")
    # BUG FIX: the original forwarded (thresh, hier_thresh, nms, debug)
    # positionally into detect_image's (scale_h, scale_w, thresh, hier_thresh)
    # slots, silently rescaling every box by the threshold values.  The image
    # is loaded at native size here, so both scale factors are 1.0.
    ret = detect_image(net, meta, im, 1.0, 1.0,
                       thresh=thresh, hier_thresh=hier_thresh, nms=nms, debug=debug)
    free_image(im)
    if debug:
        print("freed image")
    return ret


def detect_image(net, meta, im, scale_h, scale_w, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """
    Run the network on an already-loaded darknet IMAGE and collect detections.

    scale_h / scale_w rescale the returned center-format (x, y, w, h) boxes
    from network coordinates back to the original frame size.

    Returns
    -------
    bbox_xywh : (N, 4) float ndarray of center-x, center-y, width, height
    cls_conf  : (N,) float ndarray of per-box confidences
    cls_ids   : (N,) int ndarray of class indices
    """
    num = c_int(0)
    pnum = pointer(num)
    predict_image(net, im)
    letter_box = 0
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum, letter_box)
    num = pnum[0]
    if nms:
        do_nms_sort(dets, num, meta.classes, nms)

    bbox_xywh = []
    cls_conf = []
    cls_ids = []
    for j in range(num):
        for i in range(meta.classes):
            if dets[j].prob[i] > 0:
                b = dets[j].bbox
                if altNames is None:
                    nameTag = meta.names[i]
                else:
                    nameTag = altNames[i]
                if debug:
                    print("Got bbox", b)
                    print(nameTag)
                    print(dets[j].prob[i])
                    print((b.x, b.y, b.w, b.h))
                cls_ids.append(i)
                bbox_xywh.append((b.x * scale_w, b.y * scale_h, b.w * scale_w, b.h * scale_h))
                cls_conf.append(dets[j].prob[i])

    if len(cls_ids) == 0:
        bbox_xywh = torch.FloatTensor([]).reshape([0, 4])
        cls_conf = torch.FloatTensor([])
        cls_ids = torch.LongTensor([])

    result = (torch.FloatTensor(bbox_xywh).numpy(),
              torch.FloatTensor(cls_conf).numpy(),
              torch.LongTensor(cls_ids).numpy())
    # BUG FIX: the original never freed the C detection array, leaking memory
    # on every frame.  All values have been copied into Python objects above.
    free_detections(dets, num)
    return result
def convertBack(x, y, w, h):
    """Convert a center-format box (x, y, w, h) to corner format (xmin, ymin, xmax, ymax)."""
    half_w = w / 2
    half_h = h / 2
    xmin = int(round(x - half_w))
    ymin = int(round(y - half_h))
    xmax = int(round(x + half_w))
    ymax = int(round(y + half_h))
    return xmin, ymin, xmax, ymax


# Module-level darknet state, initialised lazily by performDetect().
netMain = None
metaMain = None
altNames = None
349 | 350 | Otherwise, a dict with 351 | { 352 | "detections": as above 353 | "image": a numpy array representing an image, compatible with scikit-image 354 | "caption": an image caption 355 | } 356 | """ 357 | # Import the global variables. This lets us instance Darknet once, then just call performDetect() again without instancing again 358 | global metaMain, netMain, altNames #pylint: disable=W0603 359 | assert 0 < thresh < 1, "Threshold should be a float between zero and one (non-inclusive)" 360 | if not os.path.exists(configPath): 361 | raise ValueError("Invalid config path `"+os.path.abspath(configPath)+"`") 362 | if not os.path.exists(weightPath): 363 | raise ValueError("Invalid weight path `"+os.path.abspath(weightPath)+"`") 364 | if not os.path.exists(metaPath): 365 | raise ValueError("Invalid data file path `"+os.path.abspath(metaPath)+"`") 366 | if netMain is None: 367 | netMain = load_net_custom(configPath.encode("ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 368 | if metaMain is None: 369 | metaMain = load_meta(metaPath.encode("ascii")) 370 | if altNames is None: 371 | # In Python 3, the metafile default access craps out on Windows (but not Linux) 372 | # Read the names file and create a list to feed to detect 373 | try: 374 | with open(metaPath) as metaFH: 375 | metaContents = metaFH.read() 376 | import re 377 | match = re.search("names *= *(.*)$", metaContents, re.IGNORECASE | re.MULTILINE) 378 | if match: 379 | result = match.group(1) 380 | else: 381 | result = None 382 | try: 383 | if os.path.exists(result): 384 | with open(result) as namesFH: 385 | namesList = namesFH.read().strip().split("\n") 386 | altNames = [x.strip() for x in namesList] 387 | except TypeError: 388 | pass 389 | except Exception: 390 | pass 391 | if initOnly: 392 | print("Initialized detector") 393 | return None 394 | if not os.path.exists(imagePath): 395 | raise ValueError("Invalid image path `"+os.path.abspath(imagePath)+"`") 396 | # Do the detection 397 | 
#detections = detect(netMain, metaMain, imagePath, thresh) # if is used cv2.imread(image) 398 | detections = detect(netMain, metaMain, imagePath.encode("ascii"), thresh) 399 | if showImage: 400 | try: 401 | from skimage import io, draw 402 | import numpy as np 403 | image = io.imread(imagePath) 404 | print("*** "+str(len(detections))+" Results, color coded by confidence ***") 405 | imcaption = [] 406 | for detection in detections: 407 | label = detection[0] 408 | confidence = detection[1] 409 | pstring = label+": "+str(np.rint(100 * confidence))+"%" 410 | imcaption.append(pstring) 411 | print(pstring) 412 | bounds = detection[2] 413 | shape = image.shape 414 | # x = shape[1] 415 | # xExtent = int(x * bounds[2] / 100) 416 | # y = shape[0] 417 | # yExtent = int(y * bounds[3] / 100) 418 | yExtent = int(bounds[3]) 419 | xEntent = int(bounds[2]) 420 | # Coordinates are around the center 421 | xCoord = int(bounds[0] - bounds[2]/2) 422 | yCoord = int(bounds[1] - bounds[3]/2) 423 | boundingBox = [ 424 | [xCoord, yCoord], 425 | [xCoord, yCoord + yExtent], 426 | [xCoord + xEntent, yCoord + yExtent], 427 | [xCoord + xEntent, yCoord] 428 | ] 429 | # Wiggle it around to make a 3px border 430 | rr, cc = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 431 | rr2, cc2 = draw.polygon_perimeter([x[1] + 1 for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 432 | rr3, cc3 = draw.polygon_perimeter([x[1] - 1 for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 433 | rr4, cc4 = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] + 1 for x in boundingBox], shape= shape) 434 | rr5, cc5 = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] - 1 for x in boundingBox], shape= shape) 435 | boxColor = (int(255 * (1 - (confidence ** 2))), int(255 * (confidence ** 2)), 0) 436 | draw.set_color(image, (rr, cc), boxColor, alpha= 0.8) 437 | draw.set_color(image, (rr2, cc2), boxColor, alpha= 0.8) 438 | 
draw.set_color(image, (rr3, cc3), boxColor, alpha= 0.8) 439 | draw.set_color(image, (rr4, cc4), boxColor, alpha= 0.8) 440 | draw.set_color(image, (rr5, cc5), boxColor, alpha= 0.8) 441 | if not makeImageOnly: 442 | io.imshow(image) 443 | io.show() 444 | detections = { 445 | "detections": detections, 446 | "image": image, 447 | "caption": "\n
".join(imcaption) 448 | } 449 | except Exception as e: 450 | print("Unable to show image: "+str(e)) 451 | return detections 452 | 453 | 454 | if __name__ == "__main__": 455 | print("just do") -------------------------------------------------------------------------------- /detector/v4detector.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | import math 3 | import random 4 | import os 5 | import cv2 6 | import numpy as np 7 | import time 8 | import detector.v4darknet 9 | 10 | 11 | netMain = None 12 | metaMain = None 13 | altNames = None 14 | 15 | 16 | def YOLO(frame_read): 17 | 18 | global metaMain, netMain, altNames 19 | configPath = "4.cfg" 20 | weightPath = "4.weights" 21 | metaPath = "coco.data" 22 | if not os.path.exists(configPath): 23 | raise ValueError("Invalid config path `" + 24 | os.path.abspath(configPath)+"`") 25 | if not os.path.exists(weightPath): 26 | raise ValueError("Invalid weight path `" + 27 | os.path.abspath(weightPath)+"`") 28 | if not os.path.exists(metaPath): 29 | raise ValueError("Invalid data file path `" + 30 | os.path.abspath(metaPath)+"`") 31 | if netMain is None: 32 | netMain = detector.v4darknet.load_net_custom(configPath.encode( 33 | "ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 34 | if metaMain is None: 35 | metaMain = detector.v4darknet.load_meta(metaPath.encode("ascii")) 36 | if altNames is None: 37 | try: 38 | with open(metaPath) as metaFH: 39 | metaContents = metaFH.read() 40 | import re 41 | match = re.search("names *= *(.*)$", metaContents, 42 | re.IGNORECASE | re.MULTILINE) 43 | if match: 44 | result = match.group(1) 45 | else: 46 | result = None 47 | try: 48 | if os.path.exists(result): 49 | with open(result) as namesFH: 50 | namesList = namesFH.read().strip().split("\n") 51 | altNames = [x.strip() for x in namesList] 52 | except TypeError: 53 | pass 54 | except Exception: 55 | pass 56 | 57 | img_h, img_w = frame_read.shape[:2] 58 | net_h = 
def mkdir_if_missing(dir):
    """Create directory *dir* (including parents); no-op if it already exists."""
    Path(dir).mkdir(parents=True, exist_ok=True)
def parse_args():
    """Build and parse the command-line options for the MOT evaluation driver.

    Returns an argparse.Namespace with: config_detection, config_deepsort,
    display, frame_interval, display_width, display_height, save_path,
    use_cuda, cam.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yaml")
    parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
    # Display is off by default for batch evaluation; the flag keeps it off.
    parser.add_argument("--ignore_display", dest="display", action="store_false", default=False)
    parser.add_argument("--frame_interval", type=int, default=1)
    parser.add_argument("--display_width", type=int, default=800)
    parser.add_argument("--display_height", type=int, default=600)
    parser.add_argument("--save_path", type=str, default="./demo/demo.avi")
    # --cpu clears args.use_cuda.
    parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True)
    # FIX: was default="-1" (a string), which only worked because argparse
    # applies type= to string defaults; an int default is explicit and robust.
    parser.add_argument("--camera", action="store", dest="cam", type=int, default=-1)
    return parser.parse_args()
| flake8-import-order==0.18.1 9 | importlib-metadata==1.6.0 10 | jdcal==1.4.1 11 | joblib==0.14.1 12 | lap==0.4.0 13 | mccabe==0.6.1 14 | more-itertools==8.2.0 15 | motmetrics==1.2.0 16 | numpy==1.18.2 17 | opencv-python==4.2.0.34 18 | openpyxl==3.0.3 19 | packaging==20.3 20 | pandas==1.0.3 21 | Pillow==8.2.0 22 | pluggy==0.13.1 23 | py==1.10.0 24 | py-cpuinfo==5.0.0 25 | pycodestyle==2.5.0 26 | pyflakes==2.1.1 27 | pyparsing==2.4.7 28 | pytest==5.4.1 29 | pytest-benchmark==3.2.3 30 | python-dateutil==2.8.1 31 | pytz==2019.3 32 | PyYAML==5.3.1 33 | scikit-learn==0.22.2.post1 34 | scipy==1.4.1 35 | six==1.14.0 36 | sklearn==0.0 37 | torch==1.4.0 38 | torchvision==0.5.0 39 | Vizer==0.1.5 40 | wcwidth==0.1.9 41 | xmltodict==0.12.0 42 | zipp==3.1.0 43 | -------------------------------------------------------------------------------- /scripts/yolov3_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection -------------------------------------------------------------------------------- /scripts/yolov3_tiny_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml -------------------------------------------------------------------------------- /tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import warnings 7 | import numpy as np 8 | 9 | from deep_sort import build_tracker 10 | from utils.draw import draw_boxes 11 | from utils.parser import get_config 12 | from utils.log import get_logger 13 | from utils.io import write_results 14 | from detector import v4detector 15 | 16 | lst_move_life = [0,1,2,3,4,5,6,7,8,9,16,77] 17 | 18 | class VideoTracker(object): 19 | def __init__(self, cfg, args, video_path): 20 | self.cfg = cfg 21 
| self.args = args 22 | self.video_path = video_path 23 | self.logger = get_logger("root") 24 | 25 | use_cuda = args.use_cuda and torch.cuda.is_available() 26 | if not use_cuda: 27 | warnings.warn("Running in cpu mode which maybe very slow!", UserWarning) 28 | 29 | if args.display: 30 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 31 | cv2.resizeWindow("test", args.display_width, args.display_height) 32 | 33 | if args.cam != -1: 34 | print("Using webcam " + str(args.cam)) 35 | self.vdo = cv2.VideoCapture(args.cam) 36 | else: 37 | self.vdo = cv2.VideoCapture() 38 | self.deepsort = build_tracker(cfg, use_cuda=use_cuda) 39 | 40 | 41 | def __enter__(self): 42 | if self.args.cam != -1: 43 | ret, frame = self.vdo.read() 44 | assert ret, "Error: Camera error" 45 | self.im_width = frame.shape[0] 46 | self.im_height = frame.shape[1] 47 | 48 | else: 49 | assert os.path.isfile(self.video_path), "Path error" 50 | self.vdo.open(self.video_path) 51 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 52 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 53 | assert self.vdo.isOpened() 54 | 55 | if self.args.save_path: 56 | os.makedirs(self.args.save_path, exist_ok=True) 57 | 58 | # path of saved video and results 59 | self.save_video_path = os.path.join(self.args.save_path, "results.avi") 60 | self.save_results_path = os.path.join(self.args.save_path, "results.txt") 61 | 62 | # create video writer 63 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 64 | self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 20, (self.im_width,self.im_height)) 65 | 66 | # logging 67 | self.logger.info("Save results to {}".format(self.args.save_path)) 68 | 69 | return self 70 | 71 | 72 | def __exit__(self, exc_type, exc_value, exc_traceback): 73 | if exc_type: 74 | print(exc_type, exc_value, exc_traceback) 75 | 76 | 77 | def run(self): 78 | results = [] 79 | idx_frame = 0 80 | while self.vdo.grab(): 81 | idx_frame += 1 82 | if idx_frame % self.args.frame_interval: 83 | continue 
84 | 85 | start = time.time() 86 | _, ori_im = self.vdo.retrieve() 87 | im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB) 88 | if len(im) == 0: 89 | continue 90 | # do detection 91 | # bbox_xywh, cls_conf, cls_ids = self.detector(im) 92 | 93 | bbox_xywh, cls_conf, cls_ids = v4detector.YOLO(im) 94 | if len(bbox_xywh) == 0: 95 | continue 96 | print("detection cls_ids:", cls_ids) 97 | 98 | # #filter cls id for tracking 99 | # print("cls_ids") 100 | # print(cls_ids) 101 | # # select person class 102 | mask = [] 103 | # lst_for_track = [] 104 | for id in cls_ids: 105 | if id in lst_move_life: 106 | # lst_for_track.append(id) 107 | mask.append(True) 108 | else: 109 | mask.append(False) 110 | print("mask cls_ids:", mask) 111 | 112 | bbox_xywh = bbox_xywh[mask] 113 | # # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector 114 | bbox_xywh[:,3:] *= 1.2 115 | cls_conf = cls_conf[mask] 116 | 117 | # do tracking 118 | 119 | 120 | outputs = self.deepsort.update(bbox_xywh, cls_conf, im, cls_ids) 121 | 122 | # draw boxes for visualization 123 | if len(outputs) > 0: 124 | bbox_tlwh = [] 125 | bbox_xyxy = outputs[:,:4] 126 | identities = outputs[:,4:5] 127 | cls_id = outputs[:,-1] 128 | print("track res cls_id:", cls_id) 129 | # cls_ids_show = [cls_ids[i] for i in cls_id] 130 | ori_im = draw_boxes(ori_im, bbox_xyxy, cls_ids, identities) 131 | 132 | for bb_xyxy in bbox_xyxy: 133 | bbox_tlwh.append(self.deepsort._xyxy_to_tlwh(bb_xyxy)) 134 | 135 | results.append((idx_frame-1, bbox_tlwh, identities)) 136 | 137 | end = time.time() 138 | 139 | if self.args.display: 140 | cv2.imshow("test", ori_im) 141 | cv2.waitKey(1) 142 | 143 | if self.args.save_path: 144 | self.writer.write(ori_im) 145 | 146 | # save results 147 | write_results(self.save_results_path, results, 'mot') 148 | 149 | # logging 150 | self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \ 151 | .format(end-start, 1/(end-start), 
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)


def compute_color_for_labels(label):
    """Map an integer class/track label to a deterministic BGR color tuple."""
    seed = label ** 2 - label + 1
    return tuple(int((p * seed) % 255) for p in palette)
cls_id, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | # label = label + "[" +str(cls_id[i]) + "]" 27 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 28 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 29 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 30 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 31 | return img 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | for i in range(82): 37 | print(compute_color_for_labels(i)) 38 | -------------------------------------------------------------------------------- /utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | 
trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = 
def write_results(filename, results, data_type):
    """Dump tracking results to *filename* in 'mot' or 'kitti' text format.

    results: iterable of (frame_id, tlwhs, track_ids) triples, where tlwhs
    are (x1, y1, w, h) boxes.  Raises ValueError for an unknown data_type.
    """
    if data_type == 'mot':
        save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
    elif data_type == 'kitti':
        save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
    else:
        raise ValueError(data_type)

    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            if data_type == 'kitti':
                frame_id -= 1  # KITTI frame ids are 0-based
            for tlwh, track_id in zip(tlwhs, track_ids):
                if track_id < 0:  # negative ids mark invalid tracks
                    continue
                x1, y1, w, h = tlwh
                f.write(save_format.format(frame=frame_id, id=track_id,
                                           x1=x1, y1=y1, x2=x1 + w, y2=y1 + h,
                                           w=w, h=h))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
    """Dispatch result-file parsing by data_type; only 'mot'/'lab' are supported."""
    if data_type not in ('mot', 'lab'):
        raise ValueError('Unknown data type: {}'.format(data_type))
    return read_mot_results(filename, is_gt, is_ignore)
def read_mot_results(filename, is_gt, is_ignore):
    """Parse a MOT-format CSV file into {frame_id: [(tlwh, target_id, score)]}.

    is_gt: apply the MOT16/17 ground-truth mark/label filter (score fixed to 1).
    is_ignore: keep only the ignorable annotations (score fixed to 1).
    Returns an empty dict when the file does not exist.
    """
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if not os.path.isfile(filename):
        return results_dict

    is_mot16_17 = 'MOT16-' in filename or 'MOT17-' in filename
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split(',')
            if len(fields) < 7:
                continue
            fid = int(fields[0])
            if fid < 1:
                continue
            # Register the frame key even if every entry below is filtered out,
            # matching the original setdefault-before-filter behaviour.
            results_dict.setdefault(fid, list())

            if is_gt:
                if is_mot16_17:
                    label = int(float(fields[7]))
                    mark = int(float(fields[6]))
                    if mark == 0 or label not in valid_labels:
                        continue
                score = 1
            elif is_ignore:
                if not is_mot16_17:
                    continue
                label = int(float(fields[7]))
                vis_ratio = float(fields[8])
                if label not in ignore_labels and vis_ratio >= 0:
                    continue
                score = 1
            else:
                score = float(fields[6])

            tlwh = tuple(map(float, fields[2:6]))
            target_id = int(fields[1])
            results_dict[fid].append((tlwh, target_id, score))

    return results_dict


def unzip_objs(objs):
    """Split [(tlwh, id, score), ...] into (tlwhs (N,4) ndarray, ids, scores)."""
    tlwhs, ids, scores = zip(*objs) if objs else ([], [], [])
    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
    return tlwhs, ids, scores
class YamlParser(edict):
    """YAML configuration parser based on EasyDict.

    Loads an optional base dict and/or YAML file at construction time and
    exposes the merged configuration as attribute-accessible keys.
    """

    def __init__(self, cfg_dict=None, config_file=None):
        """Build the config from *cfg_dict*, optionally overlaid with *config_file*."""
        if cfg_dict is None:
            cfg_dict = {}

        if config_file is not None:
            assert os.path.isfile(config_file), config_file
            with open(config_file, 'r') as fo:
                # safe_load: yaml.load() without an explicit Loader is unsafe
                # on untrusted input and raises TypeError on PyYAML >= 6.
                # `or {}` guards against an empty YAML file (load returns None).
                cfg_dict.update(yaml.safe_load(fo) or {})

        super(YamlParser, self).__init__(cfg_dict)

    def merge_from_file(self, config_file):
        """Merge the keys of another YAML file into this config, in place."""
        with open(config_file, 'r') as fo:
            self.update(yaml.safe_load(fo) or {})

    def merge_from_dict(self, config_dict):
        """Merge a plain dict into this config, in place."""
        self.update(config_dict)
self.mqtt_client.on_connect = self.on_connect 22 | self.mqtt_client.on_disconnect = self.on_disconnect 23 | self.mqtt_client.on_message = self.on_message 24 | self.ip = ip 25 | self.port = port 26 | self.sub_topic = sub 27 | self.pub_topic = pub 28 | self.timeout = timeout 29 | self.callback_mutex = threading.RLock() 30 | self.on_new_image = None 31 | def connect(self): 32 | self.mqtt_client.connect(self.ip, self.port, self.timeout) 33 | self.mqtt_client.loop_start() 34 | 35 | def disconnect(self): 36 | self.mqtt_client.loop_stop() 37 | self.mqtt_client.disconnect() 38 | 39 | # def loop(self): 40 | # self.mqtt_client.loop_forever() 41 | 42 | def on_connect(self, client, userdata, flags, rc): 43 | print("Connected with result code "+str(rc)) 44 | 45 | print(self.sub_topic) 46 | self.mqtt_client.subscribe(self.sub_topic, 0) 47 | 48 | def on_disconnect(self, client, userdata, rc): 49 | print("disconnected with result code "+str(rc)) 50 | 51 | def on_message(self, client, userdata, msg): 52 | stamp = struct.unpack('LL', msg.payload[-16:]) 53 | img = cv2.imdecode(np.fromstring(msg.payload, dtype='uint8'), cv2.IMREAD_UNCHANGED) 54 | print('Worker::on_message: ', stamp, time.clock_gettime(time.CLOCK_MONOTONIC)) 55 | 56 | if(self.on_new_image != None): 57 | self.on_new_image(stamp, img) 58 | 59 | def publish(self, stamp, data): 60 | if(self.mqtt_client.is_connected() == False): 61 | return 62 | 63 | msg = bytes() 64 | #pack stamp & data into msg 65 | self.mqtt_client.publish(self.pub_topic, msg) 66 | 67 | @property 68 | def on_new_image(self): 69 | return self.on_new_image 70 | 71 | def on_new_image(self, func): 72 | with self.callback_mutex: 73 | self.on_new_image = func 74 | 75 | def on_new_image(stamp, img): 76 | print(stamp) 77 | print(img.shape) 78 | cv2.imshow("img", img) 79 | cv2.waitKey(1) 80 | 81 | 82 | ''' 83 | { 84 | "stamp": stamp, 85 | "data": [ 86 | {"cls_id": 2, "track_id": 3, "bbox": "4,5,6,7"}, 87 | {"cls_id": 3, "track_id": 5, "bbox": "6,7,8,9"}] 88 | } 
if __name__ == "__main__":
    # Swap in our SIGINT handler so Ctrl-C sets the shutdown flag instead of
    # killing the process mid-callback; the original handler is restored below.
    previous_handler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, sigint_handler)

    worker = Worker('192.168.1.24', 1883)
    worker.on_new_image = on_new_image
    worker.connect()

    # Idle until the SIGINT handler flips the flag.
    while not sigint_catched:
        time.sleep(0.1)

    signal.signal(signal.SIGINT, previous_handler)
    worker.disconnect()