├── .gitignore ├── 4.cfg ├── 4prn.cfg ├── LICENSE ├── README.md ├── coco.data ├── coco.names ├── configs └── deep_sort.yaml ├── deep_sort ├── README.md ├── __init__.py ├── deep │ ├── __init__.py │ ├── checkpoint │ │ └── .gitkeep │ ├── evaluate.py │ ├── feature_extractor.py │ ├── model.py │ ├── original_model.py │ ├── test.py │ ├── train.jpg │ └── train.py ├── deep_sort.py └── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py ├── detector ├── v4darknet.py └── v4detector.py ├── eval_tracker.py ├── requirements.txt ├── scripts ├── yolov3_deepsort.sh └── yolov3_tiny_deepsort.sh ├── tracker.py ├── utils ├── __init__.py ├── draw.py ├── evaluation.py ├── io.py ├── log.py └── parser.py └── worker.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Folders 2 | __pycache__/ 3 | build/ 4 | *.egg-info 5 | 6 | 7 | # Files 8 | *.weights 9 | *.t7 10 | *.mp4 11 | *.avi 12 | *.so 13 | *.txt 14 | .idea/ 15 | *.weights 16 | -------------------------------------------------------------------------------- /4.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=1 7 | subdivisions=1 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.949 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500500 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #cutmix=1 26 | mosaic=1 27 | 28 | #:104x104 54:52x52 85:26x26 104:13x13 for 416 29 | 30 | [convolutional] 31 | batch_normalize=1 32 | filters=32 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=mish 37 | 38 | # Downsample 39 | 40 | [convolutional] 41 | batch_normalize=1 42 | filters=64 43 | size=3 44 | stride=2 45 
| pad=1 46 | activation=mish 47 | 48 | [convolutional] 49 | batch_normalize=1 50 | filters=64 51 | size=1 52 | stride=1 53 | pad=1 54 | activation=mish 55 | 56 | [route] 57 | layers = -2 58 | 59 | [convolutional] 60 | batch_normalize=1 61 | filters=64 62 | size=1 63 | stride=1 64 | pad=1 65 | activation=mish 66 | 67 | [convolutional] 68 | batch_normalize=1 69 | filters=32 70 | size=1 71 | stride=1 72 | pad=1 73 | activation=mish 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=mish 82 | 83 | [shortcut] 84 | from=-3 85 | activation=linear 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=64 90 | size=1 91 | stride=1 92 | pad=1 93 | activation=mish 94 | 95 | [route] 96 | layers = -1,-7 97 | 98 | [convolutional] 99 | batch_normalize=1 100 | filters=64 101 | size=1 102 | stride=1 103 | pad=1 104 | activation=mish 105 | 106 | # Downsample 107 | 108 | [convolutional] 109 | batch_normalize=1 110 | filters=128 111 | size=3 112 | stride=2 113 | pad=1 114 | activation=mish 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=64 119 | size=1 120 | stride=1 121 | pad=1 122 | activation=mish 123 | 124 | [route] 125 | layers = -2 126 | 127 | [convolutional] 128 | batch_normalize=1 129 | filters=64 130 | size=1 131 | stride=1 132 | pad=1 133 | activation=mish 134 | 135 | [convolutional] 136 | batch_normalize=1 137 | filters=64 138 | size=1 139 | stride=1 140 | pad=1 141 | activation=mish 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=64 146 | size=3 147 | stride=1 148 | pad=1 149 | activation=mish 150 | 151 | [shortcut] 152 | from=-3 153 | activation=linear 154 | 155 | [convolutional] 156 | batch_normalize=1 157 | filters=64 158 | size=1 159 | stride=1 160 | pad=1 161 | activation=mish 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=64 166 | size=3 167 | stride=1 168 | pad=1 169 | activation=mish 170 | 171 | [shortcut] 172 | from=-3 173 | 
activation=linear 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=64 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=mish 182 | 183 | [route] 184 | layers = -1,-10 185 | 186 | [convolutional] 187 | batch_normalize=1 188 | filters=128 189 | size=1 190 | stride=1 191 | pad=1 192 | activation=mish 193 | 194 | # Downsample 195 | 196 | [convolutional] 197 | batch_normalize=1 198 | filters=256 199 | size=3 200 | stride=2 201 | pad=1 202 | activation=mish 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=mish 211 | 212 | [route] 213 | layers = -2 214 | 215 | [convolutional] 216 | batch_normalize=1 217 | filters=128 218 | size=1 219 | stride=1 220 | pad=1 221 | activation=mish 222 | 223 | [convolutional] 224 | batch_normalize=1 225 | filters=128 226 | size=1 227 | stride=1 228 | pad=1 229 | activation=mish 230 | 231 | [convolutional] 232 | batch_normalize=1 233 | filters=128 234 | size=3 235 | stride=1 236 | pad=1 237 | activation=mish 238 | 239 | [shortcut] 240 | from=-3 241 | activation=linear 242 | 243 | [convolutional] 244 | batch_normalize=1 245 | filters=128 246 | size=1 247 | stride=1 248 | pad=1 249 | activation=mish 250 | 251 | [convolutional] 252 | batch_normalize=1 253 | filters=128 254 | size=3 255 | stride=1 256 | pad=1 257 | activation=mish 258 | 259 | [shortcut] 260 | from=-3 261 | activation=linear 262 | 263 | [convolutional] 264 | batch_normalize=1 265 | filters=128 266 | size=1 267 | stride=1 268 | pad=1 269 | activation=mish 270 | 271 | [convolutional] 272 | batch_normalize=1 273 | filters=128 274 | size=3 275 | stride=1 276 | pad=1 277 | activation=mish 278 | 279 | [shortcut] 280 | from=-3 281 | activation=linear 282 | 283 | [convolutional] 284 | batch_normalize=1 285 | filters=128 286 | size=1 287 | stride=1 288 | pad=1 289 | activation=mish 290 | 291 | [convolutional] 292 | batch_normalize=1 293 | filters=128 294 | size=3 295 | stride=1 296 | pad=1 
297 | activation=mish 298 | 299 | [shortcut] 300 | from=-3 301 | activation=linear 302 | 303 | 304 | [convolutional] 305 | batch_normalize=1 306 | filters=128 307 | size=1 308 | stride=1 309 | pad=1 310 | activation=mish 311 | 312 | [convolutional] 313 | batch_normalize=1 314 | filters=128 315 | size=3 316 | stride=1 317 | pad=1 318 | activation=mish 319 | 320 | [shortcut] 321 | from=-3 322 | activation=linear 323 | 324 | [convolutional] 325 | batch_normalize=1 326 | filters=128 327 | size=1 328 | stride=1 329 | pad=1 330 | activation=mish 331 | 332 | [convolutional] 333 | batch_normalize=1 334 | filters=128 335 | size=3 336 | stride=1 337 | pad=1 338 | activation=mish 339 | 340 | [shortcut] 341 | from=-3 342 | activation=linear 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=128 347 | size=1 348 | stride=1 349 | pad=1 350 | activation=mish 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=3 356 | stride=1 357 | pad=1 358 | activation=mish 359 | 360 | [shortcut] 361 | from=-3 362 | activation=linear 363 | 364 | [convolutional] 365 | batch_normalize=1 366 | filters=128 367 | size=1 368 | stride=1 369 | pad=1 370 | activation=mish 371 | 372 | [convolutional] 373 | batch_normalize=1 374 | filters=128 375 | size=3 376 | stride=1 377 | pad=1 378 | activation=mish 379 | 380 | [shortcut] 381 | from=-3 382 | activation=linear 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=mish 391 | 392 | [route] 393 | layers = -1,-28 394 | 395 | [convolutional] 396 | batch_normalize=1 397 | filters=256 398 | size=1 399 | stride=1 400 | pad=1 401 | activation=mish 402 | 403 | # Downsample 404 | 405 | [convolutional] 406 | batch_normalize=1 407 | filters=512 408 | size=3 409 | stride=2 410 | pad=1 411 | activation=mish 412 | 413 | [convolutional] 414 | batch_normalize=1 415 | filters=256 416 | size=1 417 | stride=1 418 | pad=1 419 | activation=mish 420 | 421 | 
[route] 422 | layers = -2 423 | 424 | [convolutional] 425 | batch_normalize=1 426 | filters=256 427 | size=1 428 | stride=1 429 | pad=1 430 | activation=mish 431 | 432 | [convolutional] 433 | batch_normalize=1 434 | filters=256 435 | size=1 436 | stride=1 437 | pad=1 438 | activation=mish 439 | 440 | [convolutional] 441 | batch_normalize=1 442 | filters=256 443 | size=3 444 | stride=1 445 | pad=1 446 | activation=mish 447 | 448 | [shortcut] 449 | from=-3 450 | activation=linear 451 | 452 | 453 | [convolutional] 454 | batch_normalize=1 455 | filters=256 456 | size=1 457 | stride=1 458 | pad=1 459 | activation=mish 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=256 464 | size=3 465 | stride=1 466 | pad=1 467 | activation=mish 468 | 469 | [shortcut] 470 | from=-3 471 | activation=linear 472 | 473 | 474 | [convolutional] 475 | batch_normalize=1 476 | filters=256 477 | size=1 478 | stride=1 479 | pad=1 480 | activation=mish 481 | 482 | [convolutional] 483 | batch_normalize=1 484 | filters=256 485 | size=3 486 | stride=1 487 | pad=1 488 | activation=mish 489 | 490 | [shortcut] 491 | from=-3 492 | activation=linear 493 | 494 | 495 | [convolutional] 496 | batch_normalize=1 497 | filters=256 498 | size=1 499 | stride=1 500 | pad=1 501 | activation=mish 502 | 503 | [convolutional] 504 | batch_normalize=1 505 | filters=256 506 | size=3 507 | stride=1 508 | pad=1 509 | activation=mish 510 | 511 | [shortcut] 512 | from=-3 513 | activation=linear 514 | 515 | 516 | [convolutional] 517 | batch_normalize=1 518 | filters=256 519 | size=1 520 | stride=1 521 | pad=1 522 | activation=mish 523 | 524 | [convolutional] 525 | batch_normalize=1 526 | filters=256 527 | size=3 528 | stride=1 529 | pad=1 530 | activation=mish 531 | 532 | [shortcut] 533 | from=-3 534 | activation=linear 535 | 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=256 540 | size=1 541 | stride=1 542 | pad=1 543 | activation=mish 544 | 545 | [convolutional] 546 | batch_normalize=1 
547 | filters=256 548 | size=3 549 | stride=1 550 | pad=1 551 | activation=mish 552 | 553 | [shortcut] 554 | from=-3 555 | activation=linear 556 | 557 | 558 | [convolutional] 559 | batch_normalize=1 560 | filters=256 561 | size=1 562 | stride=1 563 | pad=1 564 | activation=mish 565 | 566 | [convolutional] 567 | batch_normalize=1 568 | filters=256 569 | size=3 570 | stride=1 571 | pad=1 572 | activation=mish 573 | 574 | [shortcut] 575 | from=-3 576 | activation=linear 577 | 578 | [convolutional] 579 | batch_normalize=1 580 | filters=256 581 | size=1 582 | stride=1 583 | pad=1 584 | activation=mish 585 | 586 | [convolutional] 587 | batch_normalize=1 588 | filters=256 589 | size=3 590 | stride=1 591 | pad=1 592 | activation=mish 593 | 594 | [shortcut] 595 | from=-3 596 | activation=linear 597 | 598 | [convolutional] 599 | batch_normalize=1 600 | filters=256 601 | size=1 602 | stride=1 603 | pad=1 604 | activation=mish 605 | 606 | [route] 607 | layers = -1,-28 608 | 609 | [convolutional] 610 | batch_normalize=1 611 | filters=512 612 | size=1 613 | stride=1 614 | pad=1 615 | activation=mish 616 | 617 | # Downsample 618 | 619 | [convolutional] 620 | batch_normalize=1 621 | filters=1024 622 | size=3 623 | stride=2 624 | pad=1 625 | activation=mish 626 | 627 | [convolutional] 628 | batch_normalize=1 629 | filters=512 630 | size=1 631 | stride=1 632 | pad=1 633 | activation=mish 634 | 635 | [route] 636 | layers = -2 637 | 638 | [convolutional] 639 | batch_normalize=1 640 | filters=512 641 | size=1 642 | stride=1 643 | pad=1 644 | activation=mish 645 | 646 | [convolutional] 647 | batch_normalize=1 648 | filters=512 649 | size=1 650 | stride=1 651 | pad=1 652 | activation=mish 653 | 654 | [convolutional] 655 | batch_normalize=1 656 | filters=512 657 | size=3 658 | stride=1 659 | pad=1 660 | activation=mish 661 | 662 | [shortcut] 663 | from=-3 664 | activation=linear 665 | 666 | [convolutional] 667 | batch_normalize=1 668 | filters=512 669 | size=1 670 | stride=1 671 | pad=1 
672 | activation=mish 673 | 674 | [convolutional] 675 | batch_normalize=1 676 | filters=512 677 | size=3 678 | stride=1 679 | pad=1 680 | activation=mish 681 | 682 | [shortcut] 683 | from=-3 684 | activation=linear 685 | 686 | [convolutional] 687 | batch_normalize=1 688 | filters=512 689 | size=1 690 | stride=1 691 | pad=1 692 | activation=mish 693 | 694 | [convolutional] 695 | batch_normalize=1 696 | filters=512 697 | size=3 698 | stride=1 699 | pad=1 700 | activation=mish 701 | 702 | [shortcut] 703 | from=-3 704 | activation=linear 705 | 706 | [convolutional] 707 | batch_normalize=1 708 | filters=512 709 | size=1 710 | stride=1 711 | pad=1 712 | activation=mish 713 | 714 | [convolutional] 715 | batch_normalize=1 716 | filters=512 717 | size=3 718 | stride=1 719 | pad=1 720 | activation=mish 721 | 722 | [shortcut] 723 | from=-3 724 | activation=linear 725 | 726 | [convolutional] 727 | batch_normalize=1 728 | filters=512 729 | size=1 730 | stride=1 731 | pad=1 732 | activation=mish 733 | 734 | [route] 735 | layers = -1,-16 736 | 737 | [convolutional] 738 | batch_normalize=1 739 | filters=1024 740 | size=1 741 | stride=1 742 | pad=1 743 | activation=mish 744 | 745 | ########################## 746 | 747 | [convolutional] 748 | batch_normalize=1 749 | filters=512 750 | size=1 751 | stride=1 752 | pad=1 753 | activation=leaky 754 | 755 | [convolutional] 756 | batch_normalize=1 757 | size=3 758 | stride=1 759 | pad=1 760 | filters=1024 761 | activation=leaky 762 | 763 | [convolutional] 764 | batch_normalize=1 765 | filters=512 766 | size=1 767 | stride=1 768 | pad=1 769 | activation=leaky 770 | 771 | ### SPP ### 772 | [maxpool] 773 | stride=1 774 | size=5 775 | 776 | [route] 777 | layers=-2 778 | 779 | [maxpool] 780 | stride=1 781 | size=9 782 | 783 | [route] 784 | layers=-4 785 | 786 | [maxpool] 787 | stride=1 788 | size=13 789 | 790 | [route] 791 | layers=-1,-3,-5,-6 792 | ### End SPP ### 793 | 794 | [convolutional] 795 | batch_normalize=1 796 | filters=512 797 | 
size=1 798 | stride=1 799 | pad=1 800 | activation=leaky 801 | 802 | [convolutional] 803 | batch_normalize=1 804 | size=3 805 | stride=1 806 | pad=1 807 | filters=1024 808 | activation=leaky 809 | 810 | [convolutional] 811 | batch_normalize=1 812 | filters=512 813 | size=1 814 | stride=1 815 | pad=1 816 | activation=leaky 817 | 818 | [convolutional] 819 | batch_normalize=1 820 | filters=256 821 | size=1 822 | stride=1 823 | pad=1 824 | activation=leaky 825 | 826 | [upsample] 827 | stride=2 828 | 829 | [route] 830 | layers = 85 831 | 832 | [convolutional] 833 | batch_normalize=1 834 | filters=256 835 | size=1 836 | stride=1 837 | pad=1 838 | activation=leaky 839 | 840 | [route] 841 | layers = -1, -3 842 | 843 | [convolutional] 844 | batch_normalize=1 845 | filters=256 846 | size=1 847 | stride=1 848 | pad=1 849 | activation=leaky 850 | 851 | [convolutional] 852 | batch_normalize=1 853 | size=3 854 | stride=1 855 | pad=1 856 | filters=512 857 | activation=leaky 858 | 859 | [convolutional] 860 | batch_normalize=1 861 | filters=256 862 | size=1 863 | stride=1 864 | pad=1 865 | activation=leaky 866 | 867 | [convolutional] 868 | batch_normalize=1 869 | size=3 870 | stride=1 871 | pad=1 872 | filters=512 873 | activation=leaky 874 | 875 | [convolutional] 876 | batch_normalize=1 877 | filters=256 878 | size=1 879 | stride=1 880 | pad=1 881 | activation=leaky 882 | 883 | [convolutional] 884 | batch_normalize=1 885 | filters=128 886 | size=1 887 | stride=1 888 | pad=1 889 | activation=leaky 890 | 891 | [upsample] 892 | stride=2 893 | 894 | [route] 895 | layers = 54 896 | 897 | [convolutional] 898 | batch_normalize=1 899 | filters=128 900 | size=1 901 | stride=1 902 | pad=1 903 | activation=leaky 904 | 905 | [route] 906 | layers = -1, -3 907 | 908 | [convolutional] 909 | batch_normalize=1 910 | filters=128 911 | size=1 912 | stride=1 913 | pad=1 914 | activation=leaky 915 | 916 | [convolutional] 917 | batch_normalize=1 918 | size=3 919 | stride=1 920 | pad=1 921 | filters=256 
922 | activation=leaky 923 | 924 | [convolutional] 925 | batch_normalize=1 926 | filters=128 927 | size=1 928 | stride=1 929 | pad=1 930 | activation=leaky 931 | 932 | [convolutional] 933 | batch_normalize=1 934 | size=3 935 | stride=1 936 | pad=1 937 | filters=256 938 | activation=leaky 939 | 940 | [convolutional] 941 | batch_normalize=1 942 | filters=128 943 | size=1 944 | stride=1 945 | pad=1 946 | activation=leaky 947 | 948 | ########################## 949 | 950 | [convolutional] 951 | batch_normalize=1 952 | size=3 953 | stride=1 954 | pad=1 955 | filters=256 956 | activation=leaky 957 | 958 | [convolutional] 959 | size=1 960 | stride=1 961 | pad=1 962 | filters=255 963 | activation=linear 964 | 965 | 966 | [yolo] 967 | mask = 0,1,2 968 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 969 | classes=80 970 | num=9 971 | jitter=.3 972 | ignore_thresh = .7 973 | truth_thresh = 1 974 | scale_x_y = 1.2 975 | iou_thresh=0.213 976 | cls_normalizer=1.0 977 | iou_normalizer=0.07 978 | iou_loss=ciou 979 | nms_kind=greedynms 980 | beta_nms=0.6 981 | 982 | 983 | [route] 984 | layers = -4 985 | 986 | [convolutional] 987 | batch_normalize=1 988 | size=3 989 | stride=2 990 | pad=1 991 | filters=256 992 | activation=leaky 993 | 994 | [route] 995 | layers = -1, -16 996 | 997 | [convolutional] 998 | batch_normalize=1 999 | filters=256 1000 | size=1 1001 | stride=1 1002 | pad=1 1003 | activation=leaky 1004 | 1005 | [convolutional] 1006 | batch_normalize=1 1007 | size=3 1008 | stride=1 1009 | pad=1 1010 | filters=512 1011 | activation=leaky 1012 | 1013 | [convolutional] 1014 | batch_normalize=1 1015 | filters=256 1016 | size=1 1017 | stride=1 1018 | pad=1 1019 | activation=leaky 1020 | 1021 | [convolutional] 1022 | batch_normalize=1 1023 | size=3 1024 | stride=1 1025 | pad=1 1026 | filters=512 1027 | activation=leaky 1028 | 1029 | [convolutional] 1030 | batch_normalize=1 1031 | filters=256 1032 | size=1 1033 | stride=1 1034 | pad=1 1035 | 
activation=leaky 1036 | 1037 | [convolutional] 1038 | batch_normalize=1 1039 | size=3 1040 | stride=1 1041 | pad=1 1042 | filters=512 1043 | activation=leaky 1044 | 1045 | [convolutional] 1046 | size=1 1047 | stride=1 1048 | pad=1 1049 | filters=255 1050 | activation=linear 1051 | 1052 | 1053 | [yolo] 1054 | mask = 3,4,5 1055 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 1056 | classes=80 1057 | num=9 1058 | jitter=.3 1059 | ignore_thresh = .7 1060 | truth_thresh = 1 1061 | scale_x_y = 1.1 1062 | iou_thresh=0.213 1063 | cls_normalizer=1.0 1064 | iou_normalizer=0.07 1065 | iou_loss=ciou 1066 | nms_kind=greedynms 1067 | beta_nms=0.6 1068 | 1069 | 1070 | [route] 1071 | layers = -4 1072 | 1073 | [convolutional] 1074 | batch_normalize=1 1075 | size=3 1076 | stride=2 1077 | pad=1 1078 | filters=512 1079 | activation=leaky 1080 | 1081 | [route] 1082 | layers = -1, -37 1083 | 1084 | [convolutional] 1085 | batch_normalize=1 1086 | filters=512 1087 | size=1 1088 | stride=1 1089 | pad=1 1090 | activation=leaky 1091 | 1092 | [convolutional] 1093 | batch_normalize=1 1094 | size=3 1095 | stride=1 1096 | pad=1 1097 | filters=1024 1098 | activation=leaky 1099 | 1100 | [convolutional] 1101 | batch_normalize=1 1102 | filters=512 1103 | size=1 1104 | stride=1 1105 | pad=1 1106 | activation=leaky 1107 | 1108 | [convolutional] 1109 | batch_normalize=1 1110 | size=3 1111 | stride=1 1112 | pad=1 1113 | filters=1024 1114 | activation=leaky 1115 | 1116 | [convolutional] 1117 | batch_normalize=1 1118 | filters=512 1119 | size=1 1120 | stride=1 1121 | pad=1 1122 | activation=leaky 1123 | 1124 | [convolutional] 1125 | batch_normalize=1 1126 | size=3 1127 | stride=1 1128 | pad=1 1129 | filters=1024 1130 | activation=leaky 1131 | 1132 | [convolutional] 1133 | size=1 1134 | stride=1 1135 | pad=1 1136 | filters=255 1137 | activation=linear 1138 | 1139 | 1140 | [yolo] 1141 | mask = 6,7,8 1142 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 
142, 110, 192, 243, 459, 401 1143 | classes=80 1144 | num=9 1145 | jitter=.3 1146 | ignore_thresh = .7 1147 | truth_thresh = 1 1148 | random=1 1149 | scale_x_y = 1.05 1150 | iou_thresh=0.213 1151 | cls_normalizer=1.0 1152 | iou_normalizer=0.07 1153 | iou_loss=ciou 1154 | nms_kind=greedynms 1155 | beta_nms=0.6 1156 | 1157 | -------------------------------------------------------------------------------- /4prn.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=8 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=512 100 | size=3 101 | 
stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [shortcut] 106 | activation=leaky 107 | from=-3 108 | 109 | ########### 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | [convolutional] 120 | batch_normalize=1 121 | filters=256 122 | size=3 123 | stride=1 124 | pad=1 125 | activation=leaky 126 | 127 | [shortcut] 128 | activation=leaky 129 | from=-2 130 | 131 | [convolutional] 132 | size=1 133 | stride=1 134 | pad=1 135 | filters=255 136 | activation=linear 137 | 138 | 139 | 140 | [yolo] 141 | mask = 3,4,5 142 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 143 | classes=80 144 | num=6 145 | jitter=.3 146 | ignore_thresh = .7 147 | truth_thresh = 1 148 | random=1 149 | 150 | [route] 151 | layers = -4 152 | 153 | [convolutional] 154 | batch_normalize=1 155 | filters=128 156 | size=1 157 | stride=1 158 | pad=1 159 | activation=leaky 160 | 161 | [upsample] 162 | stride=2 163 | 164 | [shortcut] 165 | activation=leaky 166 | from=8 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=128 171 | size=3 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | [shortcut] 177 | activation=leaky 178 | from=-3 179 | 180 | [shortcut] 181 | activation=leaky 182 | from=8 183 | 184 | [convolutional] 185 | size=1 186 | stride=1 187 | pad=1 188 | filters=255 189 | activation=linear 190 | 191 | [yolo] 192 | mask = 1,2,3 193 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 194 | classes=80 195 | num=6 196 | jitter=.3 197 | ignore_thresh = .7 198 | truth_thresh = 1 199 | random=1 200 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 derek285 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Yolov4 + Deep Sort with PyTorch 2 | 3 | ## remember build your own libdarknet.so and put under folder yolov4_deep_sort_pytorch/ 4 | ## ref : https://github.com/ZQPei/deep_sort_pytorch 5 | 6 | ## Quick Start 7 | 0. Check all dependencies installed 8 | ```bash 9 | pip3 install -r requirements.txt 10 | ``` 11 | for user in china, you can specify pypi source to accelerate install like: 12 | ```bash 13 | pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple 14 | ``` 15 | 16 | 1. Clone this repository 17 | ``` 18 | git clone https://github.com/derek285/yolov4_deep_sort_pytorch.git 19 | ``` 20 | 21 | 2. Download YOLOv4 parameters 22 | ``` 23 | wget yolov4.weights and cfg 24 | //并且重命名为4.cfg 和 4.weights 这个hard code有点山寨了:detector/v4detector.py : 25 | configPath = "4.cfg" 26 | weightPath = "4.weights" 27 | ``` 28 | 29 | 3. 
Download deepsort parameters ckpt.t7 30 | ``` 31 | cd deep_sort/deep/checkpoint 32 | # download ckpt.t7 from 33 | https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder 34 | cd ../../../ 35 | ``` 36 | 37 | Notice: 38 | If compiling failed, the simplist way is to **Upgrade your pytorch >= 1.1 and torchvision >= 0.3" and you can avoid the troublesome compiling problems which are most likely caused by either `gcc version too low` or `libraries missing`. 39 | 40 | 4. Run demo 41 | ``` 42 | usage: python tracker.py VIDEO_PATH 43 | [--help] 44 | [--frame_interval FRAME_INTERVAL] 45 | [--config_detection CONFIG_DETECTION] 46 | [--config_deepsort CONFIG_DEEPSORT] 47 | [--display] 48 | [--display_width DISPLAY_WIDTH] 49 | [--display_height DISPLAY_HEIGHT] 50 | [--save_path SAVE_PATH] 51 | [--cpu] 52 | 53 | 54 | # yolov4 + deepsort on video file 55 | python3 tracker.py VIDEO_PATH 56 | # yolov4 + deepsort on webcam 57 | python3 tracker.py /dev/video0 --camera 0 58 | 59 | # todo 60 | ``` 61 | 1. get class_id return 62 | 2. 
fix depends on libdarknet.so 63 | ``` 64 | 65 | ## References 66 | - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402) 67 | - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort) 68 | - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf) 69 | - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/) 70 | - code: [ZQPei/deep_sort_pytorch](https://github.com/ZQPei/deep_sort_pytorch) 71 | - code:[AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) 72 | -------------------------------------------------------------------------------- /coco.data: -------------------------------------------------------------------------------- 1 | classes= 80 2 | names = coco.names 3 | eval=coco 4 | 5 | -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | 
-------------------------------------------------------------------------------- /configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | -------------------------------------------------------------------------------- /deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | logger = logging.getLogger("root.tracker") 16 | logger.info("Loading weights from {}... Done!".format(model_path)) 17 | self.net.to(self.device) 18 | self.size = (64, 128) 19 | self.norm = transforms.Compose([ 20 | transforms.ToTensor(), 21 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 22 | ]) 23 | 24 | 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. 
normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 39 | return im_batch 40 | 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | 56 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | 
return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | -------------------------------------------------------------------------------- /deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 
import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | 
self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 
| torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict, strict=False) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir",default='data',type=str) 15 | parser.add_argument("--no-cuda",action="store_true") 16 | parser.add_argument("--gpu-id",default=0,type=int) 17 | parser.add_argument("--lr",default=0.1, type=float) 18 | parser.add_argument("--interval",'-i',default=20,type=int) 19 | parser.add_argument('--resume', '-r',action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 24 | if torch.cuda.is_available() and not args.no_cuda: 25 | cudnn.benchmark = True 26 | 27 | # data loading 28 | root = args.data_dir 29 | train_dir = os.path.join(root,"train") 30 | test_dir = os.path.join(root,"test") 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.RandomCrop((128,64),padding=4), 33 | torchvision.transforms.RandomHorizontalFlip(), 34 | torchvision.transforms.ToTensor(), 35 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 36 | ]) 37 | transform_test = torchvision.transforms.Compose([ 38 | torchvision.transforms.Resize((128,64)), 39 | torchvision.transforms.ToTensor(), 40 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 41 | ]) 42 | trainloader = torch.utils.data.DataLoader( 43 | torchvision.datasets.ImageFolder(train_dir, 
transform=transform_train), 44 | batch_size=64,shuffle=True 45 | ) 46 | testloader = torch.utils.data.DataLoader( 47 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 48 | batch_size=64,shuffle=True 49 | ) 50 | num_classes = len(trainloader.dataset.classes) 51 | 52 | # net definition 53 | start_epoch = 0 54 | net = Net(num_classes=num_classes) 55 | if args.resume: 56 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 57 | print('Loading from checkpoint/ckpt.t7') 58 | checkpoint = torch.load("./checkpoint/ckpt.t7") 59 | # import ipdb; ipdb.set_trace() 60 | net_dict = checkpoint['net_dict'] 61 | net.load_state_dict(net_dict) 62 | best_acc = checkpoint['acc'] 63 | start_epoch = checkpoint['epoch'] 64 | net.to(device) 65 | 66 | # loss and optimizer 67 | criterion = torch.nn.CrossEntropyLoss() 68 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 69 | best_acc = 0. 70 | 71 | # train function for each epoch 72 | def train(epoch): 73 | print("\nEpoch : %d"%(epoch+1)) 74 | net.train() 75 | training_loss = 0. 76 | train_loss = 0. 77 | correct = 0 78 | total = 0 79 | interval = args.interval 80 | start = time.time() 81 | for idx, (inputs, labels) in enumerate(trainloader): 82 | # forward 83 | inputs,labels = inputs.to(device),labels.to(device) 84 | outputs = net(inputs) 85 | loss = criterion(outputs, labels) 86 | 87 | # backward 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | # accumurating 93 | training_loss += loss.item() 94 | train_loss += loss.item() 95 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 96 | total += labels.size(0) 97 | 98 | # print 99 | if (idx+1)%interval == 0: 100 | end = time.time() 101 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 102 | 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total 103 | )) 104 | training_loss = 0. 
105 | start = time.time() 106 | 107 | return train_loss/len(trainloader), 1.- correct/total 108 | 109 | def test(epoch): 110 | global best_acc 111 | net.eval() 112 | test_loss = 0. 113 | correct = 0 114 | total = 0 115 | start = time.time() 116 | with torch.no_grad(): 117 | for idx, (inputs, labels) in enumerate(testloader): 118 | inputs, labels = inputs.to(device), labels.to(device) 119 | outputs = net(inputs) 120 | loss = criterion(outputs, labels) 121 | 122 | test_loss += loss.item() 123 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 124 | total += labels.size(0) 125 | 126 | print("Testing ...") 127 | end = time.time() 128 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 129 | 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total 130 | )) 131 | 132 | # saving checkpoint 133 | acc = 100.*correct/total 134 | if acc > best_acc: 135 | best_acc = acc 136 | print("Saving parameters to checkpoint/ckpt.t7") 137 | checkpoint = { 138 | 'net_dict':net.state_dict(), 139 | 'acc':acc, 140 | 'epoch':epoch, 141 | } 142 | if not os.path.isdir('checkpoint'): 143 | os.mkdir('checkpoint') 144 | torch.save(checkpoint, './checkpoint/ckpt.t7') 145 | 146 | return test_loss/len(testloader), 1.- correct/total 147 | 148 | # plot figure 149 | x_epoch = [] 150 | record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]} 151 | fig = plt.figure() 152 | ax0 = fig.add_subplot(121, title="loss") 153 | ax1 = fig.add_subplot(122, title="top1err") 154 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 155 | global record 156 | record['train_loss'].append(train_loss) 157 | record['train_err'].append(train_err) 158 | record['test_loss'].append(test_loss) 159 | record['test_err'].append(test_err) 160 | 161 | x_epoch.append(epoch) 162 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 163 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 164 | 
ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 165 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 166 | if epoch == 0: 167 | ax0.legend() 168 | ax1.legend() 169 | fig.savefig("train.jpg") 170 | 171 | # lr decay 172 | def lr_decay(): 173 | global optimizer 174 | for params in optimizer.param_groups: 175 | params['lr'] *= 0.1 176 | lr = params['lr'] 177 | print("Learning rate adjusted to {}".format(lr)) 178 | 179 | def main(): 180 | for epoch in range(start_epoch, start_epoch+40): 181 | train_loss, train_err = train(epoch) 182 | test_loss, test_err = test(epoch) 183 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 184 | if (epoch+1)%20==0: 185 | lr_decay() 186 | 187 | 188 | if __name__ == '__main__': 189 | main() -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from .sort.tracker import Tracker 9 | 10 | 11 | __all__ = ['DeepSort'] 12 | 13 | 14 | class DeepSort(object): 15 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | 19 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 20 | 21 | max_cosine_distance = max_dist 22 | nn_budget = 100 23 | metric = NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 24 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 25 | 26 | def update(self, bbox_xywh, confidences, ori_img, cls_ids): 27 | self.height, self.width = 
ori_img.shape[:2] 28 | # generate detections 29 | features = self._get_features(bbox_xywh, ori_img) 30 | if len(features) == 0: 31 | return np.array([]) 32 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 33 | 34 | # print("len(features): ", len(features)) 35 | # print("len(confidences): ", len(confidences)) 36 | # print("len(bbox_tlwh): ", len(bbox_tlwh)) 37 | detections = [Detection(bbox_tlwh[i], conf, features[i], cls_ids[i]) for i, conf in enumerate(confidences) if conf > self.min_confidence] 38 | 39 | # run on non-maximum supression 40 | boxes = np.array([d.tlwh for d in detections]) 41 | scores = np.array([d.confidence for d in detections]) 42 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 43 | detections = [detections[i] for i in indices] 44 | # for dt in detections: 45 | # print("detections", dt.clsid) 46 | 47 | # update tracker 48 | self.tracker.predict() 49 | self.tracker.update(detections) 50 | 51 | # output bbox identities 52 | outputs = [] 53 | for track in self.tracker.tracks: 54 | if not track.is_confirmed() or track.time_since_update > 1: 55 | continue 56 | box = track.to_tlwh() 57 | x1,y1,x2,y2 = self._tlwh_to_xyxy(box) 58 | track_id = track.track_id 59 | # cls_id = track.cls_id 60 | # outputs.append(np.array([x1,y1,x2,y2,track_id, cls_id], dtype=np.int)) 61 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 62 | if len(outputs) > 0: 63 | outputs = np.stack(outputs,axis=0) 64 | return outputs 65 | 66 | 67 | """ 68 | TODO: 69 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 70 | Thanks JieChen91@github.com for reporting this bug! 71 | """ 72 | @staticmethod 73 | def _xywh_to_tlwh(bbox_xywh): 74 | if isinstance(bbox_xywh, np.ndarray): 75 | bbox_tlwh = bbox_xywh.copy() 76 | elif isinstance(bbox_xywh, torch.Tensor): 77 | bbox_tlwh = bbox_xywh.clone() 78 | bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2. 79 | bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2. 
80 | return bbox_tlwh 81 | 82 | 83 | def _xywh_to_xyxy(self, bbox_xywh): 84 | x,y,w,h = bbox_xywh 85 | x1 = max(int(x-w/2),0) 86 | x2 = min(int(x+w/2),self.width-1) 87 | y1 = max(int(y-h/2),0) 88 | y2 = min(int(y+h/2),self.height-1) 89 | return x1,y1,x2,y2 90 | 91 | def _tlwh_to_xyxy(self, bbox_tlwh): 92 | """ 93 | TODO: 94 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 95 | Thanks JieChen91@github.com for reporting this bug! 96 | """ 97 | x,y,w,h = bbox_tlwh 98 | x1 = max(int(x),0) 99 | x2 = min(int(x+w),self.width-1) 100 | y1 = max(int(y),0) 101 | y2 = min(int(y+h),self.height-1) 102 | return x1,y1,x2,y2 103 | 104 | def _xyxy_to_tlwh(self, bbox_xyxy): 105 | x1,y1,x2,y2 = bbox_xyxy 106 | 107 | t = x1 108 | l = y1 109 | w = int(x2-x1) 110 | h = int(y2-y1) 111 | return t,l,w,h 112 | 113 | def _get_features(self, bbox_xywh, ori_img): 114 | im_crops = [] 115 | for box in bbox_xywh: 116 | x1,y1,x2,y2 = self._xywh_to_xyxy(box) 117 | im = ori_img[y1:y2,x1:x2] 118 | # if len(im) == 0: 119 | # continue 120 | im_crops.append(im) 121 | if im_crops: 122 | features = self.extractor(im_crops) 123 | else: 124 | features = np.array([]) 125 | return features 126 | 127 | 128 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/derek285/yolov4_deep_sort_pytorch/00e408a24693ce2438289f4d3aed819cf0362436/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 
13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature, cls_id): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | self.clsid = int(cls_id) 34 | 35 | def to_tlbr(self): 36 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 37 | `(top left, bottom right)`. 38 | """ 39 | ret = self.tlwh.copy() 40 | ret[2:] += ret[:2] 41 | return ret 42 | 43 | def to_xyah(self): 44 | """Convert bounding box to format `(center x, center y, aspect ratio, 45 | height)`, where the aspect ratio is `width / height`. 46 | """ 47 | ret = self.tlwh.copy() 48 | ret[:2] += ret[2:] / 2 49 | ret[2] /= ret[3] 50 | return ret 51 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. 
A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. 
Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 
163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 
209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 
28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 
55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 
100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 
151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 
42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 
111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 
165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 
29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, cls_id, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | self.cls_id = cls_id 75 | 76 | self.state = TrackState.Tentative 77 | self.features = [] 78 | if feature is not None: 79 | self.features.append(feature) 80 | 81 | self._n_init = n_init 82 | self._max_age = max_age 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 
92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detections, detection_idx): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | 140 | self.mean, self.covariance = kf.update( 141 | self.mean, self.covariance, detections[detection_idx].to_xyah()) 142 | self.features.append(detections[detection_idx].feature) 143 | 144 | self.hits += 1 145 | self.time_since_update = 0 146 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 147 | self.state = TrackState.Confirmed 148 | # self.cls_id = detection_idx 149 | 150 | def mark_missed(self): 151 | """Mark this track as missed (no association at the current time step). 152 | """ 153 | if self.state == TrackState.Tentative: 154 | self.state = TrackState.Deleted 155 | elif self.time_since_update > self._max_age: 156 | self.state = TrackState.Deleted 157 | 158 | def is_tentative(self): 159 | """Returns True if this track is tentative (unconfirmed). 
160 | """ 161 | return self.state == TrackState.Tentative 162 | 163 | def is_confirmed(self): 164 | """Returns True if this track is confirmed.""" 165 | return self.state == TrackState.Confirmed 166 | 167 | def is_deleted(self): 168 | """Returns True if this track is dead and should be deleted.""" 169 | return self.state == TrackState.Deleted 170 | -------------------------------------------------------------------------------- /deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 
37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections, detection_idx) 75 | # self.tracks[track_idx].cls_id = detection_idx 76 | 77 | for track_idx in unmatched_tracks: 78 | self.tracks[track_idx].mark_missed() 79 | for detection_idx in unmatched_detections: 80 | self._initiate_track(detections, detection_idx) 81 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 82 | 83 | # Update distance metric. 
84 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 85 | features, targets = [], [] 86 | for track in self.tracks: 87 | if not track.is_confirmed(): 88 | continue 89 | features += track.features 90 | targets += [track.track_id for _ in track.features] 91 | track.features = [] 92 | self.metric.partial_fit( 93 | np.asarray(features), np.asarray(targets), active_targets) 94 | 95 | def _match(self, detections): 96 | 97 | def gated_metric(tracks, dets, track_indices, detection_indices): 98 | features = np.array([dets[i].feature for i in detection_indices]) 99 | targets = np.array([tracks[i].track_id for i in track_indices]) 100 | cost_matrix = self.metric.distance(features, targets) 101 | cost_matrix = linear_assignment.gate_cost_matrix( 102 | self.kf, cost_matrix, tracks, dets, track_indices, 103 | detection_indices) 104 | 105 | return cost_matrix 106 | 107 | # Split track set into confirmed and unconfirmed tracks. 108 | confirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 110 | unconfirmed_tracks = [ 111 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 112 | 113 | # Associate confirmed tracks using appearance features. 114 | matches_a, unmatched_tracks_a, unmatched_detections = \ 115 | linear_assignment.matching_cascade( 116 | gated_metric, self.metric.matching_threshold, self.max_age, 117 | self.tracks, detections, confirmed_tracks) 118 | 119 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
120 | iou_track_candidates = unconfirmed_tracks + [ 121 | k for k in unmatched_tracks_a if 122 | self.tracks[k].time_since_update == 1] 123 | unmatched_tracks_a = [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update != 1] 126 | matches_b, unmatched_tracks_b, unmatched_detections = \ 127 | linear_assignment.min_cost_matching( 128 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 129 | detections, iou_track_candidates, unmatched_detections) 130 | 131 | matches = matches_a + matches_b 132 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 133 | return matches, unmatched_tracks, unmatched_detections 134 | 135 | def _initiate_track(self, detection, detection_idx): 136 | mean, covariance = self.kf.initiate(detection[detection_idx].to_xyah()) 137 | self.tracks.append(Track( 138 | mean, covariance, self._next_id, self.n_init, self.max_age, detection_idx, 139 | detection[detection_idx].feature)) 140 | self._next_id += 1 141 | -------------------------------------------------------------------------------- /detector/v4darknet.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | """ 3 | Python 3 wrapper for identifying objects in images 4 | 5 | Requires DLL compilation 6 | 7 | Both the GPU and no-GPU version should be compiled; the no-GPU version should be renamed "yolo_cpp_dll_nogpu.dll". 8 | 9 | On a GPU system, you can force CPU evaluation by any of: 10 | 11 | - Set global variable DARKNET_FORCE_CPU to True 12 | - Set environment variable CUDA_VISIBLE_DEVICES to -1 13 | - Set environment variable "FORCE_CPU" to "true" 14 | 15 | 16 | To use, either run performDetect() after import, or modify the end of this file. 17 | 18 | See the docstring of performDetect() for parameters. 
19 | 20 | Directly viewing or returning bounding-boxed images requires scikit-image to be installed (`pip install scikit-image`) 21 | 22 | 23 | Original *nix 2.7: https://github.com/pjreddie/darknet/blob/0f110834f4e18b30d5f101bf8f1724c34b7b83db/python/darknet.py 24 | Windows Python 2.7 version: https://github.com/AlexeyAB/darknet/blob/fc496d52bf22a0bb257300d3c79be9cd80e722cb/build/darknet/x64/darknet.py 25 | 26 | @author: Philip Kahn 27 | @date: 20180503 28 | """ 29 | #pylint: disable=R, W0401, W0614, W0703 30 | from ctypes import * 31 | import math 32 | import random 33 | import os 34 | import torch 35 | 36 | def sample(probs): 37 | s = sum(probs) 38 | probs = [a/s for a in probs] 39 | r = random.uniform(0, 1) 40 | for i in range(len(probs)): 41 | r = r - probs[i] 42 | if r <= 0: 43 | return i 44 | return len(probs)-1 45 | 46 | def c_array(ctype, values): 47 | arr = (ctype*len(values))() 48 | arr[:] = values 49 | return arr 50 | 51 | class BOX(Structure): 52 | _fields_ = [("x", c_float), 53 | ("y", c_float), 54 | ("w", c_float), 55 | ("h", c_float)] 56 | 57 | class DETECTION(Structure): 58 | _fields_ = [("bbox", BOX), 59 | ("classes", c_int), 60 | ("prob", POINTER(c_float)), 61 | ("mask", POINTER(c_float)), 62 | ("objectness", c_float), 63 | ("sort_class", c_int), 64 | ("uc", POINTER(c_float)), 65 | ("points", c_int)] 66 | 67 | class DETNUMPAIR(Structure): 68 | _fields_ = [("num", c_int), 69 | ("dets", POINTER(DETECTION))] 70 | 71 | class IMAGE(Structure): 72 | _fields_ = [("w", c_int), 73 | ("h", c_int), 74 | ("c", c_int), 75 | ("data", POINTER(c_float))] 76 | 77 | class METADATA(Structure): 78 | _fields_ = [("classes", c_int), 79 | ("names", POINTER(c_char_p))] 80 | 81 | 82 | 83 | #lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL) 84 | #lib = CDLL("libdarknet.so", RTLD_GLOBAL) 85 | hasGPU = True 86 | if os.name == "nt": 87 | cwd = os.path.dirname(__file__) 88 | os.environ['PATH'] = cwd + ';' + os.environ['PATH'] 89 | winGPUdll = 
os.path.join(cwd, "yolo_cpp_dll.dll") 90 | winNoGPUdll = os.path.join(cwd, "yolo_cpp_dll_nogpu.dll") 91 | envKeys = list() 92 | for k, v in os.environ.items(): 93 | envKeys.append(k) 94 | try: 95 | try: 96 | tmp = os.environ["FORCE_CPU"].lower() 97 | if tmp in ["1", "true", "yes", "on"]: 98 | raise ValueError("ForceCPU") 99 | else: 100 | print("Flag value '"+tmp+"' not forcing CPU mode") 101 | except KeyError: 102 | # We never set the flag 103 | if 'CUDA_VISIBLE_DEVICES' in envKeys: 104 | if int(os.environ['CUDA_VISIBLE_DEVICES']) < 0: 105 | raise ValueError("ForceCPU") 106 | try: 107 | global DARKNET_FORCE_CPU 108 | if DARKNET_FORCE_CPU: 109 | raise ValueError("ForceCPU") 110 | except NameError: 111 | pass 112 | # print(os.environ.keys()) 113 | # print("FORCE_CPU flag undefined, proceeding with GPU") 114 | if not os.path.exists(winGPUdll): 115 | raise ValueError("NoDLL") 116 | lib = CDLL(winGPUdll, RTLD_GLOBAL) 117 | except (KeyError, ValueError): 118 | hasGPU = False 119 | if os.path.exists(winNoGPUdll): 120 | lib = CDLL(winNoGPUdll, RTLD_GLOBAL) 121 | print("Notice: CPU-only mode") 122 | else: 123 | # Try the other way, in case no_gpu was 124 | # compile but not renamed 125 | lib = CDLL(winGPUdll, RTLD_GLOBAL) 126 | print("Environment variables indicated a CPU run, but we didn't find `"+winNoGPUdll+"`. 
Trying a GPU run anyway.") 127 | else: 128 | lib = CDLL("./libdarknet.so", RTLD_GLOBAL) 129 | lib.network_width.argtypes = [c_void_p] 130 | lib.network_width.restype = c_int 131 | lib.network_height.argtypes = [c_void_p] 132 | lib.network_height.restype = c_int 133 | 134 | copy_image_from_bytes = lib.copy_image_from_bytes 135 | copy_image_from_bytes.argtypes = [IMAGE,c_char_p] 136 | 137 | def network_width(net): 138 | return lib.network_width(net) 139 | 140 | def network_height(net): 141 | return lib.network_height(net) 142 | 143 | predict = lib.network_predict_ptr 144 | predict.argtypes = [c_void_p, POINTER(c_float)] 145 | predict.restype = POINTER(c_float) 146 | 147 | if hasGPU: 148 | set_gpu = lib.cuda_set_device 149 | set_gpu.argtypes = [c_int] 150 | 151 | init_cpu = lib.init_cpu 152 | 153 | make_image = lib.make_image 154 | make_image.argtypes = [c_int, c_int, c_int] 155 | make_image.restype = IMAGE 156 | 157 | get_network_boxes = lib.get_network_boxes 158 | get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int), c_int] 159 | get_network_boxes.restype = POINTER(DETECTION) 160 | 161 | make_network_boxes = lib.make_network_boxes 162 | make_network_boxes.argtypes = [c_void_p] 163 | make_network_boxes.restype = POINTER(DETECTION) 164 | 165 | free_detections = lib.free_detections 166 | free_detections.argtypes = [POINTER(DETECTION), c_int] 167 | 168 | free_batch_detections = lib.free_batch_detections 169 | free_batch_detections.argtypes = [POINTER(DETNUMPAIR), c_int] 170 | 171 | free_ptrs = lib.free_ptrs 172 | free_ptrs.argtypes = [POINTER(c_void_p), c_int] 173 | 174 | network_predict = lib.network_predict_ptr 175 | network_predict.argtypes = [c_void_p, POINTER(c_float)] 176 | 177 | reset_rnn = lib.reset_rnn 178 | reset_rnn.argtypes = [c_void_p] 179 | 180 | load_net = lib.load_network 181 | load_net.argtypes = [c_char_p, c_char_p, c_int] 182 | load_net.restype = c_void_p 183 | 184 | load_net_custom = 
lib.load_network_custom 185 | load_net_custom.argtypes = [c_char_p, c_char_p, c_int, c_int] 186 | load_net_custom.restype = c_void_p 187 | 188 | do_nms_obj = lib.do_nms_obj 189 | do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 190 | 191 | do_nms_sort = lib.do_nms_sort 192 | do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 193 | 194 | free_image = lib.free_image 195 | free_image.argtypes = [IMAGE] 196 | 197 | letterbox_image = lib.letterbox_image 198 | letterbox_image.argtypes = [IMAGE, c_int, c_int] 199 | letterbox_image.restype = IMAGE 200 | 201 | load_meta = lib.get_metadata 202 | lib.get_metadata.argtypes = [c_char_p] 203 | lib.get_metadata.restype = METADATA 204 | 205 | load_image = lib.load_image_color 206 | load_image.argtypes = [c_char_p, c_int, c_int] 207 | load_image.restype = IMAGE 208 | 209 | rgbgr_image = lib.rgbgr_image 210 | rgbgr_image.argtypes = [IMAGE] 211 | 212 | predict_image = lib.network_predict_image 213 | predict_image.argtypes = [c_void_p, IMAGE] 214 | predict_image.restype = POINTER(c_float) 215 | 216 | predict_image_letterbox = lib.network_predict_image_letterbox 217 | predict_image_letterbox.argtypes = [c_void_p, IMAGE] 218 | predict_image_letterbox.restype = POINTER(c_float) 219 | 220 | network_predict_batch = lib.network_predict_batch 221 | network_predict_batch.argtypes = [c_void_p, IMAGE, c_int, c_int, c_int, 222 | c_float, c_float, POINTER(c_int), c_int, c_int] 223 | network_predict_batch.restype = POINTER(DETNUMPAIR) 224 | 225 | def array_to_image(arr): 226 | import numpy as np 227 | # need to return old values to avoid python freeing memory 228 | arr = arr.transpose(2,0,1) 229 | c = arr.shape[0] 230 | h = arr.shape[1] 231 | w = arr.shape[2] 232 | arr = np.ascontiguousarray(arr.flat, dtype=np.float32) / 255.0 233 | data = arr.ctypes.data_as(POINTER(c_float)) 234 | im = IMAGE(w,h,c,data) 235 | return im, arr 236 | 237 | def classify(net, meta, im): 238 | out = predict_image(net, im) 239 | res = [] 
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """
    Run detection on an image file path.

    Parameters
    ----------
    net : darknet network handle (from load_net_custom)
    meta : METADATA handle (from load_meta)
    image : bytes path to the image file (darknet loads it itself)
    thresh : float, detection confidence threshold
    hier_thresh : float, hierarchical threshold
    nms : float, NMS overlap threshold (0/None disables NMS)
    debug : bool, print progress messages

    Returns the (bbox_xywh, cls_conf, cls_ids) numpy triple from detect_image.
    """
    # pylint: disable= C0321
    im = load_image(image, 0, 0)
    if debug:
        print("Loaded image")
    # BUG FIX: the original forwarded (thresh, hier_thresh, nms, debug)
    # positionally into detect_image's (scale_h, scale_w, thresh, hier_thresh)
    # slots, silently rescaling every box by the threshold values.  The image
    # is loaded at native size here, so both scale factors are 1.0.
    ret = detect_image(net, meta, im, 1.0, 1.0,
                       thresh=thresh, hier_thresh=hier_thresh, nms=nms, debug=debug)
    free_image(im)
    if debug:
        print("freed image")
    return ret


def detect_image(net, meta, im, scale_h, scale_w, thresh=.5, hier_thresh=.5, nms=.45, debug=False):
    """
    Run the network on an already-loaded darknet IMAGE and collect detections.

    scale_h / scale_w rescale the returned center-format (x, y, w, h) boxes
    from network coordinates back to the original frame size.

    Returns
    -------
    bbox_xywh : (N, 4) float ndarray of center-x, center-y, width, height
    cls_conf  : (N,) float ndarray of per-box confidences
    cls_ids   : (N,) int ndarray of class indices
    """
    num = c_int(0)
    pnum = pointer(num)
    predict_image(net, im)
    letter_box = 0
    dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum, letter_box)
    num = pnum[0]
    if nms:
        do_nms_sort(dets, num, meta.classes, nms)

    bbox_xywh = []
    cls_conf = []
    cls_ids = []
    for j in range(num):
        for i in range(meta.classes):
            if dets[j].prob[i] > 0:
                b = dets[j].bbox
                if altNames is None:
                    nameTag = meta.names[i]
                else:
                    nameTag = altNames[i]
                if debug:
                    print("Got bbox", b)
                    print(nameTag)
                    print(dets[j].prob[i])
                    print((b.x, b.y, b.w, b.h))
                cls_ids.append(i)
                bbox_xywh.append((b.x * scale_w, b.y * scale_h, b.w * scale_w, b.h * scale_h))
                cls_conf.append(dets[j].prob[i])

    if len(cls_ids) == 0:
        bbox_xywh = torch.FloatTensor([]).reshape([0, 4])
        cls_conf = torch.FloatTensor([])
        cls_ids = torch.LongTensor([])

    result = (torch.FloatTensor(bbox_xywh).numpy(),
              torch.FloatTensor(cls_conf).numpy(),
              torch.LongTensor(cls_ids).numpy())
    # BUG FIX: the original never freed the C detection array, leaking memory
    # on every frame.  All values have been copied into Python objects above.
    free_detections(dets, num)
    return result
def convertBack(x, y, w, h):
    """Convert a center-format box (x, y, w, h) to corner format (xmin, ymin, xmax, ymax)."""
    half_w = w / 2
    half_h = h / 2
    xmin = int(round(x - half_w))
    ymin = int(round(y - half_h))
    xmax = int(round(x + half_w))
    ymax = int(round(y + half_h))
    return xmin, ymin, xmax, ymax


# Module-level darknet state, initialised lazily by performDetect().
netMain = None
metaMain = None
altNames = None
349 | 350 | Otherwise, a dict with 351 | { 352 | "detections": as above 353 | "image": a numpy array representing an image, compatible with scikit-image 354 | "caption": an image caption 355 | } 356 | """ 357 | # Import the global variables. This lets us instance Darknet once, then just call performDetect() again without instancing again 358 | global metaMain, netMain, altNames #pylint: disable=W0603 359 | assert 0 < thresh < 1, "Threshold should be a float between zero and one (non-inclusive)" 360 | if not os.path.exists(configPath): 361 | raise ValueError("Invalid config path `"+os.path.abspath(configPath)+"`") 362 | if not os.path.exists(weightPath): 363 | raise ValueError("Invalid weight path `"+os.path.abspath(weightPath)+"`") 364 | if not os.path.exists(metaPath): 365 | raise ValueError("Invalid data file path `"+os.path.abspath(metaPath)+"`") 366 | if netMain is None: 367 | netMain = load_net_custom(configPath.encode("ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 368 | if metaMain is None: 369 | metaMain = load_meta(metaPath.encode("ascii")) 370 | if altNames is None: 371 | # In Python 3, the metafile default access craps out on Windows (but not Linux) 372 | # Read the names file and create a list to feed to detect 373 | try: 374 | with open(metaPath) as metaFH: 375 | metaContents = metaFH.read() 376 | import re 377 | match = re.search("names *= *(.*)$", metaContents, re.IGNORECASE | re.MULTILINE) 378 | if match: 379 | result = match.group(1) 380 | else: 381 | result = None 382 | try: 383 | if os.path.exists(result): 384 | with open(result) as namesFH: 385 | namesList = namesFH.read().strip().split("\n") 386 | altNames = [x.strip() for x in namesList] 387 | except TypeError: 388 | pass 389 | except Exception: 390 | pass 391 | if initOnly: 392 | print("Initialized detector") 393 | return None 394 | if not os.path.exists(imagePath): 395 | raise ValueError("Invalid image path `"+os.path.abspath(imagePath)+"`") 396 | # Do the detection 397 | 
#detections = detect(netMain, metaMain, imagePath, thresh) # if is used cv2.imread(image) 398 | detections = detect(netMain, metaMain, imagePath.encode("ascii"), thresh) 399 | if showImage: 400 | try: 401 | from skimage import io, draw 402 | import numpy as np 403 | image = io.imread(imagePath) 404 | print("*** "+str(len(detections))+" Results, color coded by confidence ***") 405 | imcaption = [] 406 | for detection in detections: 407 | label = detection[0] 408 | confidence = detection[1] 409 | pstring = label+": "+str(np.rint(100 * confidence))+"%" 410 | imcaption.append(pstring) 411 | print(pstring) 412 | bounds = detection[2] 413 | shape = image.shape 414 | # x = shape[1] 415 | # xExtent = int(x * bounds[2] / 100) 416 | # y = shape[0] 417 | # yExtent = int(y * bounds[3] / 100) 418 | yExtent = int(bounds[3]) 419 | xEntent = int(bounds[2]) 420 | # Coordinates are around the center 421 | xCoord = int(bounds[0] - bounds[2]/2) 422 | yCoord = int(bounds[1] - bounds[3]/2) 423 | boundingBox = [ 424 | [xCoord, yCoord], 425 | [xCoord, yCoord + yExtent], 426 | [xCoord + xEntent, yCoord + yExtent], 427 | [xCoord + xEntent, yCoord] 428 | ] 429 | # Wiggle it around to make a 3px border 430 | rr, cc = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 431 | rr2, cc2 = draw.polygon_perimeter([x[1] + 1 for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 432 | rr3, cc3 = draw.polygon_perimeter([x[1] - 1 for x in boundingBox], [x[0] for x in boundingBox], shape= shape) 433 | rr4, cc4 = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] + 1 for x in boundingBox], shape= shape) 434 | rr5, cc5 = draw.polygon_perimeter([x[1] for x in boundingBox], [x[0] - 1 for x in boundingBox], shape= shape) 435 | boxColor = (int(255 * (1 - (confidence ** 2))), int(255 * (confidence ** 2)), 0) 436 | draw.set_color(image, (rr, cc), boxColor, alpha= 0.8) 437 | draw.set_color(image, (rr2, cc2), boxColor, alpha= 0.8) 438 | 
draw.set_color(image, (rr3, cc3), boxColor, alpha= 0.8) 439 | draw.set_color(image, (rr4, cc4), boxColor, alpha= 0.8) 440 | draw.set_color(image, (rr5, cc5), boxColor, alpha= 0.8) 441 | if not makeImageOnly: 442 | io.imshow(image) 443 | io.show() 444 | detections = { 445 | "detections": detections, 446 | "image": image, 447 | "caption": "\n
".join(imcaption) 448 | } 449 | except Exception as e: 450 | print("Unable to show image: "+str(e)) 451 | return detections 452 | 453 | 454 | if __name__ == "__main__": 455 | print("just do") -------------------------------------------------------------------------------- /detector/v4detector.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | import math 3 | import random 4 | import os 5 | import cv2 6 | import numpy as np 7 | import time 8 | import detector.v4darknet 9 | 10 | 11 | netMain = None 12 | metaMain = None 13 | altNames = None 14 | 15 | 16 | def YOLO(frame_read): 17 | 18 | global metaMain, netMain, altNames 19 | configPath = "4.cfg" 20 | weightPath = "4.weights" 21 | metaPath = "coco.data" 22 | if not os.path.exists(configPath): 23 | raise ValueError("Invalid config path `" + 24 | os.path.abspath(configPath)+"`") 25 | if not os.path.exists(weightPath): 26 | raise ValueError("Invalid weight path `" + 27 | os.path.abspath(weightPath)+"`") 28 | if not os.path.exists(metaPath): 29 | raise ValueError("Invalid data file path `" + 30 | os.path.abspath(metaPath)+"`") 31 | if netMain is None: 32 | netMain = detector.v4darknet.load_net_custom(configPath.encode( 33 | "ascii"), weightPath.encode("ascii"), 0, 1) # batch size = 1 34 | if metaMain is None: 35 | metaMain = detector.v4darknet.load_meta(metaPath.encode("ascii")) 36 | if altNames is None: 37 | try: 38 | with open(metaPath) as metaFH: 39 | metaContents = metaFH.read() 40 | import re 41 | match = re.search("names *= *(.*)$", metaContents, 42 | re.IGNORECASE | re.MULTILINE) 43 | if match: 44 | result = match.group(1) 45 | else: 46 | result = None 47 | try: 48 | if os.path.exists(result): 49 | with open(result) as namesFH: 50 | namesList = namesFH.read().strip().split("\n") 51 | altNames = [x.strip() for x in namesList] 52 | except TypeError: 53 | pass 54 | except Exception: 55 | pass 56 | 57 | img_h, img_w = frame_read.shape[:2] 58 | net_h = 
def mkdir_if_missing(dir):
    """Create directory *dir* (including parents); no-op if it already exists."""
    Path(dir).mkdir(parents=True, exist_ok=True)
def parse_args():
    """Build and parse the command-line options for the MOT evaluation driver.

    Returns an argparse.Namespace with: config_detection, config_deepsort,
    display, frame_interval, display_width, display_height, save_path,
    use_cuda, cam.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yaml")
    parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml")
    # Display is off by default for batch evaluation; the flag keeps it off.
    parser.add_argument("--ignore_display", dest="display", action="store_false", default=False)
    parser.add_argument("--frame_interval", type=int, default=1)
    parser.add_argument("--display_width", type=int, default=800)
    parser.add_argument("--display_height", type=int, default=600)
    parser.add_argument("--save_path", type=str, default="./demo/demo.avi")
    # --cpu clears args.use_cuda.
    parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True)
    # FIX: was default="-1" (a string), which only worked because argparse
    # applies type= to string defaults; an int default is explicit and robust.
    parser.add_argument("--camera", action="store", dest="cam", type=int, default=-1)
    return parser.parse_args()
| flake8-import-order==0.18.1 9 | importlib-metadata==1.6.0 10 | jdcal==1.4.1 11 | joblib==0.14.1 12 | lap==0.4.0 13 | mccabe==0.6.1 14 | more-itertools==8.2.0 15 | motmetrics==1.2.0 16 | numpy==1.18.2 17 | opencv-python==4.2.0.34 18 | openpyxl==3.0.3 19 | packaging==20.3 20 | pandas==1.0.3 21 | Pillow==8.2.0 22 | pluggy==0.13.1 23 | py==1.10.0 24 | py-cpuinfo==5.0.0 25 | pycodestyle==2.5.0 26 | pyflakes==2.1.1 27 | pyparsing==2.4.7 28 | pytest==5.4.1 29 | pytest-benchmark==3.2.3 30 | python-dateutil==2.8.1 31 | pytz==2019.3 32 | PyYAML==5.3.1 33 | scikit-learn==0.22.2.post1 34 | scipy==1.4.1 35 | six==1.14.0 36 | sklearn==0.0 37 | torch==1.4.0 38 | torchvision==0.5.0 39 | Vizer==0.1.5 40 | wcwidth==0.1.9 41 | xmltodict==0.12.0 42 | zipp==3.1.0 43 | -------------------------------------------------------------------------------- /scripts/yolov3_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection -------------------------------------------------------------------------------- /scripts/yolov3_tiny_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml -------------------------------------------------------------------------------- /tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import warnings 7 | import numpy as np 8 | 9 | from deep_sort import build_tracker 10 | from utils.draw import draw_boxes 11 | from utils.parser import get_config 12 | from utils.log import get_logger 13 | from utils.io import write_results 14 | from detector import v4detector 15 | 16 | lst_move_life = [0,1,2,3,4,5,6,7,8,9,16,77] 17 | 18 | class VideoTracker(object): 19 | def __init__(self, cfg, args, video_path): 20 | self.cfg = cfg 21 
| self.args = args 22 | self.video_path = video_path 23 | self.logger = get_logger("root") 24 | 25 | use_cuda = args.use_cuda and torch.cuda.is_available() 26 | if not use_cuda: 27 | warnings.warn("Running in cpu mode which maybe very slow!", UserWarning) 28 | 29 | if args.display: 30 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 31 | cv2.resizeWindow("test", args.display_width, args.display_height) 32 | 33 | if args.cam != -1: 34 | print("Using webcam " + str(args.cam)) 35 | self.vdo = cv2.VideoCapture(args.cam) 36 | else: 37 | self.vdo = cv2.VideoCapture() 38 | self.deepsort = build_tracker(cfg, use_cuda=use_cuda) 39 | 40 | 41 | def __enter__(self): 42 | if self.args.cam != -1: 43 | ret, frame = self.vdo.read() 44 | assert ret, "Error: Camera error" 45 | self.im_width = frame.shape[0] 46 | self.im_height = frame.shape[1] 47 | 48 | else: 49 | assert os.path.isfile(self.video_path), "Path error" 50 | self.vdo.open(self.video_path) 51 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 52 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 53 | assert self.vdo.isOpened() 54 | 55 | if self.args.save_path: 56 | os.makedirs(self.args.save_path, exist_ok=True) 57 | 58 | # path of saved video and results 59 | self.save_video_path = os.path.join(self.args.save_path, "results.avi") 60 | self.save_results_path = os.path.join(self.args.save_path, "results.txt") 61 | 62 | # create video writer 63 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 64 | self.writer = cv2.VideoWriter(self.save_video_path, fourcc, 20, (self.im_width,self.im_height)) 65 | 66 | # logging 67 | self.logger.info("Save results to {}".format(self.args.save_path)) 68 | 69 | return self 70 | 71 | 72 | def __exit__(self, exc_type, exc_value, exc_traceback): 73 | if exc_type: 74 | print(exc_type, exc_value, exc_traceback) 75 | 76 | 77 | def run(self): 78 | results = [] 79 | idx_frame = 0 80 | while self.vdo.grab(): 81 | idx_frame += 1 82 | if idx_frame % self.args.frame_interval: 83 | continue 
84 | 85 | start = time.time() 86 | _, ori_im = self.vdo.retrieve() 87 | im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB) 88 | if len(im) == 0: 89 | continue 90 | # do detection 91 | # bbox_xywh, cls_conf, cls_ids = self.detector(im) 92 | 93 | bbox_xywh, cls_conf, cls_ids = v4detector.YOLO(im) 94 | if len(bbox_xywh) == 0: 95 | continue 96 | print("detection cls_ids:", cls_ids) 97 | 98 | # #filter cls id for tracking 99 | # print("cls_ids") 100 | # print(cls_ids) 101 | # # select person class 102 | mask = [] 103 | # lst_for_track = [] 104 | for id in cls_ids: 105 | if id in lst_move_life: 106 | # lst_for_track.append(id) 107 | mask.append(True) 108 | else: 109 | mask.append(False) 110 | print("mask cls_ids:", mask) 111 | 112 | bbox_xywh = bbox_xywh[mask] 113 | # # bbox dilation just in case bbox too small, delete this line if using a better pedestrian detector 114 | bbox_xywh[:,3:] *= 1.2 115 | cls_conf = cls_conf[mask] 116 | 117 | # do tracking 118 | 119 | 120 | outputs = self.deepsort.update(bbox_xywh, cls_conf, im, cls_ids) 121 | 122 | # draw boxes for visualization 123 | if len(outputs) > 0: 124 | bbox_tlwh = [] 125 | bbox_xyxy = outputs[:,:4] 126 | identities = outputs[:,4:5] 127 | cls_id = outputs[:,-1] 128 | print("track res cls_id:", cls_id) 129 | # cls_ids_show = [cls_ids[i] for i in cls_id] 130 | ori_im = draw_boxes(ori_im, bbox_xyxy, cls_ids, identities) 131 | 132 | for bb_xyxy in bbox_xyxy: 133 | bbox_tlwh.append(self.deepsort._xyxy_to_tlwh(bb_xyxy)) 134 | 135 | results.append((idx_frame-1, bbox_tlwh, identities)) 136 | 137 | end = time.time() 138 | 139 | if self.args.display: 140 | cv2.imshow("test", ori_im) 141 | cv2.waitKey(1) 142 | 143 | if self.args.save_path: 144 | self.writer.write(ori_im) 145 | 146 | # save results 147 | write_results(self.save_results_path, results, 'mot') 148 | 149 | # logging 150 | self.logger.info("time: {:.03f}s, fps: {:.03f}, detection numbers: {}, tracking numbers: {}" \ 151 | .format(end-start, 1/(end-start), 
palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)


def compute_color_for_labels(label):
    """Map an integer class/track label to a deterministic BGR color tuple."""
    seed = label ** 2 - label + 1
    return tuple(int((p * seed) % 255) for p in palette)
cls_id, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | # label = label + "[" +str(cls_id[i]) + "]" 27 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 28 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 29 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 30 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 31 | return img 32 | 33 | 34 | 35 | if __name__ == '__main__': 36 | for i in range(82): 37 | print(compute_color_for_labels(i)) 38 | -------------------------------------------------------------------------------- /utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | 
trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = 
def write_results(filename, results, data_type):
    """Dump tracking results to *filename* in 'mot' or 'kitti' text format.

    results: iterable of (frame_id, tlwhs, track_ids) triples, where tlwhs
    are (x1, y1, w, h) boxes.  Raises ValueError for an unknown data_type.
    """
    if data_type == 'mot':
        save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
    elif data_type == 'kitti':
        save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
    else:
        raise ValueError(data_type)

    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            if data_type == 'kitti':
                frame_id -= 1  # KITTI frame ids are 0-based
            for tlwh, track_id in zip(tlwhs, track_ids):
                if track_id < 0:  # negative ids mark invalid tracks
                    continue
                x1, y1, w, h = tlwh
                f.write(save_format.format(frame=frame_id, id=track_id,
                                           x1=x1, y1=y1, x2=x1 + w, y2=y1 + h,
                                           w=w, h=h))
def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
    """Dispatch result-file parsing by data_type; only 'mot'/'lab' are supported."""
    if data_type not in ('mot', 'lab'):
        raise ValueError('Unknown data type: {}'.format(data_type))
    return read_mot_results(filename, is_gt, is_ignore)
def read_mot_results(filename, is_gt, is_ignore):
    """Parse a MOT-format CSV file into {frame_id: [(tlwh, target_id, score)]}.

    is_gt: apply the MOT16/17 ground-truth mark/label filter (score fixed to 1).
    is_ignore: keep only the ignorable annotations (score fixed to 1).
    Returns an empty dict when the file does not exist.
    """
    valid_labels = {1}
    ignore_labels = {2, 7, 8, 12}
    results_dict = dict()
    if not os.path.isfile(filename):
        return results_dict

    is_mot16_17 = 'MOT16-' in filename or 'MOT17-' in filename
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split(',')
            if len(fields) < 7:
                continue
            fid = int(fields[0])
            if fid < 1:
                continue
            # Register the frame key even if every entry below is filtered out,
            # matching the original setdefault-before-filter behaviour.
            results_dict.setdefault(fid, list())

            if is_gt:
                if is_mot16_17:
                    label = int(float(fields[7]))
                    mark = int(float(fields[6]))
                    if mark == 0 or label not in valid_labels:
                        continue
                score = 1
            elif is_ignore:
                if not is_mot16_17:
                    continue
                label = int(float(fields[7]))
                vis_ratio = float(fields[8])
                if label not in ignore_labels and vis_ratio >= 0:
                    continue
                score = 1
            else:
                score = float(fields[6])

            tlwh = tuple(map(float, fields[2:6]))
            target_id = int(fields[1])
            results_dict[fid].append((tlwh, target_id, score))

    return results_dict


def unzip_objs(objs):
    """Split [(tlwh, id, score), ...] into (tlwhs (N,4) ndarray, ids, scores)."""
    tlwhs, ids, scores = zip(*objs) if objs else ([], [], [])
    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
    return tlwhs, ids, scores
class YamlParser(edict):
    """YAML configuration parser based on EasyDict.

    Loads an optional base dict and/or YAML file at construction time and
    exposes the merged configuration as attribute-accessible keys.
    """

    def __init__(self, cfg_dict=None, config_file=None):
        """Build the config from *cfg_dict*, optionally overlaid with *config_file*."""
        if cfg_dict is None:
            cfg_dict = {}

        if config_file is not None:
            assert os.path.isfile(config_file), config_file
            with open(config_file, 'r') as fo:
                # safe_load: yaml.load() without an explicit Loader is unsafe
                # on untrusted input and raises TypeError on PyYAML >= 6.
                # `or {}` guards against an empty YAML file (load returns None).
                cfg_dict.update(yaml.safe_load(fo) or {})

        super(YamlParser, self).__init__(cfg_dict)

    def merge_from_file(self, config_file):
        """Merge the keys of another YAML file into this config, in place."""
        with open(config_file, 'r') as fo:
            self.update(yaml.safe_load(fo) or {})

    def merge_from_dict(self, config_dict):
        """Merge a plain dict into this config, in place."""
        self.update(config_dict)
self.mqtt_client.on_connect = self.on_connect 22 | self.mqtt_client.on_disconnect = self.on_disconnect 23 | self.mqtt_client.on_message = self.on_message 24 | self.ip = ip 25 | self.port = port 26 | self.sub_topic = sub 27 | self.pub_topic = pub 28 | self.timeout = timeout 29 | self.callback_mutex = threading.RLock() 30 | self.on_new_image = None 31 | def connect(self): 32 | self.mqtt_client.connect(self.ip, self.port, self.timeout) 33 | self.mqtt_client.loop_start() 34 | 35 | def disconnect(self): 36 | self.mqtt_client.loop_stop() 37 | self.mqtt_client.disconnect() 38 | 39 | # def loop(self): 40 | # self.mqtt_client.loop_forever() 41 | 42 | def on_connect(self, client, userdata, flags, rc): 43 | print("Connected with result code "+str(rc)) 44 | 45 | print(self.sub_topic) 46 | self.mqtt_client.subscribe(self.sub_topic, 0) 47 | 48 | def on_disconnect(self, client, userdata, rc): 49 | print("disconnected with result code "+str(rc)) 50 | 51 | def on_message(self, client, userdata, msg): 52 | stamp = struct.unpack('LL', msg.payload[-16:]) 53 | img = cv2.imdecode(np.fromstring(msg.payload, dtype='uint8'), cv2.IMREAD_UNCHANGED) 54 | print('Worker::on_message: ', stamp, time.clock_gettime(time.CLOCK_MONOTONIC)) 55 | 56 | if(self.on_new_image != None): 57 | self.on_new_image(stamp, img) 58 | 59 | def publish(self, stamp, data): 60 | if(self.mqtt_client.is_connected() == False): 61 | return 62 | 63 | msg = bytes() 64 | #pack stamp & data into msg 65 | self.mqtt_client.publish(self.pub_topic, msg) 66 | 67 | @property 68 | def on_new_image(self): 69 | return self.on_new_image 70 | 71 | def on_new_image(self, func): 72 | with self.callback_mutex: 73 | self.on_new_image = func 74 | 75 | def on_new_image(stamp, img): 76 | print(stamp) 77 | print(img.shape) 78 | cv2.imshow("img", img) 79 | cv2.waitKey(1) 80 | 81 | 82 | ''' 83 | { 84 | "stamp": stamp, 85 | "data": [ 86 | {"cls_id": 2, "track_id": 3, "bbox": "4,5,6,7"}, 87 | {"cls_id": 3, "track_id": 5, "bbox": "6,7,8,9"}] 88 | } 
if __name__ == "__main__":
    # Swap in our SIGINT handler so Ctrl-C sets the shutdown flag instead of
    # killing the process mid-callback; the original handler is restored below.
    previous_handler = signal.getsignal(signal.SIGINT)
    signal.signal(signal.SIGINT, sigint_handler)

    worker = Worker('192.168.1.24', 1883)
    worker.on_new_image = on_new_image
    worker.connect()

    # Idle until the SIGINT handler flips the flag.
    while not sigint_catched:
        time.sleep(0.1)

    signal.signal(signal.SIGINT, previous_handler)
    worker.disconnect()