├── .gitattributes ├── README.md ├── deep_sort ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── deep_sort.cpython-310.pyc │ └── deep_sort.cpython-39.pyc ├── configs │ └── deep_sort.yaml ├── deep │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── feature_extractor.cpython-310.pyc │ │ ├── feature_extractor.cpython-39.pyc │ │ ├── model.cpython-310.pyc │ │ └── model.cpython-39.pyc │ ├── checkpoint │ │ ├── .gitkeep │ │ ├── ckpt.t7 │ │ └── original_ckpt.t7 │ ├── evaluate.py │ ├── feature_extractor.py │ ├── model.py │ ├── original_model.py │ ├── test.py │ └── train.py ├── deep_sort.py ├── sort │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── detection.cpython-310.pyc │ │ ├── detection.cpython-39.pyc │ │ ├── iou_matching.cpython-310.pyc │ │ ├── iou_matching.cpython-39.pyc │ │ ├── kalman_filter.cpython-310.pyc │ │ ├── kalman_filter.cpython-39.pyc │ │ ├── linear_assignment.cpython-310.pyc │ │ ├── linear_assignment.cpython-39.pyc │ │ ├── nn_matching.cpython-310.pyc │ │ ├── nn_matching.cpython-39.pyc │ │ ├── track.cpython-310.pyc │ │ ├── track.cpython-39.pyc │ │ ├── tracker.cpython-310.pyc │ │ └── tracker.cpython-39.pyc │ ├── detection.py │ ├── detection.py.bak │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── parser.cpython-310.pyc │ └── parser.cpython-39.pyc │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── requirements.txt └── track_count_persons .ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tracking-and-counting-Using-YOLOv8-and-DeepSORT 2 | Tracking and counting persons 3 | 4 | Follow this Youtube video to run this code: https://youtu.be/Y2fyDYcfmBg 5 | 6 | Clone this github repo: 7 | 8 | git clone https://github.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT.git 9 | 10 | Open jupyter notebook and start working 11 | 12 | 13 | ![1](https://github.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/assets/60029146/a1057b86-fcd7-412c-b7b0-583101cf91b6) 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. 
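The tracker is exposed through the `DeepSort` class in `deep_sort.py` (the package `__init__.py` also provides a `build_tracker(cfg, use_cuda)` helper that takes a config object matching `configs/deep_sort.yaml`). Below is a minimal usage sketch, not the project's own demo code: it assumes the repository root is on the Python path and uses the bundled re-ID checkpoint; the frame path and detection numbers are placeholders. Detections must be passed as center-based `(x, y, w, h)` boxes with one confidence score per box.

```python
import cv2
import numpy as np
from deep_sort import DeepSort  # the package __init__ re-exports DeepSort

# Build the tracker with the bundled re-ID checkpoint (path relative to the repo root).
tracker = DeepSort("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=False)

frame = cv2.imread("frame.jpg")              # placeholder: any BGR frame from your video
bbox_xywh = np.array([[320, 240, 80, 160]])  # placeholder detection: center x, center y, w, h
confidences = np.array([0.9])                # one detector confidence per box

# update() extracts appearance features, runs the Kalman filter and data association,
# and returns rows of [x1, y1, x2, y2, track_id] for confirmed tracks (empty until a
# track has survived n_init consecutive frames).
outputs = tracker.update(bbox_xywh, confidences, frame)
print(outputs)
```

Detector boxes in corner format (for example YOLOv8 `xyxy` output) need to be converted to this center-based format before calling `update()`.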
-------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/deep_sort.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/deep_sort.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/deep_sort.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/deep_sort.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/feature_extractor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/feature_extractor.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/original_ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/original_ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = 
scores.topk(5, dim=1)[1][:, 0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct / ql.size(0))) 14 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | 10 | class Extractor(object): 11 | def __init__(self, model_path, use_cuda=True): 12 | self.net = Net(reid=True) 13 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 14 | state_dict = torch.load(model_path, map_location=torch.device(self.device))[ 15 | 'net_dict'] 16 | self.net.load_state_dict(state_dict) 17 | logger = logging.getLogger("root.tracker") 18 | logger.info("Loading weights from {}... Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (64, 128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 24 | ]) 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze( 39 | 0) for im in im_crops], dim=0).float() 40 | return im_batch 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 | y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | 
for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=751, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | self.conv = nn.Sequential( 59 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(64), 61 | nn.ReLU(inplace=True), 62 | # nn.Conv2d(32,32,3,stride=1,padding=1), 63 | # nn.BatchNorm2d(32), 64 | # nn.ReLU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(64, 64, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(64, 128, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(128, 256, 2, True) 73 | # 128 16 8 74 | self.layer4 = make_layers(256, 512, 2, True) 75 | # 256 8 4 76 | self.avgpool = nn.AvgPool2d((8, 4), 1) 77 | # 256 1 1 78 | self.reid = reid 79 | self.classifier = nn.Sequential( 80 | nn.Linear(512, 256), 81 | nn.BatchNorm1d(256), 82 | nn.ReLU(inplace=True), 83 | nn.Dropout(), 84 | nn.Linear(256, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | x = self.layer4(x) 93 | x = self.avgpool(x) 94 | x = x.view(x.size(0), -1) 95 | # B x 128 96 | if self.reid: 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | # classifier 100 | x = self.classifier(x) 101 | return x 102 | 103 | 104 | if __name__ == '__main__': 105 | net = Net() 106 | x = torch.randn(4, 3, 128, 64) 107 | y = net(x) 108 | import ipdb 109 | ipdb.set_trace() 110 | -------------------------------------------------------------------------------- /deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 | y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=625, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | 
self.conv = nn.Sequential( 59 | nn.Conv2d(3, 32, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(32), 61 | nn.ELU(inplace=True), 62 | nn.Conv2d(32, 32, 3, stride=1, padding=1), 63 | nn.BatchNorm2d(32), 64 | nn.ELU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(32, 32, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(32, 64, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(64, 128, 2, True) 73 | # 128 16 8 74 | self.dense = nn.Sequential( 75 | nn.Dropout(p=0.6), 76 | nn.Linear(128*16*8, 128), 77 | nn.BatchNorm1d(128), 78 | nn.ELU(inplace=True) 79 | ) 80 | # 256 1 1 81 | self.reid = reid 82 | self.batch_norm = nn.BatchNorm1d(128) 83 | self.classifier = nn.Sequential( 84 | nn.Linear(128, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | 93 | x = x.view(x.size(0), -1) 94 | if self.reid: 95 | x = self.dense[0](x) 96 | x = self.dense[1](x) 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | x = self.dense(x) 100 | # B x 128 101 | # classifier 102 | x = self.classifier(x) 103 | return x 104 | 105 | 106 | if __name__ == '__main__': 107 | net = Net(reid=True) 108 | x = torch.randn(4, 3, 128, 64) 109 | y = net(x) 110 | import ipdb 111 | ipdb.set_trace() 112 | -------------------------------------------------------------------------------- /deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir", default='data', type=str) 12 | parser.add_argument("--no-cuda", action="store_true") 13 | parser.add_argument("--gpu-id", default=0, type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format( 18 | args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 19 | if torch.cuda.is_available() and not args.no_cuda: 20 | cudnn.benchmark = True 21 | 22 | # data loader 23 | root = args.data_dir 24 | query_dir = os.path.join(root, "query") 25 | gallery_dir = os.path.join(root, "gallery") 26 | transform = torchvision.transforms.Compose([ 27 | torchvision.transforms.Resize((128, 64)), 28 | torchvision.transforms.ToTensor(), 29 | torchvision.transforms.Normalize( 30 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 31 | ]) 32 | queryloader = torch.utils.data.DataLoader( 33 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 34 | batch_size=64, shuffle=False 35 | ) 36 | galleryloader = torch.utils.data.DataLoader( 37 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 38 | batch_size=64, shuffle=False 39 | ) 40 | 41 | # net definition 42 | net = Net(reid=True) 43 | assert os.path.isfile( 44 | "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 
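# Unlike feature_extractor.py, the torch.load call below does not pass map_location, so a
# GPU-saved checkpoint on a CPU-only machine may need
# torch.load("./checkpoint/ckpt.t7", map_location=device) instead.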
45 | print('Loading from checkpoint/ckpt.t7') 46 | checkpoint = torch.load("./checkpoint/ckpt.t7") 47 | net_dict = checkpoint['net_dict'] 48 | net.load_state_dict(net_dict, strict=False) 49 | net.eval() 50 | net.to(device) 51 | 52 | # compute features 53 | query_features = torch.tensor([]).float() 54 | query_labels = torch.tensor([]).long() 55 | gallery_features = torch.tensor([]).float() 56 | gallery_labels = torch.tensor([]).long() 57 | 58 | with torch.no_grad(): 59 | for idx, (inputs, labels) in enumerate(queryloader): 60 | inputs = inputs.to(device) 61 | features = net(inputs).cpu() 62 | query_features = torch.cat((query_features, features), dim=0) 63 | query_labels = torch.cat((query_labels, labels)) 64 | 65 | for idx, (inputs, labels) in enumerate(galleryloader): 66 | inputs = inputs.to(device) 67 | features = net(inputs).cpu() 68 | gallery_features = torch.cat((gallery_features, features), dim=0) 69 | gallery_labels = torch.cat((gallery_labels, labels)) 70 | 71 | gallery_labels -= 2 72 | 73 | # save features 74 | features = { 75 | "qf": query_features, 76 | "ql": query_labels, 77 | "gf": gallery_features, 78 | "gl": gallery_labels 79 | } 80 | torch.save(features, "features.pth") 81 | -------------------------------------------------------------------------------- /deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir", default='data', type=str) 15 | parser.add_argument("--no-cuda", action="store_true") 16 | parser.add_argument("--gpu-id", default=0, type=int) 17 | parser.add_argument("--lr", default=0.1, type=float) 18 | parser.add_argument("--interval", '-i', default=20, type=int) 19 | parser.add_argument('--resume', '-r', action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format( 24 | args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 25 | if torch.cuda.is_available() and not args.no_cuda: 26 | cudnn.benchmark = True 27 | 28 | # data loading 29 | root = args.data_dir 30 | train_dir = os.path.join(root, "train") 31 | test_dir = os.path.join(root, "test") 32 | transform_train = torchvision.transforms.Compose([ 33 | torchvision.transforms.RandomCrop((128, 64), padding=4), 34 | torchvision.transforms.RandomHorizontalFlip(), 35 | torchvision.transforms.ToTensor(), 36 | torchvision.transforms.Normalize( 37 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 38 | ]) 39 | transform_test = torchvision.transforms.Compose([ 40 | torchvision.transforms.Resize((128, 64)), 41 | torchvision.transforms.ToTensor(), 42 | torchvision.transforms.Normalize( 43 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 44 | ]) 45 | trainloader = torch.utils.data.DataLoader( 46 | torchvision.datasets.ImageFolder(train_dir, transform=transform_train), 47 | batch_size=64, shuffle=True 48 | ) 49 | testloader = torch.utils.data.DataLoader( 50 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 51 | batch_size=64, shuffle=True 52 | ) 53 | num_classes = max(len(trainloader.dataset.classes), 54 | len(testloader.dataset.classes)) 55 | 56 | # net definition 57 | start_epoch = 0 58 | net = Net(num_classes=num_classes) 59 | if args.resume: 60 | assert 
os.path.isfile( 61 | "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 62 | print('Loading from checkpoint/ckpt.t7') 63 | checkpoint = torch.load("./checkpoint/ckpt.t7") 64 | # import ipdb; ipdb.set_trace() 65 | net_dict = checkpoint['net_dict'] 66 | net.load_state_dict(net_dict) 67 | best_acc = checkpoint['acc'] 68 | start_epoch = checkpoint['epoch'] 69 | net.to(device) 70 | 71 | # loss and optimizer 72 | criterion = torch.nn.CrossEntropyLoss() 73 | optimizer = torch.optim.SGD( 74 | net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 75 | best_acc = 0. 76 | 77 | # train function for each epoch 78 | 79 | 80 | def train(epoch): 81 | print("\nEpoch : %d" % (epoch+1)) 82 | net.train() 83 | training_loss = 0. 84 | train_loss = 0. 85 | correct = 0 86 | total = 0 87 | interval = args.interval 88 | start = time.time() 89 | for idx, (inputs, labels) in enumerate(trainloader): 90 | # forward 91 | inputs, labels = inputs.to(device), labels.to(device) 92 | outputs = net(inputs) 93 | loss = criterion(outputs, labels) 94 | 95 | # backward 96 | optimizer.zero_grad() 97 | loss.backward() 98 | optimizer.step() 99 | 100 | # accumurating 101 | training_loss += loss.item() 102 | train_loss += loss.item() 103 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 104 | total += labels.size(0) 105 | 106 | # print 107 | if (idx+1) % interval == 0: 108 | end = time.time() 109 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 110 | 100.*(idx+1)/len(trainloader), end-start, training_loss / 111 | interval, correct, total, 100.*correct/total 112 | )) 113 | training_loss = 0. 114 | start = time.time() 115 | 116 | return train_loss/len(trainloader), 1. - correct/total 117 | 118 | 119 | def test(epoch): 120 | global best_acc 121 | net.eval() 122 | test_loss = 0. 123 | correct = 0 124 | total = 0 125 | start = time.time() 126 | with torch.no_grad(): 127 | for idx, (inputs, labels) in enumerate(testloader): 128 | inputs, labels = inputs.to(device), labels.to(device) 129 | outputs = net(inputs) 130 | loss = criterion(outputs, labels) 131 | 132 | test_loss += loss.item() 133 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 134 | total += labels.size(0) 135 | 136 | print("Testing ...") 137 | end = time.time() 138 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 139 | 100.*(idx+1)/len(testloader), end-start, test_loss / 140 | len(testloader), correct, total, 100.*correct/total 141 | )) 142 | 143 | # saving checkpoint 144 | acc = 100.*correct/total 145 | if acc > best_acc: 146 | best_acc = acc 147 | print("Saving parameters to checkpoint/ckpt.t7") 148 | checkpoint = { 149 | 'net_dict': net.state_dict(), 150 | 'acc': acc, 151 | 'epoch': epoch, 152 | } 153 | if not os.path.isdir('checkpoint'): 154 | os.mkdir('checkpoint') 155 | torch.save(checkpoint, './checkpoint/ckpt.t7') 156 | 157 | return test_loss/len(testloader), 1. 
- correct/total 158 | 159 | 160 | # plot figure 161 | x_epoch = [] 162 | record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} 163 | fig = plt.figure() 164 | ax0 = fig.add_subplot(121, title="loss") 165 | ax1 = fig.add_subplot(122, title="top1err") 166 | 167 | 168 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 169 | global record 170 | record['train_loss'].append(train_loss) 171 | record['train_err'].append(train_err) 172 | record['test_loss'].append(test_loss) 173 | record['test_err'].append(test_err) 174 | 175 | x_epoch.append(epoch) 176 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 177 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 178 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 179 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 180 | if epoch == 0: 181 | ax0.legend() 182 | ax1.legend() 183 | fig.savefig("train.jpg") 184 | 185 | # lr decay 186 | 187 | 188 | def lr_decay(): 189 | global optimizer 190 | for params in optimizer.param_groups: 191 | params['lr'] *= 0.1 192 | lr = params['lr'] 193 | print("Learning rate adjusted to {}".format(lr)) 194 | 195 | 196 | def main(): 197 | for epoch in range(start_epoch, start_epoch+40): 198 | train_loss, train_err = train(epoch) 199 | test_loss, test_err = test(epoch) 200 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 201 | if (epoch+1) % 20 == 0: 202 | lr_decay() 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.detection import Detection 7 | from .sort.tracker import Tracker 8 | 9 | 10 | __all__ = ['DeepSort'] 11 | 12 | 13 | class DeepSort(object): 14 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 15 | self.min_confidence = min_confidence 16 | self.nms_max_overlap = nms_max_overlap 17 | 18 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 19 | 20 | max_cosine_distance = max_dist 21 | metric = NearestNeighborDistanceMetric( 22 | "cosine", max_cosine_distance, nn_budget) 23 | self.tracker = Tracker( 24 | metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 25 | 26 | def update(self, bbox_xywh, confidences, ori_img): 27 | self.height, self.width = ori_img.shape[:2] 28 | # generate detections 29 | features = self._get_features(bbox_xywh, ori_img) 30 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 31 | detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate( 32 | confidences) if conf > self.min_confidence] 33 | 34 | # run on non-maximum supression 35 | boxes = np.array([d.tlwh for d in detections]) 36 | scores = np.array([d.confidence for d in detections]) 37 | 38 | # update tracker 39 | self.tracker.predict() 40 | self.tracker.update(detections) 41 | 42 | # output bbox identities 43 | outputs = [] 44 | for track in self.tracker.tracks: 45 | if not track.is_confirmed() or track.time_since_update > 1: 46 | continue 47 | box = track.to_tlwh() 48 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 49 | track_id = track.track_id 50 | #outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 51 | 
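# np.int was deprecated in NumPy 1.20 and removed in 1.24, so the builtin int is used below.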
outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int)) 52 | 53 | if len(outputs) > 0: 54 | outputs = np.stack(outputs, axis=0) 55 | return outputs 56 | 57 | """ 58 | TODO: 59 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 60 | Thanks JieChen91@github.com for reporting this bug! 61 | """ 62 | @staticmethod 63 | def _xywh_to_tlwh(bbox_xywh): 64 | if isinstance(bbox_xywh, np.ndarray): 65 | bbox_tlwh = bbox_xywh.copy() 66 | elif isinstance(bbox_xywh, torch.Tensor): 67 | bbox_tlwh = bbox_xywh.clone() 68 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 69 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 70 | return bbox_tlwh 71 | 72 | def _xywh_to_xyxy(self, bbox_xywh): 73 | x, y, w, h = bbox_xywh 74 | x1 = max(int(x - w / 2), 0) 75 | x2 = min(int(x + w / 2), self.width - 1) 76 | y1 = max(int(y - h / 2), 0) 77 | y2 = min(int(y + h / 2), self.height - 1) 78 | return x1, y1, x2, y2 79 | 80 | def _tlwh_to_xyxy(self, bbox_tlwh): 81 | """ 82 | TODO: 83 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 84 | Thanks JieChen91@github.com for reporting this bug! 85 | """ 86 | x, y, w, h = bbox_tlwh 87 | x1 = max(int(x), 0) 88 | x2 = min(int(x+w), self.width - 1) 89 | y1 = max(int(y), 0) 90 | y2 = min(int(y+h), self.height - 1) 91 | return x1, y1, x2, y2 92 | 93 | def increment_ages(self): 94 | self.tracker.increment_ages() 95 | 96 | def _xyxy_to_tlwh(self, bbox_xyxy): 97 | x1, y1, x2, y2 = bbox_xyxy 98 | 99 | t = x1 100 | l = y1 101 | w = int(x2 - x1) 102 | h = int(y2 - y1) 103 | return t, l, w, h 104 | 105 | def _get_features(self, bbox_xywh, ori_img): 106 | im_crops = [] 107 | for box in bbox_xywh: 108 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 109 | im = ori_img[y1:y2, x1:x2] 110 | im_crops.append(im) 111 | if im_crops: 112 | features = self.extractor(im_crops) 113 | else: 114 | features = np.array([]) 115 | return features 116 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/detection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/detection.cpython-310.pyc -------------------------------------------------------------------------------- 
/deep_sort/sort/__pycache__/detection.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/detection.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/iou_matching.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/iou_matching.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/kalman_filter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/kalman_filter.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/linear_assignment.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/linear_assignment.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/nn_matching.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/nn_matching.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/track.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/track.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/track.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/track.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/tracker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/tracker.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/tracker.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/tracker.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | 35 | # def __init__(self, tlwh, confidence, feature): 36 | # self.tlwh = np.asarray(tlwh, dtype=np.float) 37 | # self.confidence = float(confidence) 38 | # self.feature = np.asarray(feature, dtype=np.float32) 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 
51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /deep_sort/sort/detection.py.bak: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 
45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray( 80 | [detections[i].tlwh for i in detection_indices]) 81 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 82 | return cost_matrix 83 | -------------------------------------------------------------------------------- /deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 
70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 
172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 
28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 
109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
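    Examples
    --------
    A minimal usage sketch; it assumes `kf`, `cost_matrix`, `tracks`,
    `detections` and both index lists were already built elsewhere (this is
    the same call pattern used by `Tracker._match` in tracker.py):

    >>> cost_matrix = gate_cost_matrix(
    ...     kf, cost_matrix, tracks, detections,
    ...     track_indices, detection_indices)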
181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 
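    Examples
    --------
    A toy sketch with hand-picked 2-D feature vectors (illustrative values
    only):

    >>> import numpy as np
    >>> x = np.array([[1., 0.], [0., 1.]])  # two stored samples
    >>> y = np.array([[1., 0.]])            # one query feature
    >>> _nn_cosine_distance(x, y)
    array([0.])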
93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | if metric == "euclidean": 126 | self._metric = _nn_euclidean_distance 127 | elif metric == "cosine": 128 | self._metric = _nn_cosine_distance 129 | else: 130 | raise ValueError( 131 | "Invalid metric; must be either 'euclidean' or 'cosine'") 132 | self.matching_threshold = matching_threshold 133 | self.budget = budget 134 | self.samples = {} 135 | 136 | def partial_fit(self, features, targets, active_targets): 137 | """Update the distance metric with new data. 138 | 139 | Parameters 140 | ---------- 141 | features : ndarray 142 | An NxM matrix of N features of dimensionality M. 143 | targets : ndarray 144 | An integer array of associated target identities. 145 | active_targets : List[int] 146 | A list of targets that are currently present in the scene. 147 | 148 | """ 149 | for feature, target in zip(features, targets): 150 | self.samples.setdefault(target, []).append(feature) 151 | if self.budget is not None: 152 | self.samples[target] = self.samples[target][-self.budget:] 153 | self.samples = {k: self.samples[k] for k in active_targets} 154 | 155 | def distance(self, features, targets): 156 | """Compute distance between features and targets. 157 | 158 | Parameters 159 | ---------- 160 | features : ndarray 161 | An NxM matrix of N features of dimensionality M. 162 | targets : List[int] 163 | A list of targets to match the given `features` against. 164 | 165 | Returns 166 | ------- 167 | ndarray 168 | Returns a cost matrix of shape len(targets), len(features), where 169 | element (i, j) contains the closest squared distance between 170 | `targets[i]` and `features[j]`. 171 | 172 | """ 173 | cost_matrix = np.zeros((len(targets), len(features))) 174 | for i, target in enumerate(targets): 175 | cost_matrix[i, :] = self._metric(self.samples[target], features) 176 | return cost_matrix 177 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. 
[1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> scores = [d.confidence for d in detections]
19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20 | >>> detections = [detections[i] for i in indices]
21 |
22 | Parameters
23 | ----------
24 | boxes : ndarray
25 | Array of ROIs (x, y, width, height).
26 | max_bbox_overlap : float
27 | ROIs that overlap more than this value are suppressed.
28 | scores : Optional[array_like]
29 | Detector confidence score.
30 |
31 | Returns
32 | -------
33 | List[int]
34 | Returns indices of detections that have survived non-maxima suppression.
35 |
36 | """
37 | if len(boxes) == 0:
38 | return []
39 |
40 | boxes = boxes.astype(float)  # `np.float` was removed in NumPy 1.24 (the version pinned in requirements.txt); the builtin float is the drop-in replacement
41 | pick = []
42 |
43 | x1 = boxes[:, 0]
44 | y1 = boxes[:, 1]
45 | x2 = boxes[:, 2] + boxes[:, 0]
46 | y2 = boxes[:, 3] + boxes[:, 1]
47 |
48 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 | if scores is not None:
50 | idxs = np.argsort(scores)
51 | else:
52 | idxs = np.argsort(y2)
53 |
54 | while len(idxs) > 0:
55 | last = len(idxs) - 1
56 | i = idxs[last]
57 | pick.append(i)
58 |
59 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 |
64 | w = np.maximum(0, xx2 - xx1 + 1)
65 | h = np.maximum(0, yy2 - yy1 + 1)
66 |
67 | overlap = (w * h) / area[idxs[:last]]
68 |
69 | idxs = np.delete(
70 | idxs, np.concatenate(
71 | ([last], np.where(overlap > max_bbox_overlap)[0])))
72 |
73 | return pick
74 |
-------------------------------------------------------------------------------- /deep_sort/sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, miny, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def increment_age(self): 113 | self.age += 1 114 | self.time_since_update += 1 115 | 116 | def predict(self, kf): 117 | """Propagate the state distribution to the current time step using a 118 | Kalman filter prediction step. 119 | 120 | Parameters 121 | ---------- 122 | kf : kalman_filter.KalmanFilter 123 | The Kalman filter. 124 | 125 | """ 126 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 127 | self.increment_age() 128 | 129 | def update(self, kf, detection): 130 | """Perform Kalman filter measurement update step and update the feature 131 | cache. 132 | 133 | Parameters 134 | ---------- 135 | kf : kalman_filter.KalmanFilter 136 | The Kalman filter. 137 | detection : Detection 138 | The associated detection. 139 | 140 | """ 141 | self.mean, self.covariance = kf.update( 142 | self.mean, self.covariance, detection.to_xyah()) 143 | self.features.append(detection.feature) 144 | 145 | self.hits += 1 146 | self.time_since_update = 0 147 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 148 | self.state = TrackState.Confirmed 149 | 150 | def mark_missed(self): 151 | """Mark this track as missed (no association at the current time step). 152 | """ 153 | if self.state == TrackState.Tentative: 154 | self.state = TrackState.Deleted 155 | elif self.time_since_update > self._max_age: 156 | self.state = TrackState.Deleted 157 | 158 | def is_tentative(self): 159 | """Returns True if this track is tentative (unconfirmed). 
160 | """ 161 | return self.state == TrackState.Tentative 162 | 163 | def is_confirmed(self): 164 | """Returns True if this track is confirmed.""" 165 | return self.state == TrackState.Confirmed 166 | 167 | def is_deleted(self): 168 | """Returns True if this track is dead and should be deleted.""" 169 | return self.state == TrackState.Deleted 170 | -------------------------------------------------------------------------------- /deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def increment_ages(self): 59 | for track in self.tracks: 60 | track.increment_age() 61 | track.mark_missed() 62 | 63 | def update(self, detections): 64 | """Perform measurement update and track management. 65 | 66 | Parameters 67 | ---------- 68 | detections : List[deep_sort.detection.Detection] 69 | A list of detections at the current time step. 70 | 71 | """ 72 | # Run matching cascade. 73 | matches, unmatched_tracks, unmatched_detections = \ 74 | self._match(detections) 75 | 76 | # Update track set. 77 | for track_idx, detection_idx in matches: 78 | self.tracks[track_idx].update( 79 | self.kf, detections[detection_idx]) 80 | for track_idx in unmatched_tracks: 81 | self.tracks[track_idx].mark_missed() 82 | for detection_idx in unmatched_detections: 83 | self._initiate_track(detections[detection_idx]) 84 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 85 | 86 | # Update distance metric. 
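        # The loop below gathers the appearance features of every confirmed
        # track and hands them to NearestNeighborDistanceMetric.partial_fit,
        # which caches them per track id (trimmed to `budget`) and drops
        # identities that are no longer active; each track's local feature
        # list is then cleared so the same features are not submitted twice.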
87 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 88 | features, targets = [], [] 89 | for track in self.tracks: 90 | if not track.is_confirmed(): 91 | continue 92 | features += track.features 93 | targets += [track.track_id for _ in track.features] 94 | track.features = [] 95 | self.metric.partial_fit( 96 | np.asarray(features), np.asarray(targets), active_targets) 97 | 98 | def _match(self, detections): 99 | 100 | def gated_metric(tracks, dets, track_indices, detection_indices): 101 | features = np.array([dets[i].feature for i in detection_indices]) 102 | targets = np.array([tracks[i].track_id for i in track_indices]) 103 | cost_matrix = self.metric.distance(features, targets) 104 | cost_matrix = linear_assignment.gate_cost_matrix( 105 | self.kf, cost_matrix, tracks, dets, track_indices, 106 | detection_indices) 107 | 108 | return cost_matrix 109 | 110 | # Split track set into confirmed and unconfirmed tracks. 111 | confirmed_tracks = [ 112 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 113 | unconfirmed_tracks = [ 114 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 115 | 116 | # Associate confirmed tracks using appearance features. 117 | matches_a, unmatched_tracks_a, unmatched_detections = \ 118 | linear_assignment.matching_cascade( 119 | gated_metric, self.metric.matching_threshold, self.max_age, 120 | self.tracks, detections, confirmed_tracks) 121 | 122 | # Associate remaining tracks together with unconfirmed tracks using IOU. 123 | iou_track_candidates = unconfirmed_tracks + [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update == 1] 126 | unmatched_tracks_a = [ 127 | k for k in unmatched_tracks_a if 128 | self.tracks[k].time_since_update != 1] 129 | matches_b, unmatched_tracks_b, unmatched_detections = \ 130 | linear_assignment.min_cost_matching( 131 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 132 | detections, iou_track_candidates, unmatched_detections) 133 | 134 | matches = matches_a + matches_b 135 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 136 | return matches, unmatched_tracks, unmatched_detections 137 | 138 | def _initiate_track(self, detection): 139 | mean, covariance = self.kf.initiate(detection.to_xyah()) 140 | self.tracks.append(Track( 141 | mean, covariance, self._next_id, self.n_init, self.max_age, 142 | detection.feature)) 143 | self._next_id += 1 144 | -------------------------------------------------------------------------------- /deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/parser.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/parser.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/parser.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/parser.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- /deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | 
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with 
open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deep_sort/utils/json_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: 3 | https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f 4 | """ 5 | import json 6 | from os import makedirs 7 | from os.path import exists, join 8 | from datetime import datetime 9 | 10 | 11 | class JsonMeta(object): 12 | HOURS = 3 13 | MINUTES = 59 14 | SECONDS = 59 15 | PATH_TO_SAVE = 'LOGS' 16 | DEFAULT_FILE_NAME = 'remaining' 17 | 18 | 19 | class BaseJsonLogger(object): 20 | """ 21 | This is the base class that returns __dict__ of its own 22 | it also returns the dicts of objects in the attributes that are list instances 23 | 24 | """ 25 | 26 | def dic(self): 27 | # returns dicts of objects 28 | out = {} 29 | for k, v in self.__dict__.items(): 30 | if hasattr(v, 'dic'): 31 | out[k] = v.dic() 32 | elif isinstance(v, list): 33 | out[k] = self.list(v) 34 | else: 35 | out[k] = v 36 | return out 37 | 38 | @staticmethod 39 | def list(values): 40 | # applies the dic method on items in the list 41 | return [v.dic() if hasattr(v, 'dic') else v for v in values] 42 | 43 | 44 | class Label(BaseJsonLogger): 45 | """ 46 | For each bounding box there are various categories with confidences. Label class keeps track of that information. 47 | """ 48 | 49 | def __init__(self, category: str, confidence: float): 50 | self.category = category 51 | self.confidence = confidence 52 | 53 | 54 | class Bbox(BaseJsonLogger): 55 | """ 56 | This module stores the information for each frame and use them in JsonParser 57 | Attributes: 58 | labels (list): List of label module. 59 | top (int): 60 | left (int): 61 | width (int): 62 | height (int): 63 | 64 | Args: 65 | bbox_id (float): 66 | top (int): 67 | left (int): 68 | width (int): 69 | height (int): 70 | 71 | References: 72 | Check Label module for better understanding. 
73 |
74 |
75 | """
76 |
77 | def __init__(self, bbox_id, top, left, width, height):
78 | self.labels = []
79 | self.bbox_id = bbox_id
80 | self.top = top
81 | self.left = left
82 | self.width = width
83 | self.height = height
84 |
85 | def add_label(self, category, confidence):
86 | # appends the label; the caller is responsible for checking that top_k is not exceeded.
87 | self.labels.append(Label(category, confidence))
88 |
89 | def labels_full(self, value):
90 | return len(self.labels) == value
91 |
92 |
93 | class Frame(BaseJsonLogger):
94 | """
95 | This class stores the information for each frame and is used by `BboxToJsonLogger`.
96 | Attributes:
97 | timestamp (float): The elapsed time of captured frame
98 | frame_id (int): The frame number of the captured video
99 | bboxes (list of Bbox objects): Stores the list of bbox objects.
100 |
101 | References:
102 | Check Bbox class for better information
103 |
104 | Args:
105 | timestamp (float):
106 | frame_id (int):
107 |
108 | """
109 |
110 | def __init__(self, frame_id: int, timestamp: float = None):
111 | self.frame_id = frame_id
112 | self.timestamp = timestamp
113 | self.bboxes = []
114 |
115 | def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
116 | bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
117 | if bbox_id not in bboxes_ids:
118 | self.bboxes.append(Bbox(bbox_id, top, left, width, height))
119 | else:
120 | raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
121 |
122 | def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
123 | bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}  # Bbox stores its identifier as `bbox_id`, not `id`
124 | if bbox_id in bboxes.keys():
125 | res = bboxes.get(bbox_id)
126 | res.add_label(category, confidence)
127 | else:
128 | raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
129 |
130 |
131 | class BboxToJsonLogger(BaseJsonLogger):
132 | """
133 | This class is designed to automate the task of logging detections as JSON. The example
134 | below shows the structure of the output file.
135 | Example:
136 | {
137 | "video_details": {
138 | "frame_width": 1920,
139 | "frame_height": 1080,
140 | "frame_rate": 20,
141 | "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
142 | },
143 | "frames": [
144 | {
145 | "frame_id": 329,
146 | "timestamp": 3365.1254,
147 | "bboxes": [
148 | {
149 | "labels": [
150 | {
151 | "category": "pedestrian",
152 | "confidence": 0.9
153 | }
154 | ],
155 | "bbox_id": 0,
156 | "top": 1257,
157 | "left": 138,
158 | "width": 68,
159 | "height": 109
160 | }
161 | ]
162 | }]
163 | }
164 | Attributes:
165 | frames (dict): It's a dictionary that maps each frame_id to json attributes.
166 | video_details (dict): information about video file.
167 | top_k_labels (int): shows the allowed number of labels
168 | start_time (datetime object): we use it to automate the json output by time.
169 | 170 | Args: 171 | top_k_labels (int): shows the allowed number of labels 172 | 173 | """ 174 | 175 | def __init__(self, top_k_labels: int = 1): 176 | self.frames = {} 177 | self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, 178 | video_name=None) 179 | self.top_k_labels = top_k_labels 180 | self.start_time = datetime.now() 181 | 182 | def set_top_k(self, value): 183 | self.top_k_labels = value 184 | 185 | def frame_exists(self, frame_id: int) -> bool: 186 | """ 187 | Args: 188 | frame_id (int): 189 | 190 | Returns: 191 | bool: true if frame_id is recognized 192 | """ 193 | return frame_id in self.frames.keys() 194 | 195 | def add_frame(self, frame_id: int, timestamp: float = None) -> None: 196 | """ 197 | Args: 198 | frame_id (int): 199 | timestamp (float): opencv captured frame time property 200 | 201 | Raises: 202 | ValueError: if frame_id would not exist in class frames attribute 203 | 204 | Returns: 205 | None 206 | 207 | """ 208 | if not self.frame_exists(frame_id): 209 | self.frames[frame_id] = Frame(frame_id, timestamp) 210 | else: 211 | raise ValueError("Frame id: {} already exists".format(frame_id)) 212 | 213 | def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: 214 | """ 215 | Args: 216 | frame_id: 217 | bbox_id: 218 | 219 | Returns: 220 | bool: if bbox exists in frame bboxes list 221 | """ 222 | bboxes = [] 223 | if self.frame_exists(frame_id=frame_id): 224 | bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] 225 | return bbox_id in bboxes 226 | 227 | def find_bbox(self, frame_id: int, bbox_id: int): 228 | """ 229 | 230 | Args: 231 | frame_id: 232 | bbox_id: 233 | 234 | Returns: 235 | bbox_id (int): 236 | 237 | Raises: 238 | ValueError: if bbox_id does not exist in the bbox list of specific frame. 239 | """ 240 | if not self.bbox_exists(frame_id, bbox_id): 241 | raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) 242 | bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} 243 | return bboxes.get(bbox_id) 244 | 245 | def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: 246 | """ 247 | 248 | Args: 249 | frame_id (int): 250 | bbox_id (int): 251 | top (int): 252 | left (int): 253 | width (int): 254 | height (int): 255 | 256 | Returns: 257 | None 258 | 259 | Raises: 260 | ValueError: if bbox_id already exist in frame information with frame_id 261 | ValueError: if frame_id does not exist in frames attribute 262 | """ 263 | if self.frame_exists(frame_id): 264 | frame = self.frames[frame_id] 265 | if not self.bbox_exists(frame_id, bbox_id): 266 | frame.add_bbox(bbox_id, top, left, width, height) 267 | else: 268 | raise ValueError( 269 | "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) 270 | else: 271 | raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) 272 | 273 | def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): 274 | """ 275 | Args: 276 | frame_id: 277 | bbox_id: 278 | category: 279 | confidence: the confidence value returned from yolo detection 280 | 281 | Returns: 282 | None 283 | 284 | Raises: 285 | ValueError: if labels quota (top_k_labels) exceeds. 
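        Example:
            A short sketch with illustrative values only (the frame and bbox
            must have been registered first):

                logger = BboxToJsonLogger(top_k_labels=1)
                logger.add_frame(frame_id=0, timestamp=0.0)
                logger.add_bbox_to_frame(frame_id=0, bbox_id=0, top=50,
                                         left=100, width=40, height=80)
                logger.add_label_to_bbox(frame_id=0, bbox_id=0,
                                         category='person', confidence=0.9)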
286 | """ 287 | bbox = self.find_bbox(frame_id, bbox_id) 288 | if not bbox.labels_full(self.top_k_labels): 289 | bbox.add_label(category, confidence) 290 | else: 291 | raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) 292 | 293 | def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, 294 | video_name: str = None): 295 | self.video_details['frame_width'] = frame_width 296 | self.video_details['frame_height'] = frame_height 297 | self.video_details['frame_rate'] = frame_rate 298 | self.video_details['video_name'] = video_name 299 | 300 | def output(self): 301 | output = {'video_details': self.video_details} 302 | result = list(self.frames.values()) 303 | output['frames'] = [item.dic() for item in result] 304 | return output 305 | 306 | def json_output(self, output_name): 307 | """ 308 | Args: 309 | output_name: 310 | 311 | Returns: 312 | None 313 | 314 | Notes: 315 | It creates the json output with `output_name` name. 316 | """ 317 | if not output_name.endswith('.json'): 318 | output_name += '.json' 319 | with open(output_name, 'w') as file: 320 | json.dump(self.output(), file) 321 | file.close() 322 | 323 | def set_start(self): 324 | self.start_time = datetime.now() 325 | 326 | def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, 327 | seconds: int = 60) -> None: 328 | """ 329 | Notes: 330 | Creates folder and then periodically stores the jsons on that address. 331 | 332 | Args: 333 | output_dir (str): the directory where output files will be stored 334 | hours (int): 335 | minutes (int): 336 | seconds (int): 337 | 338 | Returns: 339 | None 340 | 341 | """ 342 | end = datetime.now() 343 | interval = 0 344 | interval += abs(min([hours, JsonMeta.HOURS]) * 3600) 345 | interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) 346 | interval += abs(min([seconds, JsonMeta.SECONDS])) 347 | diff = (end - self.start_time).seconds 348 | 349 | if diff > interval: 350 | output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' 351 | if not exists(output_dir): 352 | makedirs(output_dir) 353 | output = join(output_dir, output_name) 354 | self.json_output(output_name=output) 355 | self.frames = {} 356 | self.start_time = datetime.now() 357 | 358 | def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): 359 | """ 360 | saves as the number of frames quota increases higher. 361 | :param frames_quota: 362 | :param frame_counter: 363 | :param output_dir: 364 | :return: 365 | """ 366 | pass 367 | 368 | def flush(self, output_dir): 369 | """ 370 | Notes: 371 | We use this function to output jsons whenever possible. 372 | like the time that we exit the while loop of opencv. 
373 | 374 | Args: 375 | output_dir: 376 | 377 | Returns: 378 | None 379 | 380 | """ 381 | filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' 382 | output = join(output_dir, filename) 383 | self.json_output(output_name=output) 384 | -------------------------------------------------------------------------------- /deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | 6 | class YamlParser(edict): 7 | """ 8 | This is yaml parser based on EasyDict. 9 | """ 10 | 11 | def __init__(self, cfg_dict=None, config_file=None): 12 | if cfg_dict is None: 13 | cfg_dict = {} 14 | 15 | if config_file is not None: 16 | assert(os.path.isfile(config_file)) 17 | with open(config_file, 'r') as fo: 18 | cfg_dict.update(yaml.load(fo.read())) 19 | 20 | super(YamlParser, self).__init__(cfg_dict) 21 | 22 | def merge_from_file(self, config_file): 23 | with open(config_file, 'r') as fo: 24 | self.update(yaml.load(fo.read())) 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb 39 | ipdb.set_trace() 40 | -------------------------------------------------------------------------------- /deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 
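    A usage sketch (the decorated function name is illustrative only):

        @tik_tok
        def process_frame(frame):
            ...

    Each call then prints the elapsed time and the equivalent FPS.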
24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | antlr4-python3-runtime==4.9.3 3 | anyio==3.6.2 4 | argon2-cffi==21.3.0 5 | argon2-cffi-bindings==21.2.0 6 | arrow==1.2.3 7 | asttokens==2.2.1 8 | attrs==23.1.0 9 | backcall==0.2.0 10 | beautifulsoup4==4.12.2 11 | bleach==6.0.0 12 | blinker==1.7.0 13 | cachetools==5.3.1 14 | certifi==2023.5.7 15 | cffi==1.15.1 16 | charset-normalizer==3.1.0 17 | click==8.1.7 18 | colorama==0.4.6 19 | comm==0.1.3 20 | contourpy==1.0.7 21 | cvzone==1.6.1 22 | cycler==0.11.0 23 | Cython==3.0.2 24 | dataclasses-json==0.6.0 25 | debugpy==1.6.7 26 | decorator==5.1.1 27 | defusedxml==0.7.1 28 | easydict==1.10 29 | easyocr==1.7.1 30 | executing==1.2.0 31 | fastjsonschema==2.17.1 32 | filelock==3.12.0 33 | filterpy==1.4.5 34 | Flask==3.0.0 35 | fonttools==4.39.4 36 | fqdn==1.5.1 37 | google-auth==2.22.0 38 | google-auth-oauthlib==1.0.0 39 | grpcio==1.58.0 40 | h5py==3.9.0 41 | hydra-core==1.2.0 42 | idna==3.4 43 | imageio==2.31.3 44 | importlib-metadata==6.6.0 45 | importlib-resources==5.12.0 46 | ipykernel==6.23.1 47 | ipython==8.13.2 48 | ipython-genutils==0.2.0 49 | ipywidgets==8.0.6 50 | isoduration==20.11.0 51 | itsdangerous==2.1.2 52 | jedi==0.18.2 53 | Jinja2==3.1.2 54 | jsonpointer==2.3 55 | jsonschema==4.17.3 56 | jupyter==1.0.0 57 | jupyter-console==6.6.3 58 | jupyter-events==0.6.3 59 | jupyter_client==8.2.0 60 | jupyter_core==5.3.0 61 | jupyter_server==2.5.0 62 | jupyter_server_terminals==0.4.4 63 | jupyterlab-pygments==0.2.2 64 | jupyterlab-widgets==3.0.7 65 | kiwisolver==1.4.4 66 | labelImg==1.8.6 67 | lap==0.4.0 68 | lazy_loader==0.3 69 | loguru==0.7.1 70 | lxml==4.9.3 71 | Markdown==3.4.4 72 | MarkupSafe==2.1.2 73 | marshmallow==3.20.1 74 | matplotlib==3.7.1 75 | matplotlib-inline==0.1.6 76 | mistune==2.0.5 77 | motmetrics==1.4.0 78 | mpmath==1.3.0 79 | mypy-extensions==1.0.0 80 | nbclassic==1.0.0 81 | nbclient==0.8.0 82 | nbconvert==7.4.0 83 | nbformat==5.8.0 84 | nest-asyncio==1.5.6 85 | networkx==3.1 86 | ninja==1.11.1 87 | notebook==6.5.4 88 | notebook_shim==0.2.3 89 | numpy==1.24.3 90 | oauthlib==3.2.2 91 | omegaconf==2.3.0 92 | onemetric==0.1.2 93 | opencv-python==4.9.0.80 94 | packaging==23.1 95 | pafy==0.5.5 96 | pandas==2.0.1 97 | pandocfilters==1.5.0 98 | parso==0.8.3 99 | pickleshare==0.7.5 100 | Pillow==9.5.0 101 | platformdirs==3.5.1 102 | prometheus-client==0.16.0 103 | prompt-toolkit==3.0.38 104 | protobuf==4.24.3 105 | psutil==5.9.5 106 | pure-eval==0.2.2 107 | pyasn1==0.5.0 108 | pyasn1-modules==0.3.0 109 | pyclipper==1.3.0.post5 110 | pycocotools==2.0.7 111 | pycparser==2.21 112 | Pygments==2.15.1 113 | pyparsing==3.0.9 114 | PyQt5==5.15.9 115 | PyQt5-Qt5==5.15.2 116 | PyQt5-sip==12.12.2 117 | pyrsistent==0.19.3 118 | python-bidi==0.4.2 119 | python-dateutil==2.8.2 120 | python-json-logger==2.0.7 121 | pytube==15.0.0 122 | pytz==2023.3 123 | PyWavelets==1.4.1 124 | pywin32==306 125 | pywinpty==2.0.10 126 | PyYAML==6.0 127 | pyzmq==25.0.2 128 | qtconsole==5.4.3 129 | QtPy==2.3.1 130 | requests==2.31.0 131 | requests-oauthlib==1.3.1 132 | rfc3339-validator==0.1.4 133 | 
rfc3986-validator==0.1.1 134 | rsa==4.9 135 | scikit-image==0.21.0 136 | scipy==1.10.1 137 | seaborn==0.12.2 138 | Send2Trash==1.8.2 139 | sentry-sdk==1.23.1 140 | shapely==2.0.1 141 | six==1.16.0 142 | sniffio==1.3.0 143 | soupsieve==2.4.1 144 | stack-data==0.6.2 145 | supervision==0.14.0 146 | sympy==1.12 147 | tabulate==0.9.0 148 | tensorboard==2.14.0 149 | tensorboard-data-server==0.7.1 150 | terminado==0.17.1 151 | thop==0.1.1.post2209072238 152 | tifffile==2023.8.30 153 | tinycss2==1.2.1 154 | torch>=2.1.2 155 | torchaudio==2.0.2+cu117 156 | torchvision==0.15.2+cu117 157 | tornado==6.3.2 158 | tqdm==4.65.0 159 | traitlets==5.9.0 160 | typing-inspect==0.9.0 161 | typing_extensions==4.5.0 162 | tzdata==2023.3 163 | ultralytics==8.0.106 164 | uri-template==1.2.0 165 | urllib3==1.26.15 166 | wcwidth==0.2.6 167 | webcolors==1.13 168 | webencodings==0.5.1 169 | websocket-client==1.5.2 170 | Werkzeug==3.0.1 171 | widgetsnbextension==4.0.7 172 | win32-setctime==1.1.0 173 | xmltodict==0.13.0 174 | youtube-dl==2020.12.2 175 | zipp==3.15.0 176 | -------------------------------------------------------------------------------- /track_count_persons .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4961e54c", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Python 3.9.10\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "!python --version" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "5ea3ead0", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "'8.0.106'" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "import ultralytics\n", 40 | "ultralytics.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "id": "6801a5a9", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "'2.0.1+cu117'" 53 | ] 54 | }, 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "import torch\n", 62 | "torch.__version__" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "id": "9627ba4a", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "'NVIDIA GeForce RTX 3090'" 75 | ] 76 | }, 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "torch.cuda.get_device_name(0)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "30beac26", 89 | "metadata": {}, 90 | "source": [ 91 | "# Detect, track and count Persons" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 1, 97 | "id": "c23349aa", 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "D:\\yolov8_SAM_env\\yolov8_tracking\\1_yolov8_DeepSORT\\yolov8_DeepSORT\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "%cd yolov8_DeepSORT" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 2, 115 | "id": "7ac57944", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stderr", 120 | "output_type": "stream", 121 | "text": [ 122 | "\n", 123 | "image 1/1 
D:\\yolov8_SAM_env\\yolov8_tracking\\1_yolov8_DeepSORT\\YOLOv8_DeepSORT\\images\\person.jpg: 384x640 1 person, 1 cup, 1 chair, 2 potted plants, 1 bed, 1 book, 67.0ms\n", 124 | "Speed: 2.0ms preprocess, 67.0ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 640)\n", 125 | "Results saved to \u001b[1mruns\\detect\\predict\u001b[0m\n" 126 | ] 127 | }, 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "[0.0, 58.0, 59.0, 58.0, 41.0, 73.0, 56.0]\n", 133 | "Class: person\n", 134 | "Class: potted plant\n", 135 | "Class: bed\n", 136 | "Class: potted plant\n", 137 | "Class: cup\n", 138 | "Class: book\n", 139 | "Class: chair\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "from ultralytics import YOLO\n", 145 | "\n", 146 | "import time\n", 147 | "import torch\n", 148 | "import cv2\n", 149 | "import torch.backends.cudnn as cudnn\n", 150 | "from PIL import Image\n", 151 | "import colorsys\n", 152 | "import numpy as np\n", 153 | "\n", 154 | "# Load a model\n", 155 | "model = YOLO(\"yolov8n.pt\") # load a pretrained model (recommended for training)\n", 156 | "\n", 157 | "results = model(\"images/person.jpg\", save=True)\n", 158 | "\n", 159 | "\n", 160 | "\n", 161 | "class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", 162 | "\n", 163 | "for result in results:\n", 164 | " boxes = result.boxes # Boxes object for bbox outputs\n", 165 | " probs = result.probs # Class probabilities for classification outputs\n", 166 | " cls = boxes.cls.tolist() # Convert tensor to list\n", 167 | " xyxy = boxes.xyxy\n", 168 | " xywh = boxes.xywh # box with xywh format, (N, 4)\n", 169 | " conf = boxes.conf\n", 170 | " print(cls)\n", 171 | " for class_index in cls:\n", 172 | " class_name = class_names[int(class_index)]\n", 173 | " print(\"Class:\", class_name)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "461c7b6e", 179 | "metadata": {}, 180 | "source": [ 181 | "# DeepSORT" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 3, 187 | "id": "945f584b", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "from deep_sort.utils.parser import get_config\n", 192 | "from deep_sort.deep_sort import DeepSort\n", 193 | "from deep_sort.sort.tracker import Tracker\n", 194 | "\n", 195 | "deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'\n", 196 | "tracker = DeepSort(model_path=deep_sort_weights, max_age=70)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 4, 202 | "id": "2d74f1e2", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# Define the video path\n", 207 | "video_path = 'test_videos/2.mp4'\n", 208 | "\n", 209 | "cap = cv2.VideoCapture(video_path)\n", 210 | 
"\n", 211 | "# Get the video properties\n", 212 | "frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n", 213 | "frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", 214 | "fps = cap.get(cv2.CAP_PROP_FPS)\n", 215 | "\n", 216 | "# Define the codec and create VideoWriter object\n", 217 | "fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n", 218 | "output_path = 'output.mp4'\n", 219 | "out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))\n", 220 | "\n", 221 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 5, 227 | "id": "09056afd", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "frames = []\n", 232 | "\n", 233 | "unique_track_ids = set()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "533ff5cc", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "\n", 247 | "0: 384x640 4 persons, 6.5ms\n", 248 | "Speed: 1.0ms preprocess, 6.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 249 | "\n", 250 | "0: 384x640 4 persons, 8.0ms\n", 251 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 252 | "\n", 253 | "0: 384x640 4 persons, 7.0ms\n", 254 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 255 | "\n", 256 | "0: 384x640 4 persons, 7.0ms\n", 257 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 258 | "\n", 259 | "0: 384x640 4 persons, 8.0ms\n", 260 | "Speed: 1.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 261 | "\n", 262 | "0: 384x640 4 persons, 7.0ms\n", 263 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 264 | "\n", 265 | "0: 384x640 4 persons, 8.0ms\n", 266 | "Speed: 1.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 267 | "\n", 268 | "0: 384x640 4 persons, 7.0ms\n", 269 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 270 | "\n", 271 | "0: 384x640 4 persons, 10.0ms\n", 272 | "Speed: 1.0ms preprocess, 10.0ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 640)\n", 273 | "\n", 274 | "0: 384x640 4 persons, 7.0ms\n", 275 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 276 | "\n", 277 | "0: 384x640 4 persons, 11.0ms\n", 278 | "Speed: 2.0ms preprocess, 11.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 279 | "\n", 280 | "0: 384x640 4 persons, 8.0ms\n", 281 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 282 | "\n", 283 | "0: 384x640 4 persons, 7.0ms\n", 284 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 285 | "\n", 286 | "0: 384x640 4 persons, 7.0ms\n", 287 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 288 | "\n", 289 | "0: 384x640 4 persons, 6.0ms\n", 290 | "Speed: 1.0ms preprocess, 6.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 291 | "\n", 292 | "0: 384x640 4 persons, 7.0ms\n", 293 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 
294 | "\n", 295 | "0: 384x640 4 persons, 8.0ms\n", 296 | "Speed: 2.0ms preprocess, 8.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)\n", 978 | "\n", 979 | "0: 384x640 3 persons, 7.0ms\n", 980 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms
postprocess per image at shape (1, 3, 640, 640)\n", 981 | "\n", 982 | "0: 384x640 3 persons, 10.0ms\n", 983 | "Speed: 2.0ms preprocess, 10.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 984 | "\n", 985 | "0: 384x640 3 persons, 7.0ms\n", 986 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 987 | "\n", 988 | "0: 384x640 3 persons, 9.0ms\n", 989 | "Speed: 3.0ms preprocess, 9.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 990 | "\n", 991 | "0: 384x640 3 persons, 8.0ms\n", 992 | "Speed: 1.0ms preprocess, 8.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)\n", 993 | "\n", 994 | "0: 384x640 3 persons, 8.0ms\n", 995 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 996 | "\n", 997 | "0: 384x640 3 persons, 7.0ms\n", 998 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 999 | "\n", 1000 | "0: 384x640 3 persons, 6.0ms\n" 1001 | ] 1002 | } 1003 | ], 1004 | "source": [ 1005 | "i = 0\n", 1006 | "counter, fps, elapsed = 0, 0, 0\n", 1007 | "start_time = time.perf_counter()\n", 1008 | "\n", 1009 | "while cap.isOpened():\n", 1010 | " ret, frame = cap.read()\n", 1011 | "\n", 1012 | " if ret:\n", 1013 | " \n", 1014 | " og_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", 1015 | " frame = og_frame.copy()\n", 1016 | "\n", 1017 | " model = YOLO(\"yolov8n.pt\") # load a pretrained model (recommended for training)\n", 1018 | "\n", 1019 | " results = model(frame, device=0, classes=0, conf=0.8)\n", 1020 | "\n", 1021 | " class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", 1022 | "\n", 1023 | " for result in results:\n", 1024 | " boxes = result.boxes # Boxes object for bbox outputs\n", 1025 | " probs = result.probs # Class probabilities for classification outputs\n", 1026 | " cls = boxes.cls.tolist() # Convert tensor to list\n", 1027 | " xyxy = boxes.xyxy\n", 1028 | " conf = boxes.conf\n", 1029 | " xywh = boxes.xywh # box with xywh format, (N, 4)\n", 1030 | " for class_index in cls:\n", 1031 | " class_name = class_names[int(class_index)]\n", 1032 | " #print(\"Class:\", class_name)\n", 1033 | "\n", 1034 | " pred_cls = np.array(cls)\n", 1035 | " conf = conf.detach().cpu().numpy()\n", 1036 | " xyxy = xyxy.detach().cpu().numpy()\n", 1037 | " bboxes_xywh = xywh\n", 1038 | " bboxes_xywh = xywh.cpu().numpy()\n", 1039 | " bboxes_xywh = np.array(bboxes_xywh, dtype=float)\n", 1040 | " \n", 1041 | " tracks = tracker.update(bboxes_xywh, conf, og_frame)\n", 1042 | " \n", 1043 | " for track in tracker.tracker.tracks:\n", 1044 | " track_id = track.track_id\n", 1045 | " hits = track.hits\n", 
1046 | " x1, y1, x2, y2 = track.to_tlbr() # Get bounding box coordinates in (x1, y1, x2, y2) format\n", 1047 | " w = x2 - x1 # Calculate width\n", 1048 | " h = y2 - y1 # Calculate height\n", 1049 | "\n", 1050 | " # Set color values for red, blue, and green\n", 1051 | " red_color = (0, 0, 255) # (B, G, R)\n", 1052 | " blue_color = (255, 0, 0) # (B, G, R)\n", 1053 | " green_color = (0, 255, 0) # (B, G, R)\n", 1054 | "\n", 1055 | " # Determine color based on track_id\n", 1056 | " color_id = track_id % 3\n", 1057 | " if color_id == 0:\n", 1058 | " color = red_color\n", 1059 | " elif color_id == 1:\n", 1060 | " color = blue_color\n", 1061 | " else:\n", 1062 | " color = green_color\n", 1063 | "\n", 1064 | " cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)\n", 1065 | "\n", 1066 | " text_color = (0, 0, 0) # Black color for text\n", 1067 | " cv2.putText(og_frame, f\"{class_name}-{track_id}\", (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1, cv2.LINE_AA)\n", 1068 | "\n", 1069 | " # Add the track_id to the set of unique track IDs\n", 1070 | " unique_track_ids.add(track_id)\n", 1071 | "\n", 1072 | " # Update the person count based on the number of unique track IDs\n", 1073 | " person_count = len(unique_track_ids)\n", 1074 | "\n", 1075 | " # Update FPS and place on frame\n", 1076 | " current_time = time.perf_counter()\n", 1077 | " elapsed = (current_time - start_time)\n", 1078 | " counter += 1\n", 1079 | " if elapsed > 1:\n", 1080 | " fps = counter / elapsed\n", 1081 | " counter = 0\n", 1082 | " start_time = current_time\n", 1083 | "\n", 1084 | " # Draw person count on frame\n", 1085 | " cv2.putText(og_frame, f\"Person Count: {person_count}\", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)\n", 1086 | "\n", 1087 | " # Append the frame to the list\n", 1088 | " frames.append(og_frame)\n", 1089 | "\n", 1090 | " # Write the frame to the output video file\n", 1091 | " out.write(cv2.cvtColor(og_frame, cv2.COLOR_RGB2BGR))\n", 1092 | "\n", 1093 | " # Show the frame\n", 1094 | " #cv2.imshow(\"Video\", og_frame)\n", 1095 | "# if cv2.waitKey(1) & 0xFF == ord('q'):\n", 1096 | "# break\n", 1097 | "\n", 1098 | "cap.release()\n", 1099 | "out.release()\n", 1100 | "cv2.destroyAllWindows()\n" 1101 | ] 1102 | } 1103 | ], 1104 | "metadata": { 1105 | "kernelspec": { 1106 | "display_name": "Python 3 (ipykernel)", 1107 | "language": "python", 1108 | "name": "python3" 1109 | }, 1110 | "language_info": { 1111 | "codemirror_mode": { 1112 | "name": "ipython", 1113 | "version": 3 1114 | }, 1115 | "file_extension": ".py", 1116 | "mimetype": "text/x-python", 1117 | "name": "python", 1118 | "nbconvert_exporter": "python", 1119 | "pygments_lexer": "ipython3", 1120 | "version": "3.9.10" 1121 | } 1122 | }, 1123 | "nbformat": 4, 1124 | "nbformat_minor": 5 1125 | } 1126 | --------------------------------------------------------------------------------
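
A condensed, script-style reading of the notebook's detect-track-count loop is sketched below. This is not part of the repository; it is a sketch under the notebook's own assumptions (the `deep_sort` package from this repo, the `deep_sort/deep/checkpoint/ckpt.t7` re-identification weights, a local `test_videos/2.mp4` clip, and the `yolov8n.pt` detector), and it loads the YOLO model once before the loop rather than re-creating it on every frame as the notebook cell does.

```python
# Hedged sketch of the notebook pipeline: YOLOv8 person detection feeding the
# bundled DeepSORT tracker, with a cumulative count of distinct track IDs.
# All paths below mirror the notebook and are assumptions about the local setup.
import cv2
import torch
from ultralytics import YOLO

from deep_sort.deep_sort import DeepSort

VIDEO_IN = "test_videos/2.mp4"              # assumed input clip, as in the notebook
VIDEO_OUT = "output.mp4"
REID_WEIGHTS = "deep_sort/deep/checkpoint/ckpt.t7"


def main():
    device = 0 if torch.cuda.is_available() else "cpu"
    model = YOLO("yolov8n.pt")              # loaded once, reused for every frame
    tracker = DeepSort(model_path=REID_WEIGHTS, max_age=70)

    cap = cv2.VideoCapture(VIDEO_IN)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(VIDEO_OUT, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    unique_ids = set()                      # distinct DeepSORT track IDs seen so far
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Person class only (COCO class 0), high-confidence detections.
        result = model(rgb, device=device, classes=0, conf=0.8, verbose=False)[0]
        bboxes_xywh = result.boxes.xywh.cpu().numpy()
        confs = result.boxes.conf.cpu().numpy()

        # DeepSORT takes (center-x, center-y, w, h) boxes plus the frame for re-ID features.
        tracker.update(bboxes_xywh, confs, rgb)

        for track in tracker.tracker.tracks:
            x1, y1, x2, y2 = track.to_tlbr()
            unique_ids.add(track.track_id)
            cv2.rectangle(rgb, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(rgb, f"person-{track.track_id}", (int(x1) + 10, int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

        cv2.putText(rgb, f"Person Count: {len(unique_ids)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        out.write(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))

    cap.release()
    out.release()


if __name__ == "__main__":
    main()
```

As in the notebook, the count is cumulative: every track ID DeepSORT assigns is added to the set, so a re-identified person keeps their earlier ID while genuinely new tracks grow the count.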