├── .gitattributes ├── README.md ├── deep_sort ├── README.md ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── deep_sort.cpython-310.pyc │ └── deep_sort.cpython-39.pyc ├── configs │ └── deep_sort.yaml ├── deep │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── feature_extractor.cpython-310.pyc │ │ ├── feature_extractor.cpython-39.pyc │ │ ├── model.cpython-310.pyc │ │ └── model.cpython-39.pyc │ ├── checkpoint │ │ ├── .gitkeep │ │ ├── ckpt.t7 │ │ └── original_ckpt.t7 │ ├── evaluate.py │ ├── feature_extractor.py │ ├── model.py │ ├── original_model.py │ ├── test.py │ └── train.py ├── deep_sort.py ├── sort │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── detection.cpython-310.pyc │ │ ├── detection.cpython-39.pyc │ │ ├── iou_matching.cpython-310.pyc │ │ ├── iou_matching.cpython-39.pyc │ │ ├── kalman_filter.cpython-310.pyc │ │ ├── kalman_filter.cpython-39.pyc │ │ ├── linear_assignment.cpython-310.pyc │ │ ├── linear_assignment.cpython-39.pyc │ │ ├── nn_matching.cpython-310.pyc │ │ ├── nn_matching.cpython-39.pyc │ │ ├── track.cpython-310.pyc │ │ ├── track.cpython-39.pyc │ │ ├── tracker.cpython-310.pyc │ │ └── tracker.cpython-39.pyc │ ├── detection.py │ ├── detection.py.bak │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── parser.cpython-310.pyc │ └── parser.cpython-39.pyc │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── requirements.txt └── track_count_persons .ipynb /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tracking-and-counting-Using-YOLOv8-and-DeepSORT 2 | Tracking and counting persons 3 | 4 | Follow this Youtube video to run this code: https://youtu.be/Y2fyDYcfmBg 5 | 6 | Clone this github repo: 7 | 8 | git clone https://github.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT.git 9 | 10 | Open jupyter notebook and start working 11 | 12 | 13 | ![1](https://github.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/assets/60029146/a1057b86-fcd7-412c-b7b0-583101cf91b6) 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/README.md: -------------------------------------------------------------------------------- 1 | # Deep Sort 2 | 3 | This is the implemention of deep sort with pytorch. 
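The tracker is exposed through the `DeepSort` class in `deep_sort.py` (the package `__init__.py` also provides a `build_tracker(cfg, use_cuda)` helper that takes a config object matching `configs/deep_sort.yaml`). Below is a minimal usage sketch, not the project's own demo code: it assumes the repository root is on the Python path and uses the bundled re-ID checkpoint; the frame path and detection numbers are placeholders. Detections must be passed as center-based `(x, y, w, h)` boxes with one confidence score per box.

```python
import cv2
import numpy as np
from deep_sort import DeepSort  # the package __init__ re-exports DeepSort

# Build the tracker with the bundled re-ID checkpoint (path relative to the repo root).
tracker = DeepSort("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=False)

frame = cv2.imread("frame.jpg")              # placeholder: any BGR frame from your video
bbox_xywh = np.array([[320, 240, 80, 160]])  # placeholder detection: center x, center y, w, h
confidences = np.array([0.9])                # one detector confidence per box

# update() extracts appearance features, runs the Kalman filter and data association,
# and returns rows of [x1, y1, x2, y2, track_id] for confirmed tracks (empty until a
# track has survived n_init consecutive frames).
outputs = tracker.update(bbox_xywh, confidences, frame)
print(outputs)
```

Detector boxes in corner format (for example YOLOv8 `xyxy` output) need to be converted to this center-based format before calling `update()`.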
-------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deep_sort/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/deep_sort.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/deep_sort.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/__pycache__/deep_sort.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/__pycache__/deep_sort.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | 11 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/__init__.cpython-310.pyc 
-------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/feature_extractor.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/feature_extractor.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/feature_extractor.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/deep/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/original_ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/deep/checkpoint/original_ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = 
scores.topk(5, dim=1)[1][:, 0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct / ql.size(0))) 14 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | 10 | class Extractor(object): 11 | def __init__(self, model_path, use_cuda=True): 12 | self.net = Net(reid=True) 13 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 14 | state_dict = torch.load(model_path, map_location=torch.device(self.device))[ 15 | 'net_dict'] 16 | self.net.load_state_dict(state_dict) 17 | logger = logging.getLogger("root.tracker") 18 | logger.info("Loading weights from {}... Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (64, 128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 24 | ]) 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze( 39 | 0) for im in im_crops], dim=0).float() 40 | return im_batch 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 | y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | 
for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=751, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | self.conv = nn.Sequential( 59 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(64), 61 | nn.ReLU(inplace=True), 62 | # nn.Conv2d(32,32,3,stride=1,padding=1), 63 | # nn.BatchNorm2d(32), 64 | # nn.ReLU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(64, 64, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(64, 128, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(128, 256, 2, True) 73 | # 128 16 8 74 | self.layer4 = make_layers(256, 512, 2, True) 75 | # 256 8 4 76 | self.avgpool = nn.AvgPool2d((8, 4), 1) 77 | # 256 1 1 78 | self.reid = reid 79 | self.classifier = nn.Sequential( 80 | nn.Linear(512, 256), 81 | nn.BatchNorm1d(256), 82 | nn.ReLU(inplace=True), 83 | nn.Dropout(), 84 | nn.Linear(256, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | x = self.layer4(x) 93 | x = self.avgpool(x) 94 | x = x.view(x.size(0), -1) 95 | # B x 128 96 | if self.reid: 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | # classifier 100 | x = self.classifier(x) 101 | return x 102 | 103 | 104 | if __name__ == '__main__': 105 | net = Net() 106 | x = torch.randn(4, 3, 128, 64) 107 | y = net(x) 108 | import ipdb 109 | ipdb.set_trace() 110 | -------------------------------------------------------------------------------- /deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d( 12 | c_in, c_out, 3, stride=2, padding=1, bias=False) 13 | else: 14 | self.conv1 = nn.Conv2d( 15 | c_in, c_out, 3, stride=1, padding=1, bias=False) 16 | self.bn1 = nn.BatchNorm2d(c_out) 17 | self.relu = nn.ReLU(True) 18 | self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, 19 | padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(c_out) 21 | if is_downsample: 22 | self.downsample = nn.Sequential( 23 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 24 | nn.BatchNorm2d(c_out) 25 | ) 26 | elif c_in != c_out: 27 | self.downsample = nn.Sequential( 28 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 29 | nn.BatchNorm2d(c_out) 30 | ) 31 | self.is_downsample = True 32 | 33 | def forward(self, x): 34 | y = self.conv1(x) 35 | y = self.bn1(y) 36 | y = self.relu(y) 37 | y = self.conv2(y) 38 | y = self.bn2(y) 39 | if self.is_downsample: 40 | x = self.downsample(x) 41 | return F.relu(x.add(y), True) 42 | 43 | 44 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 45 | blocks = [] 46 | for i in range(repeat_times): 47 | if i == 0: 48 | blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] 49 | else: 50 | blocks += [BasicBlock(c_out, c_out), ] 51 | return nn.Sequential(*blocks) 52 | 53 | 54 | class Net(nn.Module): 55 | def __init__(self, num_classes=625, reid=False): 56 | super(Net, self).__init__() 57 | # 3 128 64 58 | 
self.conv = nn.Sequential( 59 | nn.Conv2d(3, 32, 3, stride=1, padding=1), 60 | nn.BatchNorm2d(32), 61 | nn.ELU(inplace=True), 62 | nn.Conv2d(32, 32, 3, stride=1, padding=1), 63 | nn.BatchNorm2d(32), 64 | nn.ELU(inplace=True), 65 | nn.MaxPool2d(3, 2, padding=1), 66 | ) 67 | # 32 64 32 68 | self.layer1 = make_layers(32, 32, 2, False) 69 | # 32 64 32 70 | self.layer2 = make_layers(32, 64, 2, True) 71 | # 64 32 16 72 | self.layer3 = make_layers(64, 128, 2, True) 73 | # 128 16 8 74 | self.dense = nn.Sequential( 75 | nn.Dropout(p=0.6), 76 | nn.Linear(128*16*8, 128), 77 | nn.BatchNorm1d(128), 78 | nn.ELU(inplace=True) 79 | ) 80 | # 256 1 1 81 | self.reid = reid 82 | self.batch_norm = nn.BatchNorm1d(128) 83 | self.classifier = nn.Sequential( 84 | nn.Linear(128, num_classes), 85 | ) 86 | 87 | def forward(self, x): 88 | x = self.conv(x) 89 | x = self.layer1(x) 90 | x = self.layer2(x) 91 | x = self.layer3(x) 92 | 93 | x = x.view(x.size(0), -1) 94 | if self.reid: 95 | x = self.dense[0](x) 96 | x = self.dense[1](x) 97 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 98 | return x 99 | x = self.dense(x) 100 | # B x 128 101 | # classifier 102 | x = self.classifier(x) 103 | return x 104 | 105 | 106 | if __name__ == '__main__': 107 | net = Net(reid=True) 108 | x = torch.randn(4, 3, 128, 64) 109 | y = net(x) 110 | import ipdb 111 | ipdb.set_trace() 112 | -------------------------------------------------------------------------------- /deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir", default='data', type=str) 12 | parser.add_argument("--no-cuda", action="store_true") 13 | parser.add_argument("--gpu-id", default=0, type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format( 18 | args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 19 | if torch.cuda.is_available() and not args.no_cuda: 20 | cudnn.benchmark = True 21 | 22 | # data loader 23 | root = args.data_dir 24 | query_dir = os.path.join(root, "query") 25 | gallery_dir = os.path.join(root, "gallery") 26 | transform = torchvision.transforms.Compose([ 27 | torchvision.transforms.Resize((128, 64)), 28 | torchvision.transforms.ToTensor(), 29 | torchvision.transforms.Normalize( 30 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 31 | ]) 32 | queryloader = torch.utils.data.DataLoader( 33 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 34 | batch_size=64, shuffle=False 35 | ) 36 | galleryloader = torch.utils.data.DataLoader( 37 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 38 | batch_size=64, shuffle=False 39 | ) 40 | 41 | # net definition 42 | net = Net(reid=True) 43 | assert os.path.isfile( 44 | "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 
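# Unlike feature_extractor.py, the torch.load call below does not pass map_location, so a
# GPU-saved checkpoint on a CPU-only machine may need
# torch.load("./checkpoint/ckpt.t7", map_location=device) instead.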
45 | print('Loading from checkpoint/ckpt.t7') 46 | checkpoint = torch.load("./checkpoint/ckpt.t7") 47 | net_dict = checkpoint['net_dict'] 48 | net.load_state_dict(net_dict, strict=False) 49 | net.eval() 50 | net.to(device) 51 | 52 | # compute features 53 | query_features = torch.tensor([]).float() 54 | query_labels = torch.tensor([]).long() 55 | gallery_features = torch.tensor([]).float() 56 | gallery_labels = torch.tensor([]).long() 57 | 58 | with torch.no_grad(): 59 | for idx, (inputs, labels) in enumerate(queryloader): 60 | inputs = inputs.to(device) 61 | features = net(inputs).cpu() 62 | query_features = torch.cat((query_features, features), dim=0) 63 | query_labels = torch.cat((query_labels, labels)) 64 | 65 | for idx, (inputs, labels) in enumerate(galleryloader): 66 | inputs = inputs.to(device) 67 | features = net(inputs).cpu() 68 | gallery_features = torch.cat((gallery_features, features), dim=0) 69 | gallery_labels = torch.cat((gallery_labels, labels)) 70 | 71 | gallery_labels -= 2 72 | 73 | # save features 74 | features = { 75 | "qf": query_features, 76 | "ql": query_labels, 77 | "gf": gallery_features, 78 | "gl": gallery_labels 79 | } 80 | torch.save(features, "features.pth") 81 | -------------------------------------------------------------------------------- /deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir", default='data', type=str) 15 | parser.add_argument("--no-cuda", action="store_true") 16 | parser.add_argument("--gpu-id", default=0, type=int) 17 | parser.add_argument("--lr", default=0.1, type=float) 18 | parser.add_argument("--interval", '-i', default=20, type=int) 19 | parser.add_argument('--resume', '-r', action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format( 24 | args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 25 | if torch.cuda.is_available() and not args.no_cuda: 26 | cudnn.benchmark = True 27 | 28 | # data loading 29 | root = args.data_dir 30 | train_dir = os.path.join(root, "train") 31 | test_dir = os.path.join(root, "test") 32 | transform_train = torchvision.transforms.Compose([ 33 | torchvision.transforms.RandomCrop((128, 64), padding=4), 34 | torchvision.transforms.RandomHorizontalFlip(), 35 | torchvision.transforms.ToTensor(), 36 | torchvision.transforms.Normalize( 37 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 38 | ]) 39 | transform_test = torchvision.transforms.Compose([ 40 | torchvision.transforms.Resize((128, 64)), 41 | torchvision.transforms.ToTensor(), 42 | torchvision.transforms.Normalize( 43 | [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 44 | ]) 45 | trainloader = torch.utils.data.DataLoader( 46 | torchvision.datasets.ImageFolder(train_dir, transform=transform_train), 47 | batch_size=64, shuffle=True 48 | ) 49 | testloader = torch.utils.data.DataLoader( 50 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 51 | batch_size=64, shuffle=True 52 | ) 53 | num_classes = max(len(trainloader.dataset.classes), 54 | len(testloader.dataset.classes)) 55 | 56 | # net definition 57 | start_epoch = 0 58 | net = Net(num_classes=num_classes) 59 | if args.resume: 60 | assert 
os.path.isfile( 61 | "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 62 | print('Loading from checkpoint/ckpt.t7') 63 | checkpoint = torch.load("./checkpoint/ckpt.t7") 64 | # import ipdb; ipdb.set_trace() 65 | net_dict = checkpoint['net_dict'] 66 | net.load_state_dict(net_dict) 67 | best_acc = checkpoint['acc'] 68 | start_epoch = checkpoint['epoch'] 69 | net.to(device) 70 | 71 | # loss and optimizer 72 | criterion = torch.nn.CrossEntropyLoss() 73 | optimizer = torch.optim.SGD( 74 | net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 75 | best_acc = 0. 76 | 77 | # train function for each epoch 78 | 79 | 80 | def train(epoch): 81 | print("\nEpoch : %d" % (epoch+1)) 82 | net.train() 83 | training_loss = 0. 84 | train_loss = 0. 85 | correct = 0 86 | total = 0 87 | interval = args.interval 88 | start = time.time() 89 | for idx, (inputs, labels) in enumerate(trainloader): 90 | # forward 91 | inputs, labels = inputs.to(device), labels.to(device) 92 | outputs = net(inputs) 93 | loss = criterion(outputs, labels) 94 | 95 | # backward 96 | optimizer.zero_grad() 97 | loss.backward() 98 | optimizer.step() 99 | 100 | # accumurating 101 | training_loss += loss.item() 102 | train_loss += loss.item() 103 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 104 | total += labels.size(0) 105 | 106 | # print 107 | if (idx+1) % interval == 0: 108 | end = time.time() 109 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 110 | 100.*(idx+1)/len(trainloader), end-start, training_loss / 111 | interval, correct, total, 100.*correct/total 112 | )) 113 | training_loss = 0. 114 | start = time.time() 115 | 116 | return train_loss/len(trainloader), 1. - correct/total 117 | 118 | 119 | def test(epoch): 120 | global best_acc 121 | net.eval() 122 | test_loss = 0. 123 | correct = 0 124 | total = 0 125 | start = time.time() 126 | with torch.no_grad(): 127 | for idx, (inputs, labels) in enumerate(testloader): 128 | inputs, labels = inputs.to(device), labels.to(device) 129 | outputs = net(inputs) 130 | loss = criterion(outputs, labels) 131 | 132 | test_loss += loss.item() 133 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 134 | total += labels.size(0) 135 | 136 | print("Testing ...") 137 | end = time.time() 138 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 139 | 100.*(idx+1)/len(testloader), end-start, test_loss / 140 | len(testloader), correct, total, 100.*correct/total 141 | )) 142 | 143 | # saving checkpoint 144 | acc = 100.*correct/total 145 | if acc > best_acc: 146 | best_acc = acc 147 | print("Saving parameters to checkpoint/ckpt.t7") 148 | checkpoint = { 149 | 'net_dict': net.state_dict(), 150 | 'acc': acc, 151 | 'epoch': epoch, 152 | } 153 | if not os.path.isdir('checkpoint'): 154 | os.mkdir('checkpoint') 155 | torch.save(checkpoint, './checkpoint/ckpt.t7') 156 | 157 | return test_loss/len(testloader), 1. 
- correct/total 158 | 159 | 160 | # plot figure 161 | x_epoch = [] 162 | record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} 163 | fig = plt.figure() 164 | ax0 = fig.add_subplot(121, title="loss") 165 | ax1 = fig.add_subplot(122, title="top1err") 166 | 167 | 168 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 169 | global record 170 | record['train_loss'].append(train_loss) 171 | record['train_err'].append(train_err) 172 | record['test_loss'].append(test_loss) 173 | record['test_err'].append(test_err) 174 | 175 | x_epoch.append(epoch) 176 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 177 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 178 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 179 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 180 | if epoch == 0: 181 | ax0.legend() 182 | ax1.legend() 183 | fig.savefig("train.jpg") 184 | 185 | # lr decay 186 | 187 | 188 | def lr_decay(): 189 | global optimizer 190 | for params in optimizer.param_groups: 191 | params['lr'] *= 0.1 192 | lr = params['lr'] 193 | print("Learning rate adjusted to {}".format(lr)) 194 | 195 | 196 | def main(): 197 | for epoch in range(start_epoch, start_epoch+40): 198 | train_loss, train_err = train(epoch) 199 | test_loss, test_err = test(epoch) 200 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 201 | if (epoch+1) % 20 == 0: 202 | lr_decay() 203 | 204 | 205 | if __name__ == '__main__': 206 | main() 207 | -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.detection import Detection 7 | from .sort.tracker import Tracker 8 | 9 | 10 | __all__ = ['DeepSort'] 11 | 12 | 13 | class DeepSort(object): 14 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True): 15 | self.min_confidence = min_confidence 16 | self.nms_max_overlap = nms_max_overlap 17 | 18 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 19 | 20 | max_cosine_distance = max_dist 21 | metric = NearestNeighborDistanceMetric( 22 | "cosine", max_cosine_distance, nn_budget) 23 | self.tracker = Tracker( 24 | metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 25 | 26 | def update(self, bbox_xywh, confidences, ori_img): 27 | self.height, self.width = ori_img.shape[:2] 28 | # generate detections 29 | features = self._get_features(bbox_xywh, ori_img) 30 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 31 | detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate( 32 | confidences) if conf > self.min_confidence] 33 | 34 | # run on non-maximum supression 35 | boxes = np.array([d.tlwh for d in detections]) 36 | scores = np.array([d.confidence for d in detections]) 37 | 38 | # update tracker 39 | self.tracker.predict() 40 | self.tracker.update(detections) 41 | 42 | # output bbox identities 43 | outputs = [] 44 | for track in self.tracker.tracks: 45 | if not track.is_confirmed() or track.time_since_update > 1: 46 | continue 47 | box = track.to_tlwh() 48 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 49 | track_id = track.track_id 50 | #outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 51 | 
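# np.int was deprecated in NumPy 1.20 and removed in 1.24, so the builtin int is used below.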
outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int)) 52 | 53 | if len(outputs) > 0: 54 | outputs = np.stack(outputs, axis=0) 55 | return outputs 56 | 57 | """ 58 | TODO: 59 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 60 | Thanks JieChen91@github.com for reporting this bug! 61 | """ 62 | @staticmethod 63 | def _xywh_to_tlwh(bbox_xywh): 64 | if isinstance(bbox_xywh, np.ndarray): 65 | bbox_tlwh = bbox_xywh.copy() 66 | elif isinstance(bbox_xywh, torch.Tensor): 67 | bbox_tlwh = bbox_xywh.clone() 68 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 69 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 70 | return bbox_tlwh 71 | 72 | def _xywh_to_xyxy(self, bbox_xywh): 73 | x, y, w, h = bbox_xywh 74 | x1 = max(int(x - w / 2), 0) 75 | x2 = min(int(x + w / 2), self.width - 1) 76 | y1 = max(int(y - h / 2), 0) 77 | y2 = min(int(y + h / 2), self.height - 1) 78 | return x1, y1, x2, y2 79 | 80 | def _tlwh_to_xyxy(self, bbox_tlwh): 81 | """ 82 | TODO: 83 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 84 | Thanks JieChen91@github.com for reporting this bug! 85 | """ 86 | x, y, w, h = bbox_tlwh 87 | x1 = max(int(x), 0) 88 | x2 = min(int(x+w), self.width - 1) 89 | y1 = max(int(y), 0) 90 | y2 = min(int(y+h), self.height - 1) 91 | return x1, y1, x2, y2 92 | 93 | def increment_ages(self): 94 | self.tracker.increment_ages() 95 | 96 | def _xyxy_to_tlwh(self, bbox_xyxy): 97 | x1, y1, x2, y2 = bbox_xyxy 98 | 99 | t = x1 100 | l = y1 101 | w = int(x2 - x1) 102 | h = int(y2 - y1) 103 | return t, l, w, h 104 | 105 | def _get_features(self, bbox_xywh, ori_img): 106 | im_crops = [] 107 | for box in bbox_xywh: 108 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 109 | im = ori_img[y1:y2, x1:x2] 110 | im_crops.append(im) 111 | if im_crops: 112 | features = self.extractor(im_crops) 113 | else: 114 | features = np.array([]) 115 | return features 116 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/detection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/detection.cpython-310.pyc -------------------------------------------------------------------------------- 
/deep_sort/sort/__pycache__/detection.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/detection.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/iou_matching.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/iou_matching.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/iou_matching.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/kalman_filter.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/kalman_filter.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/kalman_filter.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/linear_assignment.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/linear_assignment.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/linear_assignment.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/nn_matching.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/nn_matching.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/nn_matching.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/track.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/track.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/track.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/track.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/tracker.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/tracker.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/sort/__pycache__/tracker.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/sort/__pycache__/tracker.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | 35 | # def __init__(self, tlwh, confidence, feature): 36 | # self.tlwh = np.asarray(tlwh, dtype=np.float) 37 | # self.confidence = float(confidence) 38 | # self.feature = np.asarray(feature, dtype=np.float32) 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 
51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /deep_sort/sort/detection.py.bak: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 
45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray( 80 | [detections[i].tlwh for i in detection_indices]) 81 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 82 | return cost_matrix 83 | -------------------------------------------------------------------------------- /deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 
70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 
172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 
28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 
109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 
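    Examples
    --------
    A minimal usage sketch; it assumes `kf`, `cost_matrix`, `tracks`,
    `detections` and both index lists were already built elsewhere (this is
    the same call pattern used by `Tracker._match` in tracker.py):

    >>> cost_matrix = gate_cost_matrix(
    ...     kf, cost_matrix, tracks, detections,
    ...     track_indices, detection_indices)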
181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 
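    Examples
    --------
    A toy sketch with hand-picked 2-D feature vectors (illustrative values
    only):

    >>> import numpy as np
    >>> x = np.array([[1., 0.], [0., 1.]])  # two stored samples
    >>> y = np.array([[1., 0.]])            # one query feature
    >>> _nn_cosine_distance(x, y)
    array([0.])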
93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | if metric == "euclidean": 126 | self._metric = _nn_euclidean_distance 127 | elif metric == "cosine": 128 | self._metric = _nn_cosine_distance 129 | else: 130 | raise ValueError( 131 | "Invalid metric; must be either 'euclidean' or 'cosine'") 132 | self.matching_threshold = matching_threshold 133 | self.budget = budget 134 | self.samples = {} 135 | 136 | def partial_fit(self, features, targets, active_targets): 137 | """Update the distance metric with new data. 138 | 139 | Parameters 140 | ---------- 141 | features : ndarray 142 | An NxM matrix of N features of dimensionality M. 143 | targets : ndarray 144 | An integer array of associated target identities. 145 | active_targets : List[int] 146 | A list of targets that are currently present in the scene. 147 | 148 | """ 149 | for feature, target in zip(features, targets): 150 | self.samples.setdefault(target, []).append(feature) 151 | if self.budget is not None: 152 | self.samples[target] = self.samples[target][-self.budget:] 153 | self.samples = {k: self.samples[k] for k in active_targets} 154 | 155 | def distance(self, features, targets): 156 | """Compute distance between features and targets. 157 | 158 | Parameters 159 | ---------- 160 | features : ndarray 161 | An NxM matrix of N features of dimensionality M. 162 | targets : List[int] 163 | A list of targets to match the given `features` against. 164 | 165 | Returns 166 | ------- 167 | ndarray 168 | Returns a cost matrix of shape len(targets), len(features), where 169 | element (i, j) contains the closest squared distance between 170 | `targets[i]` and `features[j]`. 171 | 172 | """ 173 | cost_matrix = np.zeros((len(targets), len(features))) 174 | for i, target in enumerate(targets): 175 | cost_matrix[i, :] = self._metric(self.samples[target], features) 176 | return cost_matrix 177 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. 
[1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> scores = [d.confidence for d in detections]
19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20 | >>> detections = [detections[i] for i in indices]
21 |
22 | Parameters
23 | ----------
24 | boxes : ndarray
25 | Array of ROIs (x, y, width, height).
26 | max_bbox_overlap : float
27 | ROIs that overlap more than this value are suppressed.
28 | scores : Optional[array_like]
29 | Detector confidence score.
30 |
31 | Returns
32 | -------
33 | List[int]
34 | Returns indices of detections that have survived non-maxima suppression.
35 |
36 | """
37 | if len(boxes) == 0:
38 | return []
39 |
40 | boxes = boxes.astype(float)  # `np.float` was removed in NumPy 1.24 (the version pinned in requirements.txt); the builtin float is the drop-in replacement
41 | pick = []
42 |
43 | x1 = boxes[:, 0]
44 | y1 = boxes[:, 1]
45 | x2 = boxes[:, 2] + boxes[:, 0]
46 | y2 = boxes[:, 3] + boxes[:, 1]
47 |
48 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 | if scores is not None:
50 | idxs = np.argsort(scores)
51 | else:
52 | idxs = np.argsort(y2)
53 |
54 | while len(idxs) > 0:
55 | last = len(idxs) - 1
56 | i = idxs[last]
57 | pick.append(i)
58 |
59 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 |
64 | w = np.maximum(0, xx2 - xx1 + 1)
65 | h = np.maximum(0, yy2 - yy1 + 1)
66 |
67 | overlap = (w * h) / area[idxs[:last]]
68 |
69 | idxs = np.delete(
70 | idxs, np.concatenate(
71 | ([last], np.where(overlap > max_bbox_overlap)[0])))
72 |
73 | return pick
74 |
-------------------------------------------------------------------------------- /deep_sort/sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, miny, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def increment_age(self): 113 | self.age += 1 114 | self.time_since_update += 1 115 | 116 | def predict(self, kf): 117 | """Propagate the state distribution to the current time step using a 118 | Kalman filter prediction step. 119 | 120 | Parameters 121 | ---------- 122 | kf : kalman_filter.KalmanFilter 123 | The Kalman filter. 124 | 125 | """ 126 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 127 | self.increment_age() 128 | 129 | def update(self, kf, detection): 130 | """Perform Kalman filter measurement update step and update the feature 131 | cache. 132 | 133 | Parameters 134 | ---------- 135 | kf : kalman_filter.KalmanFilter 136 | The Kalman filter. 137 | detection : Detection 138 | The associated detection. 139 | 140 | """ 141 | self.mean, self.covariance = kf.update( 142 | self.mean, self.covariance, detection.to_xyah()) 143 | self.features.append(detection.feature) 144 | 145 | self.hits += 1 146 | self.time_since_update = 0 147 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 148 | self.state = TrackState.Confirmed 149 | 150 | def mark_missed(self): 151 | """Mark this track as missed (no association at the current time step). 152 | """ 153 | if self.state == TrackState.Tentative: 154 | self.state = TrackState.Deleted 155 | elif self.time_since_update > self._max_age: 156 | self.state = TrackState.Deleted 157 | 158 | def is_tentative(self): 159 | """Returns True if this track is tentative (unconfirmed). 
160 | """ 161 | return self.state == TrackState.Tentative 162 | 163 | def is_confirmed(self): 164 | """Returns True if this track is confirmed.""" 165 | return self.state == TrackState.Confirmed 166 | 167 | def is_deleted(self): 168 | """Returns True if this track is dead and should be deleted.""" 169 | return self.state == TrackState.Deleted 170 | -------------------------------------------------------------------------------- /deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def increment_ages(self): 59 | for track in self.tracks: 60 | track.increment_age() 61 | track.mark_missed() 62 | 63 | def update(self, detections): 64 | """Perform measurement update and track management. 65 | 66 | Parameters 67 | ---------- 68 | detections : List[deep_sort.detection.Detection] 69 | A list of detections at the current time step. 70 | 71 | """ 72 | # Run matching cascade. 73 | matches, unmatched_tracks, unmatched_detections = \ 74 | self._match(detections) 75 | 76 | # Update track set. 77 | for track_idx, detection_idx in matches: 78 | self.tracks[track_idx].update( 79 | self.kf, detections[detection_idx]) 80 | for track_idx in unmatched_tracks: 81 | self.tracks[track_idx].mark_missed() 82 | for detection_idx in unmatched_detections: 83 | self._initiate_track(detections[detection_idx]) 84 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 85 | 86 | # Update distance metric. 
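        # The loop below gathers the appearance features of every confirmed
        # track and hands them to NearestNeighborDistanceMetric.partial_fit,
        # which caches them per track id (trimmed to `budget`) and drops
        # identities that are no longer active; each track's local feature
        # list is then cleared so the same features are not submitted twice.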
87 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 88 | features, targets = [], [] 89 | for track in self.tracks: 90 | if not track.is_confirmed(): 91 | continue 92 | features += track.features 93 | targets += [track.track_id for _ in track.features] 94 | track.features = [] 95 | self.metric.partial_fit( 96 | np.asarray(features), np.asarray(targets), active_targets) 97 | 98 | def _match(self, detections): 99 | 100 | def gated_metric(tracks, dets, track_indices, detection_indices): 101 | features = np.array([dets[i].feature for i in detection_indices]) 102 | targets = np.array([tracks[i].track_id for i in track_indices]) 103 | cost_matrix = self.metric.distance(features, targets) 104 | cost_matrix = linear_assignment.gate_cost_matrix( 105 | self.kf, cost_matrix, tracks, dets, track_indices, 106 | detection_indices) 107 | 108 | return cost_matrix 109 | 110 | # Split track set into confirmed and unconfirmed tracks. 111 | confirmed_tracks = [ 112 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 113 | unconfirmed_tracks = [ 114 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 115 | 116 | # Associate confirmed tracks using appearance features. 117 | matches_a, unmatched_tracks_a, unmatched_detections = \ 118 | linear_assignment.matching_cascade( 119 | gated_metric, self.metric.matching_threshold, self.max_age, 120 | self.tracks, detections, confirmed_tracks) 121 | 122 | # Associate remaining tracks together with unconfirmed tracks using IOU. 123 | iou_track_candidates = unconfirmed_tracks + [ 124 | k for k in unmatched_tracks_a if 125 | self.tracks[k].time_since_update == 1] 126 | unmatched_tracks_a = [ 127 | k for k in unmatched_tracks_a if 128 | self.tracks[k].time_since_update != 1] 129 | matches_b, unmatched_tracks_b, unmatched_detections = \ 130 | linear_assignment.min_cost_matching( 131 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 132 | detections, iou_track_candidates, unmatched_detections) 133 | 134 | matches = matches_a + matches_b 135 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 136 | return matches, unmatched_tracks, unmatched_detections 137 | 138 | def _initiate_track(self, detection): 139 | mean, covariance = self.kf.initiate(detection.to_xyah()) 140 | self.tracks.append(Track( 141 | mean, covariance, self._next_id, self.n_init, self.max_age, 142 | detection.feature)) 143 | self._next_id += 1 144 | -------------------------------------------------------------------------------- /deep_sort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__init__.py -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/parser.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/parser.cpython-310.pyc -------------------------------------------------------------------------------- /deep_sort/utils/__pycache__/parser.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AarohiSingla/Tracking-and-counting-Using-YOLOv8-and-DeepSORT/aabfc814370762fd4f5034c65c6bb5c138e63efb/deep_sort/utils/__pycache__/parser.cpython-39.pyc -------------------------------------------------------------------------------- /deep_sort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | -------------------------------------------------------------------------------- /deep_sort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | 15 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 16 | for i,box in enumerate(bbox): 17 | x1,y1,x2,y2 = [int(i) for i in box] 18 | x1 += offset[0] 19 | x2 += offset[0] 20 | y1 += offset[1] 21 | y2 += offset[1] 22 | # box text and bar 23 | id = int(identities[i]) if identities is not None else 0 24 | color = compute_color_for_labels(id) 25 | label = '{}{:d}'.format("", id) 26 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 27 | cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 28 | cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 30 | return img 31 | 32 | 33 | 34 | if __name__ == '__main__': 35 | for i in range(82): 36 | print(compute_color_for_labels(i)) 37 | -------------------------------------------------------------------------------- /deep_sort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | 
gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deep_sort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with 
open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... 
% 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deep_sort/utils/json_logger.py: -------------------------------------------------------------------------------- 1 | """ 2 | References: 3 | https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f 4 | """ 5 | import json 6 | from os import makedirs 7 | from os.path import exists, join 8 | from datetime import datetime 9 | 10 | 11 | class JsonMeta(object): 12 | HOURS = 3 13 | MINUTES = 59 14 | SECONDS = 59 15 | PATH_TO_SAVE = 'LOGS' 16 | DEFAULT_FILE_NAME = 'remaining' 17 | 18 | 19 | class BaseJsonLogger(object): 20 | """ 21 | This is the base class that returns __dict__ of its own 22 | it also returns the dicts of objects in the attributes that are list instances 23 | 24 | """ 25 | 26 | def dic(self): 27 | # returns dicts of objects 28 | out = {} 29 | for k, v in self.__dict__.items(): 30 | if hasattr(v, 'dic'): 31 | out[k] = v.dic() 32 | elif isinstance(v, list): 33 | out[k] = self.list(v) 34 | else: 35 | out[k] = v 36 | return out 37 | 38 | @staticmethod 39 | def list(values): 40 | # applies the dic method on items in the list 41 | return [v.dic() if hasattr(v, 'dic') else v for v in values] 42 | 43 | 44 | class Label(BaseJsonLogger): 45 | """ 46 | For each bounding box there are various categories with confidences. Label class keeps track of that information. 47 | """ 48 | 49 | def __init__(self, category: str, confidence: float): 50 | self.category = category 51 | self.confidence = confidence 52 | 53 | 54 | class Bbox(BaseJsonLogger): 55 | """ 56 | This module stores the information for each frame and use them in JsonParser 57 | Attributes: 58 | labels (list): List of label module. 59 | top (int): 60 | left (int): 61 | width (int): 62 | height (int): 63 | 64 | Args: 65 | bbox_id (float): 66 | top (int): 67 | left (int): 68 | width (int): 69 | height (int): 70 | 71 | References: 72 | Check Label module for better understanding. 
73 |
74 |
75 | """
76 |
77 | def __init__(self, bbox_id, top, left, width, height):
78 | self.labels = []
79 | self.bbox_id = bbox_id
80 | self.top = top
81 | self.left = left
82 | self.width = width
83 | self.height = height
84 |
85 | def add_label(self, category, confidence):
86 | # appends the label; the caller is responsible for checking that top_k is not exceeded.
87 | self.labels.append(Label(category, confidence))
88 |
89 | def labels_full(self, value):
90 | return len(self.labels) == value
91 |
92 |
93 | class Frame(BaseJsonLogger):
94 | """
95 | This class stores the information for each frame and is used by `BboxToJsonLogger`.
96 | Attributes:
97 | timestamp (float): The elapsed time of captured frame
98 | frame_id (int): The frame number of the captured video
99 | bboxes (list of Bbox objects): Stores the list of bbox objects.
100 |
101 | References:
102 | Check Bbox class for better information
103 |
104 | Args:
105 | timestamp (float):
106 | frame_id (int):
107 |
108 | """
109 |
110 | def __init__(self, frame_id: int, timestamp: float = None):
111 | self.frame_id = frame_id
112 | self.timestamp = timestamp
113 | self.bboxes = []
114 |
115 | def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
116 | bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
117 | if bbox_id not in bboxes_ids:
118 | self.bboxes.append(Bbox(bbox_id, top, left, width, height))
119 | else:
120 | raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
121 |
122 | def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
123 | bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}  # Bbox stores its identifier as `bbox_id`, not `id`
124 | if bbox_id in bboxes.keys():
125 | res = bboxes.get(bbox_id)
126 | res.add_label(category, confidence)
127 | else:
128 | raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
129 |
130 |
131 | class BboxToJsonLogger(BaseJsonLogger):
132 | """
133 | This class is designed to automate the task of logging detections as JSON. The example
134 | below shows the structure of the output file.
135 | Example:
136 | {
137 | "video_details": {
138 | "frame_width": 1920,
139 | "frame_height": 1080,
140 | "frame_rate": 20,
141 | "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
142 | },
143 | "frames": [
144 | {
145 | "frame_id": 329,
146 | "timestamp": 3365.1254,
147 | "bboxes": [
148 | {
149 | "labels": [
150 | {
151 | "category": "pedestrian",
152 | "confidence": 0.9
153 | }
154 | ],
155 | "bbox_id": 0,
156 | "top": 1257,
157 | "left": 138,
158 | "width": 68,
159 | "height": 109
160 | }
161 | ]
162 | }]
163 | }
164 | Attributes:
165 | frames (dict): It's a dictionary that maps each frame_id to json attributes.
166 | video_details (dict): information about video file.
167 | top_k_labels (int): shows the allowed number of labels
168 | start_time (datetime object): we use it to automate the json output by time.
169 | 170 | Args: 171 | top_k_labels (int): shows the allowed number of labels 172 | 173 | """ 174 | 175 | def __init__(self, top_k_labels: int = 1): 176 | self.frames = {} 177 | self.video_details = self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, 178 | video_name=None) 179 | self.top_k_labels = top_k_labels 180 | self.start_time = datetime.now() 181 | 182 | def set_top_k(self, value): 183 | self.top_k_labels = value 184 | 185 | def frame_exists(self, frame_id: int) -> bool: 186 | """ 187 | Args: 188 | frame_id (int): 189 | 190 | Returns: 191 | bool: true if frame_id is recognized 192 | """ 193 | return frame_id in self.frames.keys() 194 | 195 | def add_frame(self, frame_id: int, timestamp: float = None) -> None: 196 | """ 197 | Args: 198 | frame_id (int): 199 | timestamp (float): opencv captured frame time property 200 | 201 | Raises: 202 | ValueError: if frame_id would not exist in class frames attribute 203 | 204 | Returns: 205 | None 206 | 207 | """ 208 | if not self.frame_exists(frame_id): 209 | self.frames[frame_id] = Frame(frame_id, timestamp) 210 | else: 211 | raise ValueError("Frame id: {} already exists".format(frame_id)) 212 | 213 | def bbox_exists(self, frame_id: int, bbox_id: int) -> bool: 214 | """ 215 | Args: 216 | frame_id: 217 | bbox_id: 218 | 219 | Returns: 220 | bool: if bbox exists in frame bboxes list 221 | """ 222 | bboxes = [] 223 | if self.frame_exists(frame_id=frame_id): 224 | bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes] 225 | return bbox_id in bboxes 226 | 227 | def find_bbox(self, frame_id: int, bbox_id: int): 228 | """ 229 | 230 | Args: 231 | frame_id: 232 | bbox_id: 233 | 234 | Returns: 235 | bbox_id (int): 236 | 237 | Raises: 238 | ValueError: if bbox_id does not exist in the bbox list of specific frame. 239 | """ 240 | if not self.bbox_exists(frame_id, bbox_id): 241 | raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id)) 242 | bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes} 243 | return bboxes.get(bbox_id) 244 | 245 | def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None: 246 | """ 247 | 248 | Args: 249 | frame_id (int): 250 | bbox_id (int): 251 | top (int): 252 | left (int): 253 | width (int): 254 | height (int): 255 | 256 | Returns: 257 | None 258 | 259 | Raises: 260 | ValueError: if bbox_id already exist in frame information with frame_id 261 | ValueError: if frame_id does not exist in frames attribute 262 | """ 263 | if self.frame_exists(frame_id): 264 | frame = self.frames[frame_id] 265 | if not self.bbox_exists(frame_id, bbox_id): 266 | frame.add_bbox(bbox_id, top, left, width, height) 267 | else: 268 | raise ValueError( 269 | "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id)) 270 | else: 271 | raise ValueError("frame with frame_id: {} does not exist".format(frame_id)) 272 | 273 | def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float): 274 | """ 275 | Args: 276 | frame_id: 277 | bbox_id: 278 | category: 279 | confidence: the confidence value returned from yolo detection 280 | 281 | Returns: 282 | None 283 | 284 | Raises: 285 | ValueError: if labels quota (top_k_labels) exceeds. 
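        Example:
            A short sketch with illustrative values only (the frame and bbox
            must have been registered first):

                logger = BboxToJsonLogger(top_k_labels=1)
                logger.add_frame(frame_id=0, timestamp=0.0)
                logger.add_bbox_to_frame(frame_id=0, bbox_id=0, top=50,
                                         left=100, width=40, height=80)
                logger.add_label_to_bbox(frame_id=0, bbox_id=0,
                                         category='person', confidence=0.9)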
286 | """ 287 | bbox = self.find_bbox(frame_id, bbox_id) 288 | if not bbox.labels_full(self.top_k_labels): 289 | bbox.add_label(category, confidence) 290 | else: 291 | raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id)) 292 | 293 | def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None, 294 | video_name: str = None): 295 | self.video_details['frame_width'] = frame_width 296 | self.video_details['frame_height'] = frame_height 297 | self.video_details['frame_rate'] = frame_rate 298 | self.video_details['video_name'] = video_name 299 | 300 | def output(self): 301 | output = {'video_details': self.video_details} 302 | result = list(self.frames.values()) 303 | output['frames'] = [item.dic() for item in result] 304 | return output 305 | 306 | def json_output(self, output_name): 307 | """ 308 | Args: 309 | output_name: 310 | 311 | Returns: 312 | None 313 | 314 | Notes: 315 | It creates the json output with `output_name` name. 316 | """ 317 | if not output_name.endswith('.json'): 318 | output_name += '.json' 319 | with open(output_name, 'w') as file: 320 | json.dump(self.output(), file) 321 | file.close() 322 | 323 | def set_start(self): 324 | self.start_time = datetime.now() 325 | 326 | def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0, 327 | seconds: int = 60) -> None: 328 | """ 329 | Notes: 330 | Creates folder and then periodically stores the jsons on that address. 331 | 332 | Args: 333 | output_dir (str): the directory where output files will be stored 334 | hours (int): 335 | minutes (int): 336 | seconds (int): 337 | 338 | Returns: 339 | None 340 | 341 | """ 342 | end = datetime.now() 343 | interval = 0 344 | interval += abs(min([hours, JsonMeta.HOURS]) * 3600) 345 | interval += abs(min([minutes, JsonMeta.MINUTES]) * 60) 346 | interval += abs(min([seconds, JsonMeta.SECONDS])) 347 | diff = (end - self.start_time).seconds 348 | 349 | if diff > interval: 350 | output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json' 351 | if not exists(output_dir): 352 | makedirs(output_dir) 353 | output = join(output_dir, output_name) 354 | self.json_output(output_name=output) 355 | self.frames = {} 356 | self.start_time = datetime.now() 357 | 358 | def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE): 359 | """ 360 | saves as the number of frames quota increases higher. 361 | :param frames_quota: 362 | :param frame_counter: 363 | :param output_dir: 364 | :return: 365 | """ 366 | pass 367 | 368 | def flush(self, output_dir): 369 | """ 370 | Notes: 371 | We use this function to output jsons whenever possible. 372 | like the time that we exit the while loop of opencv. 
373 | 374 | Args: 375 | output_dir: 376 | 377 | Returns: 378 | None 379 | 380 | """ 381 | filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json' 382 | output = join(output_dir, filename) 383 | self.json_output(output_name=output) 384 | -------------------------------------------------------------------------------- /deep_sort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deep_sort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | 6 | class YamlParser(edict): 7 | """ 8 | This is yaml parser based on EasyDict. 9 | """ 10 | 11 | def __init__(self, cfg_dict=None, config_file=None): 12 | if cfg_dict is None: 13 | cfg_dict = {} 14 | 15 | if config_file is not None: 16 | assert(os.path.isfile(config_file)) 17 | with open(config_file, 'r') as fo: 18 | cfg_dict.update(yaml.load(fo.read())) 19 | 20 | super(YamlParser, self).__init__(cfg_dict) 21 | 22 | def merge_from_file(self, config_file): 23 | with open(config_file, 'r') as fo: 24 | self.update(yaml.load(fo.read())) 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb 39 | ipdb.set_trace() 40 | -------------------------------------------------------------------------------- /deep_sort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 
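    A usage sketch (the decorated function name is illustrative only):

        @tik_tok
        def process_frame(frame):
            ...

    Each call then prints the elapsed time and the equivalent FPS.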
24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | antlr4-python3-runtime==4.9.3 3 | anyio==3.6.2 4 | argon2-cffi==21.3.0 5 | argon2-cffi-bindings==21.2.0 6 | arrow==1.2.3 7 | asttokens==2.2.1 8 | attrs==23.1.0 9 | backcall==0.2.0 10 | beautifulsoup4==4.12.2 11 | bleach==6.0.0 12 | blinker==1.7.0 13 | cachetools==5.3.1 14 | certifi==2023.5.7 15 | cffi==1.15.1 16 | charset-normalizer==3.1.0 17 | click==8.1.7 18 | colorama==0.4.6 19 | comm==0.1.3 20 | contourpy==1.0.7 21 | cvzone==1.6.1 22 | cycler==0.11.0 23 | Cython==3.0.2 24 | dataclasses-json==0.6.0 25 | debugpy==1.6.7 26 | decorator==5.1.1 27 | defusedxml==0.7.1 28 | easydict==1.10 29 | easyocr==1.7.1 30 | executing==1.2.0 31 | fastjsonschema==2.17.1 32 | filelock==3.12.0 33 | filterpy==1.4.5 34 | Flask==3.0.0 35 | fonttools==4.39.4 36 | fqdn==1.5.1 37 | google-auth==2.22.0 38 | google-auth-oauthlib==1.0.0 39 | grpcio==1.58.0 40 | h5py==3.9.0 41 | hydra-core==1.2.0 42 | idna==3.4 43 | imageio==2.31.3 44 | importlib-metadata==6.6.0 45 | importlib-resources==5.12.0 46 | ipykernel==6.23.1 47 | ipython==8.13.2 48 | ipython-genutils==0.2.0 49 | ipywidgets==8.0.6 50 | isoduration==20.11.0 51 | itsdangerous==2.1.2 52 | jedi==0.18.2 53 | Jinja2==3.1.2 54 | jsonpointer==2.3 55 | jsonschema==4.17.3 56 | jupyter==1.0.0 57 | jupyter-console==6.6.3 58 | jupyter-events==0.6.3 59 | jupyter_client==8.2.0 60 | jupyter_core==5.3.0 61 | jupyter_server==2.5.0 62 | jupyter_server_terminals==0.4.4 63 | jupyterlab-pygments==0.2.2 64 | jupyterlab-widgets==3.0.7 65 | kiwisolver==1.4.4 66 | labelImg==1.8.6 67 | lap==0.4.0 68 | lazy_loader==0.3 69 | loguru==0.7.1 70 | lxml==4.9.3 71 | Markdown==3.4.4 72 | MarkupSafe==2.1.2 73 | marshmallow==3.20.1 74 | matplotlib==3.7.1 75 | matplotlib-inline==0.1.6 76 | mistune==2.0.5 77 | motmetrics==1.4.0 78 | mpmath==1.3.0 79 | mypy-extensions==1.0.0 80 | nbclassic==1.0.0 81 | nbclient==0.8.0 82 | nbconvert==7.4.0 83 | nbformat==5.8.0 84 | nest-asyncio==1.5.6 85 | networkx==3.1 86 | ninja==1.11.1 87 | notebook==6.5.4 88 | notebook_shim==0.2.3 89 | numpy==1.24.3 90 | oauthlib==3.2.2 91 | omegaconf==2.3.0 92 | onemetric==0.1.2 93 | opencv-python==4.9.0.80 94 | packaging==23.1 95 | pafy==0.5.5 96 | pandas==2.0.1 97 | pandocfilters==1.5.0 98 | parso==0.8.3 99 | pickleshare==0.7.5 100 | Pillow==9.5.0 101 | platformdirs==3.5.1 102 | prometheus-client==0.16.0 103 | prompt-toolkit==3.0.38 104 | protobuf==4.24.3 105 | psutil==5.9.5 106 | pure-eval==0.2.2 107 | pyasn1==0.5.0 108 | pyasn1-modules==0.3.0 109 | pyclipper==1.3.0.post5 110 | pycocotools==2.0.7 111 | pycparser==2.21 112 | Pygments==2.15.1 113 | pyparsing==3.0.9 114 | PyQt5==5.15.9 115 | PyQt5-Qt5==5.15.2 116 | PyQt5-sip==12.12.2 117 | pyrsistent==0.19.3 118 | python-bidi==0.4.2 119 | python-dateutil==2.8.2 120 | python-json-logger==2.0.7 121 | pytube==15.0.0 122 | pytz==2023.3 123 | PyWavelets==1.4.1 124 | pywin32==306 125 | pywinpty==2.0.10 126 | PyYAML==6.0 127 | pyzmq==25.0.2 128 | qtconsole==5.4.3 129 | QtPy==2.3.1 130 | requests==2.31.0 131 | requests-oauthlib==1.3.1 132 | rfc3339-validator==0.1.4 133 | 
rfc3986-validator==0.1.1 134 | rsa==4.9 135 | scikit-image==0.21.0 136 | scipy==1.10.1 137 | seaborn==0.12.2 138 | Send2Trash==1.8.2 139 | sentry-sdk==1.23.1 140 | shapely==2.0.1 141 | six==1.16.0 142 | sniffio==1.3.0 143 | soupsieve==2.4.1 144 | stack-data==0.6.2 145 | supervision==0.14.0 146 | sympy==1.12 147 | tabulate==0.9.0 148 | tensorboard==2.14.0 149 | tensorboard-data-server==0.7.1 150 | terminado==0.17.1 151 | thop==0.1.1.post2209072238 152 | tifffile==2023.8.30 153 | tinycss2==1.2.1 154 | torch>=2.1.2 155 | torchaudio==2.0.2+cu117 156 | torchvision==0.15.2+cu117 157 | tornado==6.3.2 158 | tqdm==4.65.0 159 | traitlets==5.9.0 160 | typing-inspect==0.9.0 161 | typing_extensions==4.5.0 162 | tzdata==2023.3 163 | ultralytics==8.0.106 164 | uri-template==1.2.0 165 | urllib3==1.26.15 166 | wcwidth==0.2.6 167 | webcolors==1.13 168 | webencodings==0.5.1 169 | websocket-client==1.5.2 170 | Werkzeug==3.0.1 171 | widgetsnbextension==4.0.7 172 | win32-setctime==1.1.0 173 | xmltodict==0.13.0 174 | youtube-dl==2020.12.2 175 | zipp==3.15.0 176 | -------------------------------------------------------------------------------- /track_count_persons .ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4961e54c", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Python 3.9.10\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "!python --version" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "5ea3ead0", 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "'8.0.106'" 31 | ] 32 | }, 33 | "execution_count": 2, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "import ultralytics\n", 40 | "ultralytics.__version__" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "id": "6801a5a9", 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "'2.0.1+cu117'" 53 | ] 54 | }, 55 | "execution_count": 4, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "import torch\n", 62 | "torch.__version__" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "id": "9627ba4a", 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "'NVIDIA GeForce RTX 3090'" 75 | ] 76 | }, 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "torch.cuda.get_device_name(0)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "id": "30beac26", 89 | "metadata": {}, 90 | "source": [ 91 | "# Detect, track and count Persons" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 1, 97 | "id": "c23349aa", 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "D:\\yolov8_SAM_env\\yolov8_tracking\\1_yolov8_DeepSORT\\yolov8_DeepSORT\n" 105 | ] 106 | } 107 | ], 108 | "source": [ 109 | "%cd yolov8_DeepSORT" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 2, 115 | "id": "7ac57944", 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stderr", 120 | "output_type": "stream", 121 | "text": [ 122 | "\n", 123 | "image 1/1 
D:\\yolov8_SAM_env\\yolov8_tracking\\1_yolov8_DeepSORT\\YOLOv8_DeepSORT\\images\\person.jpg: 384x640 1 person, 1 cup, 1 chair, 2 potted plants, 1 bed, 1 book, 67.0ms\n", 124 | "Speed: 2.0ms preprocess, 67.0ms inference, 6.0ms postprocess per image at shape (1, 3, 640, 640)\n", 125 | "Results saved to \u001b[1mruns\\detect\\predict\u001b[0m\n" 126 | ] 127 | }, 128 | { 129 | "name": "stdout", 130 | "output_type": "stream", 131 | "text": [ 132 | "[0.0, 58.0, 59.0, 58.0, 41.0, 73.0, 56.0]\n", 133 | "Class: person\n", 134 | "Class: potted plant\n", 135 | "Class: bed\n", 136 | "Class: potted plant\n", 137 | "Class: cup\n", 138 | "Class: book\n", 139 | "Class: chair\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "from ultralytics import YOLO\n", 145 | "\n", 146 | "import time\n", 147 | "import torch\n", 148 | "import cv2\n", 149 | "import torch.backends.cudnn as cudnn\n", 150 | "from PIL import Image\n", 151 | "import colorsys\n", 152 | "import numpy as np\n", 153 | "\n", 154 | "# Load a model\n", 155 | "model = YOLO(\"yolov8n.pt\") # load a pretrained model (recommended for training)\n", 156 | "\n", 157 | "results = model(\"images/person.jpg\", save=True)\n", 158 | "\n", 159 | "\n", 160 | "\n", 161 | "class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", 162 | "\n", 163 | "for result in results:\n", 164 | " boxes = result.boxes # Boxes object for bbox outputs\n", 165 | " probs = result.probs # Class probabilities for classification outputs\n", 166 | " cls = boxes.cls.tolist() # Convert tensor to list\n", 167 | " xyxy = boxes.xyxy\n", 168 | " xywh = boxes.xywh # box with xywh format, (N, 4)\n", 169 | " conf = boxes.conf\n", 170 | " print(cls)\n", 171 | " for class_index in cls:\n", 172 | " class_name = class_names[int(class_index)]\n", 173 | " print(\"Class:\", class_name)" 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "461c7b6e", 179 | "metadata": {}, 180 | "source": [ 181 | "# DeepSORT" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 3, 187 | "id": "945f584b", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "from deep_sort.utils.parser import get_config\n", 192 | "from deep_sort.deep_sort import DeepSort\n", 193 | "from deep_sort.sort.tracker import Tracker\n", 194 | "\n", 195 | "deep_sort_weights = 'deep_sort/deep/checkpoint/ckpt.t7'\n", 196 | "tracker = DeepSort(model_path=deep_sort_weights, max_age=70)" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 4, 202 | "id": "2d74f1e2", 203 | "metadata": {}, 204 | "outputs": [], 205 | "source": [ 206 | "# Define the video path\n", 207 | "video_path = 'test_videos/2.mp4'\n", 208 | "\n", 209 | "cap = cv2.VideoCapture(video_path)\n", 210 | 
"\n", 211 | "# Get the video properties\n", 212 | "frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n", 213 | "frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n", 214 | "fps = cap.get(cv2.CAP_PROP_FPS)\n", 215 | "\n", 216 | "# Define the codec and create VideoWriter object\n", 217 | "fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n", 218 | "output_path = 'output.mp4'\n", 219 | "out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))\n", 220 | "\n", 221 | "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 5, 227 | "id": "09056afd", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "frames = []\n", 232 | "\n", 233 | "unique_track_ids = set()" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "id": "533ff5cc", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "name": "stderr", 244 | "output_type": "stream", 245 | "text": [ 246 | "\n", 247 | "0: 384x640 4 persons, 6.5ms\n", 248 | "Speed: 1.0ms preprocess, 6.5ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 249 | "\n", 250 | "0: 384x640 4 persons, 8.0ms\n", 251 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 252 | "\n", 253 | "0: 384x640 4 persons, 7.0ms\n", 254 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 255 | "\n", 256 | "0: 384x640 4 persons, 7.0ms\n", 257 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 258 | "\n", 259 | "0: 384x640 4 persons, 8.0ms\n", 260 | "Speed: 1.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 261 | "\n", 262 | "0: 384x640 4 persons, 7.0ms\n", 263 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 264 | "\n", 265 | "0: 384x640 4 persons, 8.0ms\n", 266 | "Speed: 1.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 267 | "\n", 268 | "0: 384x640 4 persons, 7.0ms\n", 269 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 270 | "\n", 271 | "0: 384x640 4 persons, 10.0ms\n", 272 | "Speed: 1.0ms preprocess, 10.0ms inference, 11.0ms postprocess per image at shape (1, 3, 640, 640)\n", 273 | "\n", 274 | "0: 384x640 4 persons, 7.0ms\n", 275 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 276 | "\n", 277 | "0: 384x640 4 persons, 11.0ms\n", 278 | "Speed: 2.0ms preprocess, 11.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 279 | "\n", 280 | "0: 384x640 4 persons, 8.0ms\n", 281 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 282 | "\n", 283 | "0: 384x640 4 persons, 7.0ms\n", 284 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 285 | "\n", 286 | "0: 384x640 4 persons, 7.0ms\n", 287 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 288 | "\n", 289 | "0: 384x640 4 persons, 6.0ms\n", 290 | "Speed: 1.0ms preprocess, 6.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 291 | "\n", 292 | "0: 384x640 4 persons, 7.0ms\n", 293 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 
294 | "\n", 295 | "0: 384x640 4 persons, 8.0ms\n", 296 | "Speed: 2.0ms preprocess, 8.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)\n", 978 | "\n", 979 | "0: 384x640 3 persons, 7.0ms\n", 980 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms
postprocess per image at shape (1, 3, 640, 640)\n", 981 | "\n", 982 | "0: 384x640 3 persons, 10.0ms\n", 983 | "Speed: 2.0ms preprocess, 10.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 984 | "\n", 985 | "0: 384x640 3 persons, 7.0ms\n", 986 | "Speed: 2.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 987 | "\n", 988 | "0: 384x640 3 persons, 9.0ms\n", 989 | "Speed: 3.0ms preprocess, 9.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 990 | "\n", 991 | "0: 384x640 3 persons, 8.0ms\n", 992 | "Speed: 1.0ms preprocess, 8.0ms inference, 4.0ms postprocess per image at shape (1, 3, 640, 640)\n", 993 | "\n", 994 | "0: 384x640 3 persons, 8.0ms\n", 995 | "Speed: 2.0ms preprocess, 8.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 996 | "\n", 997 | "0: 384x640 3 persons, 7.0ms\n", 998 | "Speed: 1.0ms preprocess, 7.0ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 640)\n", 999 | "\n", 1000 | "0: 384x640 3 persons, 6.0ms\n" 1001 | ] 1002 | } 1003 | ], 1004 | "source": [ 1005 | "i = 0\n", 1006 | "counter, fps, elapsed = 0, 0, 0\n", 1007 | "start_time = time.perf_counter()\n", 1008 | "\n", 1009 | "while cap.isOpened():\n", 1010 | " ret, frame = cap.read()\n", 1011 | "\n", 1012 | " if ret:\n", 1013 | " \n", 1014 | " og_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", 1015 | " frame = og_frame.copy()\n", 1016 | "\n", 1017 | " model = YOLO(\"yolov8n.pt\") # load a pretrained model (recommended for training)\n", 1018 | "\n", 1019 | " results = model(frame, device=0, classes=0, conf=0.8)\n", 1020 | "\n", 1021 | " class_names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n", 1022 | "\n", 1023 | " for result in results:\n", 1024 | " boxes = result.boxes # Boxes object for bbox outputs\n", 1025 | " probs = result.probs # Class probabilities for classification outputs\n", 1026 | " cls = boxes.cls.tolist() # Convert tensor to list\n", 1027 | " xyxy = boxes.xyxy\n", 1028 | " conf = boxes.conf\n", 1029 | " xywh = boxes.xywh # box with xywh format, (N, 4)\n", 1030 | " for class_index in cls:\n", 1031 | " class_name = class_names[int(class_index)]\n", 1032 | " #print(\"Class:\", class_name)\n", 1033 | "\n", 1034 | " pred_cls = np.array(cls)\n", 1035 | " conf = conf.detach().cpu().numpy()\n", 1036 | " xyxy = xyxy.detach().cpu().numpy()\n", 1037 | " bboxes_xywh = xywh\n", 1038 | " bboxes_xywh = xywh.cpu().numpy()\n", 1039 | " bboxes_xywh = np.array(bboxes_xywh, dtype=float)\n", 1040 | " \n", 1041 | " tracks = tracker.update(bboxes_xywh, conf, og_frame)\n", 1042 | " \n", 1043 | " for track in tracker.tracker.tracks:\n", 1044 | " track_id = track.track_id\n", 1045 | " hits = track.hits\n", 
1046 | " x1, y1, x2, y2 = track.to_tlbr() # Get bounding box coordinates in (x1, y1, x2, y2) format\n", 1047 | " w = x2 - x1 # Calculate width\n", 1048 | " h = y2 - y1 # Calculate height\n", 1049 | "\n", 1050 | " # Set color values for red, blue, and green\n", 1051 | " red_color = (0, 0, 255) # (B, G, R)\n", 1052 | " blue_color = (255, 0, 0) # (B, G, R)\n", 1053 | " green_color = (0, 255, 0) # (B, G, R)\n", 1054 | "\n", 1055 | " # Determine color based on track_id\n", 1056 | " color_id = track_id % 3\n", 1057 | " if color_id == 0:\n", 1058 | " color = red_color\n", 1059 | " elif color_id == 1:\n", 1060 | " color = blue_color\n", 1061 | " else:\n", 1062 | " color = green_color\n", 1063 | "\n", 1064 | " cv2.rectangle(og_frame, (int(x1), int(y1)), (int(x1 + w), int(y1 + h)), color, 2)\n", 1065 | "\n", 1066 | " text_color = (0, 0, 0) # Black color for text\n", 1067 | " cv2.putText(og_frame, f\"{class_name}-{track_id}\", (int(x1) + 10, int(y1) - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1, cv2.LINE_AA)\n", 1068 | "\n", 1069 | " # Add the track_id to the set of unique track IDs\n", 1070 | " unique_track_ids.add(track_id)\n", 1071 | "\n", 1072 | " # Update the person count based on the number of unique track IDs\n", 1073 | " person_count = len(unique_track_ids)\n", 1074 | "\n", 1075 | " # Update FPS and place on frame\n", 1076 | " current_time = time.perf_counter()\n", 1077 | " elapsed = (current_time - start_time)\n", 1078 | " counter += 1\n", 1079 | " if elapsed > 1:\n", 1080 | " fps = counter / elapsed\n", 1081 | " counter = 0\n", 1082 | " start_time = current_time\n", 1083 | "\n", 1084 | " # Draw person count on frame\n", 1085 | " cv2.putText(og_frame, f\"Person Count: {person_count}\", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)\n", 1086 | "\n", 1087 | " # Append the frame to the list\n", 1088 | " frames.append(og_frame)\n", 1089 | "\n", 1090 | " # Write the frame to the output video file\n", 1091 | " out.write(cv2.cvtColor(og_frame, cv2.COLOR_RGB2BGR))\n", 1092 | "\n", 1093 | " # Show the frame\n", 1094 | " #cv2.imshow(\"Video\", og_frame)\n", 1095 | "# if cv2.waitKey(1) & 0xFF == ord('q'):\n", 1096 | "# break\n", 1097 | "\n", 1098 | "cap.release()\n", 1099 | "out.release()\n", 1100 | "cv2.destroyAllWindows()\n" 1101 | ] 1102 | } 1103 | ], 1104 | "metadata": { 1105 | "kernelspec": { 1106 | "display_name": "Python 3 (ipykernel)", 1107 | "language": "python", 1108 | "name": "python3" 1109 | }, 1110 | "language_info": { 1111 | "codemirror_mode": { 1112 | "name": "ipython", 1113 | "version": 3 1114 | }, 1115 | "file_extension": ".py", 1116 | "mimetype": "text/x-python", 1117 | "name": "python", 1118 | "nbconvert_exporter": "python", 1119 | "pygments_lexer": "ipython3", 1120 | "version": "3.9.10" 1121 | } 1122 | }, 1123 | "nbformat": 4, 1124 | "nbformat_minor": 5 1125 | } 1126 | --------------------------------------------------------------------------------
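
A condensed, script-style reading of the notebook's detect-track-count loop is sketched below. This is not part of the repository; it is a sketch under the notebook's own assumptions (the `deep_sort` package from this repo, the `deep_sort/deep/checkpoint/ckpt.t7` re-identification weights, a local `test_videos/2.mp4` clip, and the `yolov8n.pt` detector), and it loads the YOLO model once before the loop rather than re-creating it on every frame as the notebook cell does.

```python
# Hedged sketch of the notebook pipeline: YOLOv8 person detection feeding the
# bundled DeepSORT tracker, with a cumulative count of distinct track IDs.
# All paths below mirror the notebook and are assumptions about the local setup.
import cv2
import torch
from ultralytics import YOLO

from deep_sort.deep_sort import DeepSort

VIDEO_IN = "test_videos/2.mp4"              # assumed input clip, as in the notebook
VIDEO_OUT = "output.mp4"
REID_WEIGHTS = "deep_sort/deep/checkpoint/ckpt.t7"


def main():
    device = 0 if torch.cuda.is_available() else "cpu"
    model = YOLO("yolov8n.pt")              # loaded once, reused for every frame
    tracker = DeepSort(model_path=REID_WEIGHTS, max_age=70)

    cap = cv2.VideoCapture(VIDEO_IN)
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(VIDEO_OUT, cv2.VideoWriter_fourcc(*"mp4v"), fps, (w, h))

    unique_ids = set()                      # distinct DeepSORT track IDs seen so far
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Person class only (COCO class 0), high-confidence detections.
        result = model(rgb, device=device, classes=0, conf=0.8, verbose=False)[0]
        bboxes_xywh = result.boxes.xywh.cpu().numpy()
        confs = result.boxes.conf.cpu().numpy()

        # DeepSORT takes (center-x, center-y, w, h) boxes plus the frame for re-ID features.
        tracker.update(bboxes_xywh, confs, rgb)

        for track in tracker.tracker.tracks:
            x1, y1, x2, y2 = track.to_tlbr()
            unique_ids.add(track.track_id)
            cv2.rectangle(rgb, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            cv2.putText(rgb, f"person-{track.track_id}", (int(x1) + 10, int(y1) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

        cv2.putText(rgb, f"Person Count: {len(unique_ids)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        out.write(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))

    cap.release()
    out.release()


if __name__ == "__main__":
    main()
```

As in the notebook, the count is cumulative: every track ID DeepSORT assigns is added to the set, so a re-identified person keeps their earlier ID while genuinely new tracks grow the count.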