├── README.md
├── main.py
├── requirements.txt
└── tracker.py

/README.md:
--------------------------------------------------------------------------------
# object-tracking-yolov8-deep-sort

YOLOv8 object detection + Deep SORT object tracking!

[![Watch the video](https://img.youtube.com/vi/jIRRuGN0j5E/0.jpg)](https://www.youtube.com/watch?v=jIRRuGN0j5E)

## requirements

- Python 3.7
- requirements.txt

## Deep Sort

We work on [this fork](https://github.com/computervisiondeveloper/deep_sort) of the official Deep SORT implementation.

You can download the Deep SORT feature extraction model [here](https://drive.google.com/open?id=18fKzfqnqhqW3s9zwsCbnVJ5XF2JFeqMp). Place it at `model_data/mars-small128.pb`, the path `tracker.py` expects.

## data

You can download the same data I use in the video [here](https://drive.google.com/drive/folders/1kZ0QVwlwMERyTyi5c72GeqKgr8qAUx2o?usp=sharing).
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import os
import random

import cv2
from ultralytics import YOLO

from tracker import Tracker


video_path = os.path.join('.', 'data', 'people.mp4')
video_out_path = os.path.join('.', 'out.mp4')

cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
if not ret:
    raise RuntimeError(f'could not read a frame from {video_path}')

# Write the annotated video at the source frame rate and resolution.
cap_out = cv2.VideoWriter(video_out_path, cv2.VideoWriter_fourcc(*'MP4V'), cap.get(cv2.CAP_PROP_FPS),
                          (frame.shape[1], frame.shape[0]))

model = YOLO("yolov8n.pt")

tracker = Tracker()

# One random color per track id, reused modulo the palette size.
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for j in range(10)]

detection_threshold = 0.5
while ret:

    results = model(frame)

    for result in results:
        detections = []
        # Each row of boxes.data is [x1, y1, x2, y2, score, class_id].
        for r in result.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = r
            x1 = int(x1)
            x2 = int(x2)
            y1 = int(y1)
            y2 = int(y2)
            class_id = int(class_id)
            if score > detection_threshold:
                detections.append([x1, y1, x2, y2, score])

        tracker.update(frame, detections)

        # Draw each confirmed track with a color fixed by its id.
        for track in tracker.tracks:
            bbox = track.bbox
            x1, y1, x2, y2 = bbox
            track_id = track.track_id

            cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (colors[track_id % len(colors)]), 3)

    cap_out.write(frame)
    ret, frame = cap.read()

cap.release()
cap_out.release()
cv2.destroyAllWindows()
--------------------------------------------------------------------------------
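main.py keeps every class that clears the confidence threshold, even though the sample video is of people. If you only want person tracks, you can filter on the class id before handing detections to the tracker. A minimal sketch, assuming the stock COCO-pretrained `yolov8n.pt` weights (where class id 0 is "person"); `filter_detections` is a hypothetical helper, not part of this repo:

```python
PERSON_CLASS_ID = 0  # "person" in the COCO label set used by the stock yolov8n.pt weights


def filter_detections(result, threshold=0.5, wanted_class_id=PERSON_CLASS_ID):
    """Keep only boxes of one class, in the [x1, y1, x2, y2, score] format main.py builds."""
    detections = []
    for x1, y1, x2, y2, score, class_id in result.boxes.data.tolist():
        if int(class_id) == wanted_class_id and score > threshold:
            detections.append([int(x1), int(y1), int(x2), int(y2), score])
    return detections
```

Inside the `for result in results:` loop, this would replace the inner detection loop with `detections = filter_detections(result, detection_threshold)`.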
/requirements.txt:
--------------------------------------------------------------------------------
ultralytics==8.0.33
scikit-learn==0.21.0
tensorflow==2.11.0
scikit-image==0.19.3
filterpy==1.4.5
--------------------------------------------------------------------------------
/tracker.py:
--------------------------------------------------------------------------------
from deep_sort.deep_sort.tracker import Tracker as DeepSortTracker
from deep_sort.tools import generate_detections as gdet
from deep_sort.deep_sort import nn_matching
from deep_sort.deep_sort.detection import Detection
import numpy as np


class Tracker:
    """Thin wrapper around the Deep SORT tracker and its appearance-feature encoder."""

    def __init__(self):
        max_cosine_distance = 0.4
        nn_budget = None

        encoder_model_filename = 'model_data/mars-small128.pb'

        metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
        self.tracker = DeepSortTracker(metric)
        self.encoder = gdet.create_box_encoder(encoder_model_filename, batch_size=1)
        self.tracks = []

    def update(self, frame, detections):
        # With no detections, still advance the Kalman filters so stale tracks age out.
        if len(detections) == 0:
            self.tracker.predict()
            self.tracker.update([])
            self.update_tracks()
            return

        # Convert boxes from [x1, y1, x2, y2] to the [x, y, w, h] format Deep SORT expects.
        bboxes = np.asarray([d[:-1] for d in detections])
        bboxes[:, 2:] = bboxes[:, 2:] - bboxes[:, 0:2]
        scores = [d[-1] for d in detections]

        # Appearance features feed the cosine-distance matching metric.
        features = self.encoder(frame, bboxes)

        dets = []
        for bbox_id, bbox in enumerate(bboxes):
            dets.append(Detection(bbox, scores[bbox_id], features[bbox_id]))

        self.tracker.predict()
        self.tracker.update(dets)
        self.update_tracks()

    def update_tracks(self):
        # Expose only confirmed tracks that were matched on the current frame.
        tracks = []
        for track in self.tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            bbox = track.to_tlbr()
            tracks.append(Track(track.track_id, bbox))

        self.tracks = tracks


class Track:
    """A lightweight record of one track: its id and an (x1, y1, x2, y2) box."""

    def __init__(self, track_id, bbox):
        self.track_id = track_id
        self.bbox = bbox
--------------------------------------------------------------------------------
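For reference, a minimal sketch of driving `Tracker` directly, assuming `model_data/mars-small128.pb` is in place and the `deep_sort` fork is importable; the image path and the detection box are made-up placeholders:

```python
import cv2

from tracker import Tracker

tracker = Tracker()

frame = cv2.imread('frame.jpg')           # placeholder image path
detections = [[100, 50, 200, 300, 0.9]]   # one [x1, y1, x2, y2, score] detection

tracker.update(frame, detections)

# Deep SORT only confirms a track after a few consecutive matches (n_init frames),
# so expect tracker.tracks to stay empty until update() has seen several frames.
for track in tracker.tracks:
    print(track.track_id, track.bbox)     # bbox comes back as (x1, y1, x2, y2)
```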