├── README.md
├── demo.gif
├── requirements.txt
└── src
    ├── Constants.py
    ├── Constants.pyc
    ├── Input.py
    ├── Input.pyc
    ├── Scene.py
    ├── Scene.pyc
    ├── Twister.py
    ├── deep_sort
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── detection.cpython-36.pyc
    │   │   ├── iou_matching.cpython-36.pyc
    │   │   ├── kalman_filter.cpython-36.pyc
    │   │   ├── linear_assignment.cpython-36.pyc
    │   │   ├── nn_matching.cpython-36.pyc
    │   │   ├── preprocessing.cpython-36.pyc
    │   │   ├── track.cpython-36.pyc
    │   │   └── tracker.cpython-36.pyc
    │   ├── detection.py
    │   ├── detection.pyc
    │   ├── iou_matching.py
    │   ├── iou_matching.pyc
    │   ├── kalman_filter.py
    │   ├── kalman_filter.pyc
    │   ├── linear_assignment.py
    │   ├── linear_assignment.pyc
    │   ├── my_filter.py
    │   ├── my_filter.pyc
    │   ├── nn_matching.py
    │   ├── nn_matching.pyc
    │   ├── preprocessing.py
    │   ├── preprocessing.pyc
    │   ├── track.py
    │   ├── track.pyc
    │   ├── tracker.py
    │   └── tracker.pyc
    ├── model_data
    │   └── mars-small128.pb
    ├── tools
    │   ├── __init__.py
    │   ├── __init__.pyc
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   └── generate_detections.cpython-36.pyc
    │   ├── freeze_model.py
    │   ├── generate_detections.py
    │   └── generate_detections.pyc
    ├── utils.py
    └── utils.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Live tracker made with Open Pose and Deep SORT
2 | 
3 | This tracker detects bodies in the video captured by the main camera, assigns an id to each person it sees, and keeps track of that person throughout the video.
4 | 

5 | ![demo](demo.gif)
6 | 
7 | 

8 | 
9 | This is the same code used for the analysis shown in the video, but working directly on the camera input. I can't test the code right now, but some people asked for it, so here it is. It should be enough to clarify how the OpenPose and Deep SORT integration works.
10 | 
11 | Updated to work with OpenPose v1.5.0! Make sure you have installed the Python API.
12 | 
13 | See more at https://www.youtube.com/watch?v=GIJjyjeFmF8
14 | 
15 | * [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)
16 | * [Deep SORT](https://github.com/nwojke/deep_sort)
17 | 
18 | ## Set up
19 | ### Prerequisites
20 | * [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) - for body detection. Install the Python API!
21 | * OpenCV - used for image manipulation.
22 | * PyGame - used to simplify the input/output workflow.
23 | * NumPy, ConfigParser, etc.
24 | 
25 | 
26 | ### Configuration
27 | 
28 | - **Constants.py**: screen resolution, tracker parameters, and OpenPose parameters (model folder path).
29 | 
30 | 
31 | ### Run
32 | From the `src` folder, just run `python Twister.py`.
33 | 
34 | ### System design
35 | Most of the work is done in Input.py. There, the current frame is processed with OpenPose to get body-part detections, and the bounding boxes for those detections are fed into the Deep SORT tracker. The boxes and their assigned ids are then drawn on screen with plain OpenCV.
36 | 
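In code, the per-frame flow looks roughly like this (a condensed, hedged sketch of `Input.run()` from `src/Input.py`; `openpose`, `encoder`, `tracker`, `frame` and `poses2boxes` are the objects built there, not new API):

```python
datum = op.Datum()
datum.cvInputData = frame                        # BGR frame from cv2.VideoCapture
openpose.emplaceAndPop([datum])                  # OpenPose v1.5.0 Python API
poses = np.array(datum.poseKeypoints)[:, :, :2]  # keep (x, y), drop confidences
boxes = poses2boxes(poses)                       # box around each body's keypoints
boxes_xywh = [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in boxes]
features = encoder(frame, boxes_xywh)            # 128-d MARS appearance features
detections = [Detection(b, 1.0, f, p)
              for b, f, p in zip(boxes_xywh, features, poses)]
tracker.predict()                                # Kalman prediction for each track
tracker.update(frame, detections)                # association + track management
```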
--------------------------------------------------------------------------------
/demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/demo.gif
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pygame
2 | configparser
--------------------------------------------------------------------------------
/src/Constants.py:
--------------------------------------------------------------------------------
1 | SCREEN_WIDTH = 640
2 | SCREEN_HEIGHT = 480
3 | PATH = '/home/marcelo/openpose'
4 | 
5 | max_cosine_distance = 1
6 | nn_budget = None
7 | nms_max_overlap = 1.0
8 | max_age = 100
9 | n_init = 20
10 | 
11 | openpose_modelfolder = "/home/marcelo/openpose/models/"
--------------------------------------------------------------------------------
/src/Constants.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/Constants.pyc
--------------------------------------------------------------------------------
/src/Input.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import cv2
3 | import sys
4 | import time
5 | import numpy as np
6 | 
7 | import pygame
8 | # Load OpenPose:
9 | sys.path.append('/usr/local/python')
10 | from openpose import pyopenpose as op
11 | 
12 | 
13 | from deep_sort.iou_matching import iou_cost
14 | from deep_sort.kalman_filter import KalmanFilter
15 | from deep_sort.detection import Detection
16 | from deep_sort.tracker import Tracker as DeepTracker
17 | from deep_sort import nn_matching
18 | from deep_sort import preprocessing
19 | from deep_sort.linear_assignment import min_cost_matching
20 | from deep_sort.detection import Detection as ddet
21 | from tools import generate_detections as gdet
22 | from utils import poses2boxes
23 | 
24 | import Constants
25 | 
26 | class Input():
27 |     def __init__(self, debug = False):
28 |         #from openpose import *
29 |         params = dict()
30 |         params["model_folder"] = Constants.openpose_modelfolder
31 |         params["net_resolution"] = "-1x320"
32 |         self.openpose = op.WrapperPython()
33 |         self.openpose.configure(params)
34 |         self.openpose.start()
35 | 
36 | 
37 |         max_cosine_distance = Constants.max_cosine_distance
38 |         nn_budget = Constants.nn_budget
39 |         self.nms_max_overlap = Constants.nms_max_overlap
40 |         max_age = Constants.max_age
41 |         n_init = Constants.n_init
42 | 
43 |         model_filename = 'model_data/mars-small128.pb'
44 |         self.encoder = gdet.create_box_encoder(model_filename, batch_size=1)
45 |         metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
46 |         self.tracker = DeepTracker(metric, max_age = max_age, n_init = n_init)
47 | 
48 |         self.capture = cv2.VideoCapture(0)
49 |         if self.capture.isOpened(): # Checks the stream
50 |             self.frameSize = (int(self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)),
51 |                               int(self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)))
52 |             Constants.SCREEN_HEIGHT = self.frameSize[0]
53 |             Constants.SCREEN_WIDTH = self.frameSize[1]
54 | 
55 | 
56 |     def getCurrentFrameAsImage(self):
57 |         frame = self.currentFrame
58 |         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
59 |         pgImg = pygame.image.frombuffer(frame.tobytes(), frame.shape[1::-1], "RGB")  # tobytes(): tostring() is a deprecated numpy alias
60 |         return pgImg
61 | 
62 | 
63 |     def run(self):
64 |         result, self.currentFrame = self.capture.read()
65 |         datum = op.Datum()
66 |         datum.cvInputData = self.currentFrame
67 |         self.openpose.emplaceAndPop([datum])
68 | 
69 |         keypoints, self.currentFrame = np.array(datum.poseKeypoints), datum.cvOutputData
70 |         if keypoints.ndim != 3:  # guard added: poseKeypoints is empty/0-d when nobody is in frame
71 |             return               # skip tracking for this frame
72 |         poses = keypoints[:,:,:2]  # doesn't use keypoint confidence
73 |         # Get containing box for each seen body
74 |         boxes = poses2boxes(poses)
75 |         boxes_xywh = [[x1,y1,x2-x1,y2-y1] for [x1,y1,x2,y2] in boxes]
76 |         features = self.encoder(self.currentFrame, boxes_xywh)
77 |         # print(features)
78 | 
79 |         nonempty = lambda xywh: xywh[2] != 0 and xywh[3] != 0
80 |         detections = [Detection(bbox, 1.0, feature, pose) for bbox, feature, pose in zip(boxes_xywh, features, poses) if nonempty(bbox)]
81 |         # Run non-maxima suppression.
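        # With nms_max_overlap = 1.0 (the value in Constants.py), this pass keeps
        # every detection: the pairwise overlap computed in
        # preprocessing.non_max_suppression never exceeds 1.0, so nothing is
        # suppressed. Lower the constant to actually merge duplicate boxes.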
82 | boxes_det = np.array([d.tlwh for d in detections]) 83 | scores = np.array([d.confidence for d in detections]) 84 | indices = preprocessing.non_max_suppression(boxes_det, self.nms_max_overlap, scores) 85 | detections = [detections[i] for i in indices] 86 | # Call the tracker 87 | self.tracker.predict() 88 | self.tracker.update( self.currentFrame, detections) 89 | 90 | for track in self.tracker.tracks: 91 | color = None 92 | if not track.is_confirmed(): 93 | color = (0,0,255) 94 | else: 95 | color = (255,255,255) 96 | bbox = track.to_tlbr() 97 | print("Body keypoints:") 98 | print(track.last_seen_detection.pose) 99 | cv2.rectangle(self.currentFrame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])),color, 2) 100 | cv2.putText(self.currentFrame, "id%s - ts%s"%(track.track_id,track.time_since_update),(int(bbox[0]), int(bbox[1])-20),0, 5e-3 * 200, (0,255,0),2) 101 | 102 | cv2.waitKey(1) 103 | -------------------------------------------------------------------------------- /src/Input.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/Input.pyc -------------------------------------------------------------------------------- /src/Scene.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from Input import Input 3 | import pygame 4 | import Constants 5 | 6 | 7 | class Scene(): 8 | def __init__(self, screen, input): 9 | self.input = input 10 | self.screen = screen 11 | 12 | self.sceneClock = pygame.time.Clock() 13 | self.backgroundColor = (0,0,0) 14 | 15 | def renderWebCam(self): 16 | frame = self.input.getCurrentFrameAsImage() 17 | self.screen.blit(frame, (0,0)) 18 | 19 | def render(self): 20 | self.renderWebCam() 21 | 22 | def run(self): 23 | self.screenDelay = self.sceneClock.tick() 24 | self.screen.fill(self.backgroundColor) 25 | self.render() 26 | pygame.display.flip() 27 | -------------------------------------------------------------------------------- /src/Scene.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/Scene.pyc -------------------------------------------------------------------------------- /src/Twister.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from Input import Input 4 | from Scene import Scene 5 | import sys 6 | import getopt 7 | import Constants 8 | import pygame 9 | 10 | class Twister(): 11 | 12 | def __init__(self): 13 | self.input = Input() 14 | pygame.init() 15 | pygame.display.set_mode((Constants.SCREEN_WIDTH, Constants.SCREEN_HEIGHT)) 16 | pygame.display.set_caption("Twister!") 17 | screen = pygame.display.get_surface() 18 | self.scene = Scene(screen, self.input) 19 | 20 | def run(self): 21 | while True: 22 | self.input.run() 23 | self.scene.run() 24 | 25 | 26 | 27 | if __name__ == "__main__": 28 | options, remainder = getopt.getopt(sys.argv[1:], 's:x:') 29 | for opt, arg in options: 30 | if opt in ('-s'): 31 | song = arg 32 | elif opt in ('-x'): 33 | speed = float(arg) 34 | game = Twister() 35 | game.run() 36 | -------------------------------------------------------------------------------- /src/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: 
expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /src/deep_sort/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__init__.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/detection.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/detection.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/iou_matching.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/iou_matching.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/kalman_filter.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/kalman_filter.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/linear_assignment.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/linear_assignment.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/nn_matching.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/nn_matching.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/preprocessing.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/preprocessing.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/track.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/track.cpython-36.pyc -------------------------------------------------------------------------------- /src/deep_sort/__pycache__/tracker.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/__pycache__/tracker.cpython-36.pyc
--------------------------------------------------------------------------------
/src/deep_sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | 
4 | 
5 | class Detection(object):
6 |     """
7 |     This class represents a bounding box detection in a single image.
8 | 
9 |     Parameters
10 |     ----------
11 |     tlwh : array_like
12 |         Bounding box in format `(x, y, w, h)`.
13 |     confidence : float
14 |         Detector confidence score.
15 |     feature : array_like
16 |         A feature vector that describes the object contained in this image.
17 | 
18 |     Attributes
19 |     ----------
20 |     tlwh : ndarray
21 |         Bounding box in format `(top left x, top left y, width, height)`.
22 |     confidence : float
23 |         Detector confidence score.
24 |     feature : ndarray | NoneType
25 |         A feature vector that describes the object contained in this image.
26 | 
27 |     """
28 | 
29 |     def __init__(self, tlwh, confidence, feature, pose):
30 |         self.tlwh = np.asarray(tlwh, dtype=float)  # plain float: np.float is a removed numpy alias
31 |         self.confidence = float(confidence)
32 |         self.feature = np.asarray(feature, dtype=np.float32)
33 |         self.pose = pose
34 | 
35 |     def to_tlbr(self):
36 |         """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
37 |         `(top left, bottom right)`.
38 |         """
39 |         ret = self.tlwh.copy()
40 |         ret[2:] += ret[:2]
41 |         return ret
42 | 
43 |     def to_xyah(self):
44 |         """Convert bounding box to format `(center x, center y, aspect ratio,
45 |         height)`, where the aspect ratio is `width / height`.
46 |         """
47 |         ret = self.tlwh.copy()
48 |         ret[:2] += ret[2:] / 2
49 |         ret[2] /= ret[3]
50 |         return ret
51 | 
--------------------------------------------------------------------------------
/src/deep_sort/detection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/detection.pyc
--------------------------------------------------------------------------------
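As a quick, self-contained illustration of the two conversions above (a sketch, not part of the repo; note this fork's `Detection` takes an extra `pose` argument):

```python
import numpy as np
from deep_sort.detection import Detection

# A 100x200 box whose top-left corner is at (10, 20); pose is unused here.
d = Detection([10, 20, 100, 200], 1.0, np.zeros(128, dtype=np.float32), pose=None)
print(d.to_tlbr())  # [ 10.  20. 110. 220.] -> (min x, min y, max x, max y)
print(d.to_xyah())  # [ 60. 120.  0.5 200.] -> center x, center y, w/h, height
```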
/src/deep_sort/iou_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import linear_assignment
5 | 
6 | 
7 | def iou(bbox, candidates):
8 |     """Compute intersection over union.
9 | 
10 |     Parameters
11 |     ----------
12 |     bbox : ndarray
13 |         A bounding box in format `(top left x, top left y, width, height)`.
14 |     candidates : ndarray
15 |         A matrix of candidate bounding boxes (one per row) in the same format
16 |         as `bbox`.
17 | 
18 |     Returns
19 |     -------
20 |     ndarray
21 |         The intersection over union in [0, 1] between the `bbox` and each
22 |         candidate. A higher score means a larger fraction of the `bbox` is
23 |         occluded by the candidate.
24 | 
25 |     """
26 |     bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27 |     candidates_tl = candidates[:, :2]
28 |     candidates_br = candidates[:, :2] + candidates[:, 2:]
29 | 
30 |     tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31 |                np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32 |     br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33 |                np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34 |     wh = np.maximum(0., br - tl)
35 | 
36 |     area_intersection = wh.prod(axis=1)
37 |     area_bbox = bbox[2:].prod()
38 |     area_candidates = candidates[:, 2:].prod(axis=1)
39 | 
40 |     # Standard IoU: intersection over the union of both areas, as the docstring promises. The previous version divided by area_bbox alone.
41 |     return area_intersection / (area_bbox + area_candidates - area_intersection)
42 | 
43 | def iou_cost(tracks, detections, track_indices=None,
44 |              detection_indices=None):
45 |     """An intersection over union distance metric.
46 | 
47 |     Parameters
48 |     ----------
49 |     tracks : List[deep_sort.track.Track]
50 |         A list of tracks.
51 |     detections : List[deep_sort.detection.Detection]
52 |         A list of detections.
53 |     track_indices : Optional[List[int]]
54 |         A list of indices to tracks that should be matched. Defaults to
55 |         all `tracks`.
56 |     detection_indices : Optional[List[int]]
57 |         A list of indices to detections that should be matched. Defaults
58 |         to all `detections`.
59 | 
60 |     Returns
61 |     -------
62 |     ndarray
63 |         Returns a cost matrix of shape
64 |         len(track_indices), len(detection_indices) where entry (i, j) is
65 |         `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
66 | 
67 |     """
68 |     if track_indices is None:
69 |         track_indices = np.arange(len(tracks))
70 |     if detection_indices is None:
71 |         detection_indices = np.arange(len(detections))
72 | 
73 |     cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
74 |     for row, track_idx in enumerate(track_indices):
75 |         if tracks[track_idx].time_since_update > 1:
76 |             cost_matrix[row, :] = linear_assignment.INFTY_COST
77 |             continue
78 | 
79 |         bbox = tracks[track_idx].to_tlwh()
80 |         candidates = np.asarray([detections[i].tlwh for i in detection_indices])
81 |         cost_matrix[row, :] = 1. - iou(bbox, candidates)
82 |     return cost_matrix
--------------------------------------------------------------------------------
/src/deep_sort/iou_matching.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/iou_matching.pyc
--------------------------------------------------------------------------------
/src/deep_sort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 | 
5 | 
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 |     1: 3.8415,
13 |     2: 5.9915,
14 |     3: 7.8147,
15 |     4: 9.4877,
16 |     5: 11.070,
17 |     6: 12.592,
18 |     7: 14.067,
19 |     8: 15.507,
20 |     9: 16.919}
21 | 
22 | 
23 | class KalmanFilter(object):
24 |     """
25 |     A simple Kalman filter for tracking bounding boxes in image space.
26 | 
27 |     The 8-dimensional state space
28 | 
29 |         x, y, a, h, vx, vy, va, vh
30 | 
31 |     contains the bounding box center position (x, y), aspect ratio a, height h,
32 |     and their respective velocities.
33 | 
34 |     Object motion follows a constant velocity model.
The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | def __init__(self): 40 | ndim, dt = 4, 1. 41 | 42 | # Create Kalman filter model matrices. 43 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 44 | for i in range(ndim): 45 | self._motion_mat[i, ndim + i] = dt 46 | self._update_mat = np.eye(ndim, 2 * ndim) 47 | 48 | # Motion and observation uncertainty are chosen relative to the current 49 | # state estimate. These weights control the amount of uncertainty in 50 | # the model. This is a bit hacky. 51 | self._std_weight_position = 1. / 20 52 | self._std_weight_velocity = 1. / 160 53 | 54 | def initiate(self, measurement): 55 | """Create track from unassociated measurement. 56 | 57 | Parameters 58 | ---------- 59 | measurement : ndarray 60 | Bounding box coordinates (x, y, a, h) with center position (x, y), 61 | aspect ratio a, and height h. 62 | 63 | Returns 64 | ------- 65 | (ndarray, ndarray) 66 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 67 | dimensional) of the new track. Unobserved velocities are initialized 68 | to 0 mean. 69 | 70 | """ 71 | mean_pos = measurement 72 | mean_vel = np.zeros_like(mean_pos) 73 | mean = np.r_[mean_pos, mean_vel] 74 | 75 | std = [ 76 | 2 * self._std_weight_position * measurement[3], 77 | 2 * self._std_weight_position * measurement[3], 78 | 1e-2, 79 | 2 * self._std_weight_position * measurement[3], 80 | 10 * self._std_weight_velocity * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 1e-5, 83 | 10 * self._std_weight_velocity * measurement[3]] 84 | covariance = np.diag(np.square(std)) 85 | return mean, covariance 86 | 87 | def predict(self, mean, covariance): 88 | """Run Kalman filter prediction step. 89 | 90 | Parameters 91 | ---------- 92 | mean : ndarray 93 | The 8 dimensional mean vector of the object state at the previous 94 | time step. 95 | covariance : ndarray 96 | The 8x8 dimensional covariance matrix of the object state at the 97 | previous time step. 98 | 99 | Returns 100 | ------- 101 | (ndarray, ndarray) 102 | Returns the mean vector and covariance matrix of the predicted 103 | state. Unobserved velocities are initialized to 0 mean. 104 | 105 | """ 106 | std_pos = [ 107 | self._std_weight_position * mean[3], 108 | self._std_weight_position * mean[3], 109 | 1e-2, 110 | self._std_weight_position * mean[3]] 111 | std_vel = [ 112 | self._std_weight_velocity * mean[3], 113 | self._std_weight_velocity * mean[3], 114 | 1e-5, 115 | self._std_weight_velocity * mean[3]] 116 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 117 | 118 | mean = np.dot(self._motion_mat, mean) 119 | covariance = np.linalg.multi_dot(( 120 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 121 | 122 | return mean, covariance 123 | 124 | def project(self, mean, covariance): 125 | """Project state distribution to measurement space. 126 | 127 | Parameters 128 | ---------- 129 | mean : ndarray 130 | The state's mean vector (8 dimensional array). 131 | covariance : ndarray 132 | The state's covariance matrix (8x8 dimensional). 133 | 134 | Returns 135 | ------- 136 | (ndarray, ndarray) 137 | Returns the projected mean and covariance matrix of the given state 138 | estimate. 
139 | 140 | """ 141 | std = [ 142 | self._std_weight_position * mean[3], 143 | self._std_weight_position * mean[3], 144 | 1e-1, 145 | self._std_weight_position * mean[3]] 146 | innovation_cov = np.diag(np.square(std)) 147 | 148 | mean = np.dot(self._update_mat, mean) 149 | covariance = np.linalg.multi_dot(( 150 | self._update_mat, covariance, self._update_mat.T)) 151 | return mean, covariance + innovation_cov 152 | 153 | def update(self, mean, covariance, measurement): 154 | """Run Kalman filter correction step. 155 | 156 | Parameters 157 | ---------- 158 | mean : ndarray 159 | The predicted state's mean vector (8 dimensional). 160 | covariance : ndarray 161 | The state's covariance matrix (8x8 dimensional). 162 | measurement : ndarray 163 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 164 | is the center position, a the aspect ratio, and h the height of the 165 | bounding box. 166 | 167 | Returns 168 | ------- 169 | (ndarray, ndarray) 170 | Returns the measurement-corrected state distribution. 171 | 172 | """ 173 | projected_mean, projected_cov = self.project(mean, covariance) 174 | 175 | chol_factor, lower = scipy.linalg.cho_factor( 176 | projected_cov, lower=True, check_finite=False) 177 | kalman_gain = scipy.linalg.cho_solve( 178 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 179 | check_finite=False).T 180 | innovation = measurement - projected_mean 181 | 182 | new_mean = mean + np.dot(innovation, kalman_gain.T) 183 | new_covariance = covariance - np.linalg.multi_dot(( 184 | kalman_gain, projected_cov, kalman_gain.T)) 185 | return new_mean, new_covariance 186 | 187 | def gating_distance(self, mean, covariance, measurements, 188 | only_position=False): 189 | """Compute gating distance between state distribution and measurements. 190 | 191 | A suitable distance threshold can be obtained from `chi2inv95`. If 192 | `only_position` is False, the chi-square distribution has 4 degrees of 193 | freedom, otherwise 2. 194 | 195 | Parameters 196 | ---------- 197 | mean : ndarray 198 | Mean vector over the state distribution (8 dimensional). 199 | covariance : ndarray 200 | Covariance of the state distribution (8x8 dimensional). 201 | measurements : ndarray 202 | An Nx4 dimensional matrix of N measurements, each in 203 | format (x, y, a, h) where (x, y) is the bounding box center 204 | position, a the aspect ratio, and h the height. 205 | only_position : Optional[bool] 206 | If True, distance computation is done with respect to the bounding 207 | box center position only. 208 | 209 | Returns 210 | ------- 211 | ndarray 212 | Returns an array of length N, where the i-th element contains the 213 | squared Mahalanobis distance between (mean, covariance) and 214 | `measurements[i]`. 
215 | 
216 |         """
217 |         mean, covariance = self.project(mean, covariance)
218 |         if only_position:
219 |             mean, covariance = mean[:2], covariance[:2, :2]
220 |             measurements = measurements[:, :2]
221 | 
222 |         cholesky_factor = np.linalg.cholesky(covariance)
223 |         d = measurements - mean
224 |         z = scipy.linalg.solve_triangular(
225 |             cholesky_factor, d.T, lower=True, check_finite=False,
226 |             overwrite_b=True)
227 |         squared_maha = np.sum(z * z, axis=0)
228 |         return squared_maha
229 | 
--------------------------------------------------------------------------------
/src/deep_sort/kalman_filter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/kalman_filter.pyc
--------------------------------------------------------------------------------
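`gating_distance` and `chi2inv95` together implement Deep SORT's gating step: a measurement is only a feasible match for a track if its squared Mahalanobis distance to the projected state falls below the 0.95 chi-square quantile. A minimal sketch (hedged; the measurement values are made up):

```python
import numpy as np
from deep_sort.kalman_filter import KalmanFilter, chi2inv95

kf = KalmanFilter()
mean, covariance = kf.initiate(np.array([320., 240., 0.5, 200.]))  # (x, y, a, h)
mean, covariance = kf.predict(mean, covariance)

# Two candidate measurements: one near the prediction, one far away.
measurements = np.array([[322., 243., 0.5, 198.],
                         [600., 100., 0.4, 120.]])
d2 = kf.gating_distance(mean, covariance, measurements)
feasible = d2 < chi2inv95[4]   # 4 degrees of freedom for (x, y, a, h)
print(d2, feasible)            # the distant box should be gated out
```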
/src/deep_sort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment  # sklearn.utils.linear_assignment_ was removed in scikit-learn 0.23+
5 | from . import kalman_filter
6 | 
7 | INFTY_COST = 1e+5
8 | 
9 | 
10 | def min_cost_matching(
11 |         distance_metric, max_distance, tracks, detections, track_indices=None,
12 |         detection_indices=None):
13 |     """Solve linear assignment problem.
14 | 
15 |     Parameters
16 |     ----------
17 |     distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
18 |         The distance metric is given a list of tracks and detections as well as
19 |         a list of N track indices and M detection indices. The metric should
20 |         return the NxM dimensional cost matrix, where element (i, j) is the
21 |         association cost between the i-th track in the given track indices and
22 |         the j-th detection in the given detection_indices.
23 |     max_distance : float
24 |         Gating threshold. Associations with cost larger than this value are
25 |         disregarded.
26 |     tracks : List[track.Track]
27 |         A list of predicted tracks at the current time step.
28 |     detections : List[detection.Detection]
29 |         A list of detections at the current time step.
30 |     track_indices : List[int]
31 |         List of track indices that maps rows in `cost_matrix` to tracks in
32 |         `tracks` (see description above).
33 |     detection_indices : List[int]
34 |         List of detection indices that maps columns in `cost_matrix` to
35 |         detections in `detections` (see description above).
36 | 
37 |     Returns
38 |     -------
39 |     (List[(int, int)], List[int], List[int])
40 |         Returns a tuple with the following three entries:
41 |         * A list of matched track and detection indices.
42 |         * A list of unmatched track indices.
43 |         * A list of unmatched detection indices.
44 | 
45 |     """
46 |     if track_indices is None:
47 |         track_indices = np.arange(len(tracks))
48 |     if detection_indices is None:
49 |         detection_indices = np.arange(len(detections))
50 | 
51 |     if len(detection_indices) == 0 or len(track_indices) == 0:
52 |         return [], track_indices, detection_indices  # Nothing to match.
53 | 
54 |     cost_matrix = distance_metric(
55 |         tracks, detections, track_indices, detection_indices)
56 |     cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
57 |     indices = np.column_stack(linear_sum_assignment(cost_matrix))  # Nx2 (row, col) pairs, as the old sklearn API returned
58 | 
59 |     matches, unmatched_tracks, unmatched_detections = [], [], []
60 |     for col, detection_idx in enumerate(detection_indices):
61 |         if col not in indices[:, 1]:
62 |             unmatched_detections.append(detection_idx)
63 |     for row, track_idx in enumerate(track_indices):
64 |         if row not in indices[:, 0]:
65 |             unmatched_tracks.append(track_idx)
66 |     for row, col in indices:
67 |         track_idx = track_indices[row]
68 |         detection_idx = detection_indices[col]
69 |         if cost_matrix[row, col] > max_distance:
70 |             unmatched_tracks.append(track_idx)
71 |             unmatched_detections.append(detection_idx)
72 |         else:
73 |             matches.append((track_idx, detection_idx))
74 |     return matches, unmatched_tracks, unmatched_detections
75 | 
76 | 
77 | def matching_cascade(
78 |         distance_metric, max_distance, cascade_depth, tracks, detections,
79 |         track_indices=None, detection_indices=None):
80 |     """Run matching cascade.
81 | 
82 |     Parameters
83 |     ----------
84 |     distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
85 |         The distance metric is given a list of tracks and detections as well as
86 |         a list of N track indices and M detection indices. The metric should
87 |         return the NxM dimensional cost matrix, where element (i, j) is the
88 |         association cost between the i-th track in the given track indices and
89 |         the j-th detection in the given detection indices.
90 |     max_distance : float
91 |         Gating threshold. Associations with cost larger than this value are
92 |         disregarded.
93 |     cascade_depth: int
94 |         The cascade depth, should be set to the maximum track age.
95 |     tracks : List[track.Track]
96 |         A list of predicted tracks at the current time step.
97 |     detections : List[detection.Detection]
98 |         A list of detections at the current time step.
99 |     track_indices : Optional[List[int]]
100 |         List of track indices that maps rows in `cost_matrix` to tracks in
101 |         `tracks` (see description above). Defaults to all tracks.
102 |     detection_indices : Optional[List[int]]
103 |         List of detection indices that maps columns in `cost_matrix` to
104 |         detections in `detections` (see description above). Defaults to all
105 |         detections.
106 | 
107 |     Returns
108 |     -------
109 |     (List[(int, int)], List[int], List[int])
110 |         Returns a tuple with the following three entries:
111 |         * A list of matched track and detection indices.
112 |         * A list of unmatched track indices.
113 |         * A list of unmatched detection indices.
114 | 115 | """ 116 | if track_indices is None: 117 | track_indices = list(range(len(tracks))) 118 | if detection_indices is None: 119 | detection_indices = list(range(len(detections))) 120 | 121 | unmatched_detections = detection_indices 122 | matches = [] 123 | for level in range(cascade_depth): 124 | if len(unmatched_detections) == 0: # No detections left 125 | break 126 | 127 | track_indices_l = [ 128 | k for k in track_indices 129 | if tracks[k].time_since_update == 1 + level 130 | ] 131 | if len(track_indices_l) == 0: # Nothing to match at this level 132 | continue 133 | 134 | matches_l, _, unmatched_detections = \ 135 | min_cost_matching( 136 | distance_metric, max_distance, tracks, detections, 137 | track_indices_l, unmatched_detections) 138 | matches += matches_l 139 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 140 | return matches, unmatched_tracks, unmatched_detections 141 | 142 | 143 | def gate_cost_matrix(frame, kf, cost_matrix, tracks, detections, track_indices, detection_indices, 144 | gated_cost=INFTY_COST, only_position=False): 145 | """Invalidate infeasible entries in cost matrix based on the state 146 | distributions obtained by Kalman filtering. 147 | 148 | Parameters 149 | ---------- 150 | kf : The Kalman filter. 151 | cost_matrix : ndarray 152 | The NxM dimensional cost matrix, where N is the number of track indices 153 | and M is the number of detection indices, such that entry (i, j) is the 154 | association cost between `tracks[track_indices[i]]` and 155 | `detections[detection_indices[j]]`. 156 | tracks : List[track.Track] 157 | A list of predicted tracks at the current time step. 158 | detections : List[detection.Detection] 159 | A list of detections at the current time step. 160 | track_indices : List[int] 161 | List of track indices that maps rows in `cost_matrix` to tracks in 162 | `tracks` (see description above). 163 | detection_indices : List[int] 164 | List of detection indices that maps columns in `cost_matrix` to 165 | detections in `detections` (see description above). 166 | gated_cost : Optional[float] 167 | Entries in the cost matrix corresponding to infeasible associations are 168 | set this value. Defaults to a very large value. 169 | only_position : Optional[bool] 170 | If True, only the x, y position of the state distribution is considered 171 | during gating. Defaults to False. 172 | 173 | Returns 174 | ------- 175 | ndarray 176 | Returns the modified cost matrix. 
177 | 
178 |     """
179 |     gating_dim = 2 if only_position else 4
180 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
181 |     measurements = np.asarray(
182 |         [detections[i].to_xyah() for i in detection_indices])
183 |     for row, track_idx in enumerate(track_indices):
184 |         track = tracks[track_idx]
185 |         gating_distance = kf.gating_distance(
186 |             track.mean, track.covariance, measurements, only_position)
187 |         cost_matrix[row, gating_distance > gating_threshold] = gated_cost
188 |     return cost_matrix
--------------------------------------------------------------------------------
/src/deep_sort/linear_assignment.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/linear_assignment.pyc
--------------------------------------------------------------------------------
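To make the assignment solve in `min_cost_matching` concrete, here is a tiny standalone example of the Hungarian step (a sketch using the SciPy call imported above; the cost values are invented):

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

# Cost of assigning 2 tracks (rows) to 3 detections (columns),
# e.g. 1 - IoU or a gated appearance distance.
cost = np.array([[0.1, 0.9, 0.8],
                 [0.7, 0.2, 0.6]])
indices = np.column_stack(linear_sum_assignment(cost))
print(indices)  # [[0 0], [1 1]]: track 0 -> detection 0, track 1 -> detection 1
# Detection 2 is left unmatched and would seed a new track in Tracker.update().
```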
/src/deep_sort/my_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 | from pykalman import KalmanFilter
5 | 
6 | 
7 | """
8 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
9 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
10 | function and used as Mahalanobis gating threshold.
11 | """
12 | chi2inv95 = {
13 |     1: 3.8415,
14 |     2: 5.9915,
15 |     3: 7.8147,
16 |     4: 9.4877,
17 |     5: 11.070,
18 |     6: 12.592,
19 |     7: 14.067,
20 |     8: 15.507,
21 |     9: 16.919}
22 | 
23 | 
24 | class MyKalmanFilter(object):
25 |     """
26 |     Constructor that takes the transition matrices as parameters; if none are given, it builds default ones.
27 |     """
28 |     def __init__(self, motion_mat = None, observation_mat = None, transition_covariance = None, observation_covariance = None):
29 |         ndim, dt = 4, 1.
30 |         if motion_mat is None:
31 |             self.motion_mat = np.eye(2 * ndim, 2 * ndim)
32 |             for i in range(ndim):
33 |                 self.motion_mat[i, ndim + i] = dt
34 |         else:
35 |             self.motion_mat = motion_mat
36 | 
37 |         if observation_mat is None:
38 |             self.observation_mat = np.eye(ndim, 2 * ndim)
39 |         else:
40 |             self.observation_mat = observation_mat
41 |         print(self.motion_mat)
42 |         print(self.observation_mat)
43 |         self.kf = KalmanFilter(transition_matrices = self.motion_mat,
44 |                                observation_matrices = self.observation_mat,
45 |                                transition_covariance = transition_covariance,
46 |                                observation_covariance = observation_covariance)
47 | 
48 | 
49 |     def initiate(self, measurement):
50 |         mean_pos = measurement
51 |         mean_vel = np.zeros_like(mean_pos)
52 |         mean = np.r_[mean_pos, mean_vel]
53 |         covariance = np.eye(8)
54 |         return mean, covariance
55 | 
56 |     def predict(self, mean, covariance):
57 |         return self.kf.filter_update(mean, covariance)
58 | 
59 |     def project(self, mean, covariance):
60 |         mean = mean[:4]
61 |         covariance = np.linalg.multi_dot((
62 |             self.observation_mat, covariance, self.observation_mat.T))
63 |         return mean.filled(), covariance
64 | 
65 |     def update(self, mean, covariance, measurement):
66 |         mean, covariance = self.kf.filter_update(mean, covariance, measurement)
67 |         return mean, covariance
68 | 
69 |     def gating_distance(self, mean, covariance, measurements,
70 |                         only_position=False):
71 |         """Compute gating distance between state distribution and measurements.
72 | 
73 |         A suitable distance threshold can be obtained from `chi2inv95`. If
74 |         `only_position` is False, the chi-square distribution has 4 degrees of
75 |         freedom, otherwise 2.
76 | 
77 |         Parameters
78 |         ----------
79 |         mean : ndarray
80 |             Mean vector over the state distribution (8 dimensional).
81 |         covariance : ndarray
82 |             Covariance of the state distribution (8x8 dimensional).
83 |         measurements : ndarray
84 |             An Nx4 dimensional matrix of N measurements, each in
85 |             format (x, y, a, h) where (x, y) is the bounding box center
86 |             position, a the aspect ratio, and h the height.
87 |         only_position : Optional[bool]
88 |             If True, distance computation is done with respect to the bounding
89 |             box center position only.
90 | 
91 |         Returns
92 |         -------
93 |         ndarray
94 |             Returns an array of length N, where the i-th element contains the
95 |             squared Mahalanobis distance between (mean, covariance) and
96 |             `measurements[i]`.
97 | 
98 |         """
99 |         mean, covariance = self.project(mean, covariance)
100 |         if only_position:
101 |             mean, covariance = mean[:2], covariance[:2, :2]
102 |             measurements = measurements[:, :2]
103 |         cholesky_factor = np.linalg.cholesky(covariance)
104 |         d = measurements - mean
105 |         z = scipy.linalg.solve_triangular(
106 |             cholesky_factor, d.T, lower=True, check_finite=False,
107 |             overwrite_b=True)
108 |         squared_maha = np.sum(z * z, axis=0)
109 |         return squared_maha
--------------------------------------------------------------------------------
/src/deep_sort/my_filter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/my_filter.pyc
--------------------------------------------------------------------------------
/src/deep_sort/nn_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | 
4 | 
5 | def _pdist(a, b):
6 |     """Compute pair-wise squared distance between points in `a` and `b`.
7 | 
8 |     Parameters
9 |     ----------
10 |     a : array_like
11 |         An NxM matrix of N samples of dimensionality M.
12 |     b : array_like
13 |         An LxM matrix of L samples of dimensionality M.
14 | 
15 |     Returns
16 |     -------
17 |     ndarray
18 |         Returns a matrix of size len(a), len(b) such that element (i, j)
19 |         contains the squared distance between `a[i]` and `b[j]`.
20 | 
21 |     """
22 |     a, b = np.asarray(a), np.asarray(b)
23 |     if len(a) == 0 or len(b) == 0:
24 |         return np.zeros((len(a), len(b)))
25 |     a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26 |     r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27 |     r2 = np.clip(r2, 0., float(np.inf))
28 |     return r2
29 | 
30 | 
31 | def _cosine_distance(a, b, data_is_normalized=False):
32 |     """Compute pair-wise cosine distance between points in `a` and `b`.
33 | 
34 |     Parameters
35 |     ----------
36 |     a : array_like
37 |         An NxM matrix of N samples of dimensionality M.
38 |     b : array_like
39 |         An LxM matrix of L samples of dimensionality M.
40 |     data_is_normalized : Optional[bool]
41 |         If True, assumes rows in a and b are unit length vectors.
42 |         Otherwise, a and b are explicitly normalized to length 1.
43 | 
44 |     Returns
45 |     -------
46 |     ndarray
47 |         Returns a matrix of size len(a), len(b) such that element (i, j)
48 |         contains the cosine distance between `a[i]` and `b[j]`.
49 | 
50 |     """
51 |     if not data_is_normalized:
52 |         a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53 |         b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54 |     return 1.
- np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 
165 | 
166 |         Returns
167 |         -------
168 |         ndarray
169 |             Returns a cost matrix of shape len(targets), len(features), where
170 |             element (i, j) contains the closest distance between
171 |             `targets[i]` and `features[j]`.
172 | 
173 |         """
174 |         cost_matrix = np.zeros((len(targets), len(features)))
175 |         for i, target in enumerate(targets):
176 |             cost_matrix[i, :] = self._metric(self.samples[target], features)
177 |         return cost_matrix
--------------------------------------------------------------------------------
/src/deep_sort/nn_matching.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/nn_matching.pyc
--------------------------------------------------------------------------------
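A short usage sketch of this metric (hedged; random vectors stand in for the 128-d MARS embeddings the tracker actually supplies):

```python
import numpy as np
from deep_sort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.3, budget=None)

# Two confirmed tracks contribute one appearance feature each.
feats = np.random.rand(2, 128).astype(np.float32)
metric.partial_fit(feats, targets=np.array([1, 2]), active_targets=[1, 2])

# Cost of matching three new detection features against those two targets.
cost = metric.distance(np.random.rand(3, 128).astype(np.float32), targets=[1, 2])
print(cost.shape)  # (2, 3): one row per target, one column per detection
```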
/src/deep_sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import cv2
4 | 
5 | 
6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7 |     """Suppress overlapping detections.
8 | 
9 |     Original code from [1]_ has been adapted to include confidence score.
10 | 
11 |     .. [1] http://www.pyimagesearch.com/2015/02/16/
12 |            faster-non-maximum-suppression-python/
13 | 
14 |     Examples
15 |     --------
16 | 
17 |         >>> boxes = [d.roi for d in detections]
18 |         >>> scores = [d.confidence for d in detections]
19 |         >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20 |         >>> detections = [detections[i] for i in indices]
21 | 
22 |     Parameters
23 |     ----------
24 |     boxes : ndarray
25 |         Array of ROIs (x, y, width, height).
26 |     max_bbox_overlap : float
27 |         ROIs that overlap more than this value are suppressed.
28 |     scores : Optional[array_like]
29 |         Detector confidence score.
30 | 
31 |     Returns
32 |     -------
33 |     List[int]
34 |         Returns indices of detections that have survived non-maxima suppression.
35 | 
36 |     """
37 |     if len(boxes) == 0:
38 |         return []
39 | 
40 |     boxes = boxes.astype(float)  # plain float: np.float is a removed numpy alias
41 |     pick = []
42 | 
43 |     x1 = boxes[:, 0]
44 |     y1 = boxes[:, 1]
45 |     x2 = boxes[:, 2] + boxes[:, 0]
46 |     y2 = boxes[:, 3] + boxes[:, 1]
47 | 
48 |     area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 |     if scores is not None:
50 |         idxs = np.argsort(scores)
51 |     else:
52 |         idxs = np.argsort(y2)
53 | 
54 |     while len(idxs) > 0:
55 |         last = len(idxs) - 1
56 |         i = idxs[last]
57 |         pick.append(i)
58 | 
59 |         xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 |         yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 |         xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 |         yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 | 
64 |         w = np.maximum(0, xx2 - xx1 + 1)
65 |         h = np.maximum(0, yy2 - yy1 + 1)
66 | 
67 |         overlap = (w * h) / area[idxs[:last]]
68 | 
69 |         idxs = np.delete(
70 |             idxs, np.concatenate(
71 |                 ([last], np.where(overlap > max_bbox_overlap)[0])))
72 | 
73 |     return pick
--------------------------------------------------------------------------------
/src/deep_sort/preprocessing.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/preprocessing.pyc
--------------------------------------------------------------------------------
/src/deep_sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | 
3 | import numpy as np
4 | 
5 | class TrackState:
6 |     """
7 |     Enumeration type for the single target track state. Newly created tracks are
8 |     classified as `tentative` until enough evidence has been collected. Then,
9 |     the track state is changed to `confirmed`. Tracks that are no longer alive
10 |     are classified as `deleted` to mark them for removal from the set of active
11 |     tracks.
12 | 
13 |     """
14 | 
15 |     Tentative = 1
16 |     Confirmed = 2
17 |     Deleted = 3
18 | 
19 | 
20 | class Track:
21 |     """
22 |     A single target track with state space `(x, y, a, h)` and associated
23 |     velocities, where `(x, y)` is the center of the bounding box, `a` is the
24 |     aspect ratio and `h` is the height.
25 | 
26 |     Parameters
27 |     ----------
28 |     mean : ndarray
29 |         Mean vector of the initial state distribution.
30 |     covariance : ndarray
31 |         Covariance matrix of the initial state distribution.
32 |     track_id : int
33 |         A unique track identifier.
34 |     n_init : int
35 |         Number of consecutive detections before the track is confirmed. The
36 |         track state is set to `Deleted` if a miss occurs within the first
37 |         `n_init` frames.
38 |     max_age : int
39 |         The maximum number of consecutive misses before the track state is
40 |         set to `Deleted`.
41 |     feature : Optional[ndarray]
42 |         Feature vector of the detection this track originates from. If not None,
43 |         this feature is added to the `features` cache.
44 | 
45 |     Attributes
46 |     ----------
47 |     mean : ndarray
48 |         Mean vector of the initial state distribution.
49 |     covariance : ndarray
50 |         Covariance matrix of the initial state distribution.
51 |     track_id : int
52 |         A unique track identifier.
53 |     hits : int
54 |         Total number of measurement updates.
55 |     age : int
56 |         Total number of frames since first occurrence.
57 |     time_since_update : int
58 |         Total number of frames since last measurement update.
59 |     state : TrackState
60 |         The current track state.
61 |     features : List[ndarray]
62 |         A cache of features. On each measurement update, the associated feature
63 |         vector is added to this list.
64 | 
65 |     """
66 | 
67 |     def __init__(self, mean, covariance, track_id, n_init, max_age,
68 |                  feature=None, detection = None):
69 |         self.mean = mean
70 |         self.covariance = covariance
71 |         self.track_id = track_id
72 |         self.hits = 1
73 |         self.age = 1
74 |         self.time_since_update = 0
75 | 
76 |         self.state = TrackState.Tentative
77 |         self.features = []
78 |         if feature is not None:
79 |             self.features.append(feature)
80 |         self.last_seen_detection = detection
81 | 
82 |         self._n_init = n_init
83 |         self._max_age = max_age
84 | 
85 | 
86 | 
87 |     def to_tlwh(self):
88 |         """Get current position in bounding box format `(top left x, top left y,
89 |         width, height)`.
90 | 
91 |         Returns
92 |         -------
93 |         ndarray
94 |             The bounding box.
95 | 
96 |         """
97 |         ret = self.mean[:4].copy()
98 |         ret[2] *= ret[3]
99 |         ret[:2] -= ret[2:] / 2
100 |         return ret
101 | 
102 |     def to_tlbr(self):
103 |         """Get current position in bounding box format `(min x, min y, max x,
104 |         max y)`.
105 | 
106 |         Returns
107 |         -------
108 |         ndarray
109 |             The bounding box.
110 | 
111 |         """
112 |         ret = self.to_tlwh()
113 |         ret[2:] = ret[:2] + ret[2:]
114 |         return ret
115 | 
116 |     def predict(self, kf):
117 |         """Propagate the state distribution to the current time step using a
118 |         Kalman filter prediction step.
119 | 
120 |         Parameters
121 |         ----------
122 |         kf : kalman_filter.KalmanFilter
123 |             The Kalman filter.
124 | 
125 |         """
126 |         self.mean, self.covariance = kf.predict(self.mean, self.covariance)
127 |         self.age += 1
128 |         self.time_since_update += 1
129 | 
130 |     def update(self, kf, detection):
131 |         """Perform Kalman filter measurement update step and update the feature
132 |         cache.
133 | 
134 |         Parameters
135 |         ----------
136 |         kf : kalman_filter.KalmanFilter
137 |             The Kalman filter.
138 |         detection : Detection
139 |             The associated detection.
140 | 
141 |         """
142 |         self.mean, self.covariance = kf.update(
143 |             self.mean, self.covariance, detection.to_xyah())
144 |         self.features.append(detection.feature)
145 |         self.last_seen_detection = detection
146 |         self.hits += 1
147 |         self.time_since_update = 0
148 |         if self.state == TrackState.Tentative and self.hits >= self._n_init:
149 |             self.state = TrackState.Confirmed
150 | 
151 |     def mark_missed(self):
152 |         """Mark this track as missed (no association at the current time step).
153 |         """
154 |         if self.state == TrackState.Tentative:
155 |             self.state = TrackState.Deleted
156 |         elif self.time_since_update > self._max_age:
157 |             self.state = TrackState.Deleted
158 | 
159 |     def is_tentative(self):
160 |         """Returns True if this track is tentative (unconfirmed).
161 |         """
162 |         return self.state == TrackState.Tentative
163 | 
164 |     def is_confirmed(self):
165 |         """Returns True if this track is confirmed."""
166 |         return self.state == TrackState.Confirmed
167 | 
168 |     def is_deleted(self):
169 |         """Returns True if this track is dead and should be deleted."""
170 |         return self.state == TrackState.Deleted
171 | 
--------------------------------------------------------------------------------
/src/deep_sort/track.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/track.pyc
--------------------------------------------------------------------------------
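A hedged sketch of the track lifecycle described above, driving `Track` directly with the repo's `KalmanFilter` (the measurement is made up; `n_init`/`max_age` here are the upstream `Tracker` defaults, not this repo's Constants):

```python
import numpy as np
from deep_sort.track import Track
from deep_sort.kalman_filter import KalmanFilter

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([320., 240., 0.5, 200.]))  # (x, y, a, h)
track = Track(mean, cov, track_id=1, n_init=3, max_age=30)

print(track.is_tentative())  # True: new tracks start Tentative
# After n_init consecutive matched updates, Track.update() flips the state to
# Confirmed (hits >= n_init). A miss while still Tentative, or more than
# max_age frames without an update, moves the track to Deleted via mark_missed().
```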
/src/deep_sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import kalman_filter
5 | from . import my_filter
6 | from . import linear_assignment
7 | from . import iou_matching
8 | from pykalman import KalmanFilter
9 | from .track import Track
10 | 
11 | 
12 | class Tracker:
13 |     """
14 |     This is the multi-target tracker.
15 | 
16 |     Parameters
17 |     ----------
18 |     metric : nn_matching.NearestNeighborDistanceMetric
19 |         A distance metric for measurement-to-track association.
20 |     max_age : int
21 |         Maximum number of consecutive misses before a track is deleted.
22 |     n_init : int
23 |         Number of consecutive detections before the track is confirmed. The
24 |         track state is set to `Deleted` if a miss occurs within the first
25 |         `n_init` frames.
26 | 
27 |     Attributes
28 |     ----------
29 |     metric : nn_matching.NearestNeighborDistanceMetric
30 |         The distance metric used for measurement to track association.
31 |     max_age : int
32 |         Maximum number of consecutive misses before a track is deleted.
33 |     n_init : int
34 |         Number of frames that a track remains in initialization phase.
35 |     kf : kalman_filter.KalmanFilter
36 |         A Kalman filter to filter target trajectories in image space.
37 |     tracks : List[Track]
38 |         The list of active tracks at the current time step.
39 | 
40 |     """
41 | 
42 |     def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3):
43 |         self.metric = metric
44 |         self.max_iou_distance = max_iou_distance
45 |         self.max_age = max_age
46 |         self.n_init = n_init
47 | 
48 |         transition_covariance = np.eye(8, 8) * 10
49 |         observation_covariance = np.eye(4, 4) * 500
50 |         self.kf = kalman_filter.KalmanFilter()
51 |         self.mykf = my_filter.MyKalmanFilter(
52 |             transition_covariance = transition_covariance,
53 |             observation_covariance = observation_covariance)
54 |         self.trackerinuse = self.mykf
55 |         self.tracks = []
56 |         self._next_id = 1
57 | 
58 |     def predict(self):
59 |         """Propagate track state distributions one time step forward.
60 | 
61 |         This function should be called once every time step, before `update`.
62 |         """
63 |         # pass
64 |         for track in self.tracks:
65 |             track.predict(self.trackerinuse)
66 | 
67 |     def update(self, frame, detections):
68 |         """Perform measurement update and track management.
69 | 
70 |         Parameters
71 |         ----------
72 |         detections : List[deep_sort.detection.Detection]
73 |             A list of detections at the current time step.
74 | 
75 |         """
76 |         # Run matching cascade.
77 |         matches, unmatched_tracks, unmatched_detections = \
78 |             self._match(frame, detections)
79 | 
80 |         # Update track set.
81 |         for track_idx, detection_idx in matches:
82 |             self.tracks[track_idx].update(
83 |                 self.trackerinuse, detections[detection_idx])
84 |         for track_idx in unmatched_tracks:
85 |             self.tracks[track_idx].mark_missed()
86 |         for detection_idx in unmatched_detections:
87 |             self._initiate_track(detections[detection_idx])
88 |         self.tracks = [t for t in self.tracks if not t.is_deleted()]
89 | 
90 |         # Update distance metric.
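        # The block below folds each confirmed track's freshly gathered appearance
        # features into the nearest-neighbor metric's per-identity gallery, then
        # empties track.features so the same vectors are not re-added next frame.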
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)

    def _match(self, frame, detections):

        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = linear_assignment.gate_cost_matrix(
                frame, self.trackerinuse, cost_matrix, tracks, dets,
                track_indices, detection_indices, only_position=True)

            return cost_matrix

        # Split track set into confirmed and unconfirmed tracks.
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]

        # Associate confirmed tracks using appearance features.
        matches_a, unmatched_tracks_a, unmatched_detections = \
            linear_assignment.matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)

        # Associate remaining tracks together with unconfirmed tracks using IOU.
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update == 1]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update != 1]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            linear_assignment.min_cost_matching(
                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)
        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections

    def _initiate_track(self, detection):
        mean, covariance = self.trackerinuse.initiate(detection.to_xyah())
        self.tracks.append(Track(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            detection.feature, detection))
        self._next_id += 1
--------------------------------------------------------------------------------
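For orientation, a minimal per-frame driver for this class. The thresholds below are placeholders (the values actually used by this repo live in src/Constants.py), and building `detections` from OpenPose output is the job of Input.py:

```python
from deep_sort import nn_matching
from deep_sort.tracker import Tracker

# Placeholder thresholds; see Constants.py for the values this repo uses.
metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.5, None)
tracker = Tracker(metric, max_age=30, n_init=3)

def track_frame(frame, detections):
    """One tracking step. `detections` is a list of deep_sort Detection
    objects with appearance features attached (see Input.py)."""
    tracker.predict()                  # Kalman prediction for every track
    tracker.update(frame, detections)  # association + track management
    return [(t.track_id, t.to_tlbr()) for t in tracker.tracks
            if t.is_confirmed() and t.time_since_update == 0]
```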
/src/deep_sort/tracker.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/deep_sort/tracker.pyc
--------------------------------------------------------------------------------
/src/model_data/mars-small128.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/model_data/mars-small128.pb
--------------------------------------------------------------------------------
/src/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/tools/__init__.py
--------------------------------------------------------------------------------
/src/tools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/tools/__init__.pyc
--------------------------------------------------------------------------------
/src/tools/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/tools/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/src/tools/__pycache__/generate_detections.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/tools/__pycache__/generate_detections.cpython-36.pyc
--------------------------------------------------------------------------------
/src/tools/freeze_model.py:
--------------------------------------------------------------------------------
# vim: expandtab:ts=4:sw=4
import argparse
import tensorflow as tf
import tensorflow.contrib.slim as slim


def _batch_norm_fn(x, scope=None):
    if scope is None:
        scope = tf.get_variable_scope().name + "/bn"
    return slim.batch_norm(x, scope=scope)


def create_link(
        incoming, network_builder, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
        regularizer=None, is_first=False, summarize_activations=True):
    if is_first:
        network = incoming
    else:
        network = _batch_norm_fn(incoming, scope=scope + "/bn")
        network = nonlinearity(network)
        if summarize_activations:
            tf.summary.histogram(scope + "/activations", network)

    pre_block_network = network
    post_block_network = network_builder(pre_block_network, scope)

    incoming_dim = pre_block_network.get_shape().as_list()[-1]
    outgoing_dim = post_block_network.get_shape().as_list()[-1]
    if incoming_dim != outgoing_dim:
        assert outgoing_dim == 2 * incoming_dim, \
            "%d != %d" % (outgoing_dim, 2 * incoming_dim)
        projection = slim.conv2d(
            incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None,
            scope=scope + "/projection", weights_initializer=weights_initializer,
            biases_initializer=None, weights_regularizer=regularizer)
        network = projection + post_block_network
    else:
        network = incoming + post_block_network
    return network


def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    n = incoming.get_shape().as_list()[-1]
    stride = 1
    if increase_dim:
        n *= 2
        stride = 2

    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
        normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/1")
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)

    incoming = slim.dropout(incoming, keep_prob=0.6)

    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
        normalizer_fn=None, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/2")
    return incoming


def residual_block(incoming, scope, nonlinearity=tf.nn.elu,
                   weights_initializer=tf.truncated_normal_initializer(1e-3),
                   bias_initializer=tf.zeros_initializer(), regularizer=None,
                   increase_dim=False, is_first=False,
                   summarize_activations=True):

    def network_builder(x, s):
        return create_inner_block(
            x, s, nonlinearity, weights_initializer, bias_initializer,
            regularizer, increase_dim, summarize_activations)

    return create_link(
        incoming, network_builder, scope, nonlinearity, weights_initializer,
        regularizer, is_first, summarize_activations)


def _create_network(incoming, reuse=None, weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = incoming
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT16 training sequences can be found in
    # issue 10: https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    network = residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True)
    network = residual_block(
        network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    network = residual_block(
        network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True)
    network = residual_block(
        network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    network = residual_block(
        network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True)
    network = residual_block(
        network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False)

    feature_dim = network.get_shape().as_list()[-1]
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(
        network, feature_dim, activation_fn=nonlinearity,
        normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
        scope="fc1", weights_initializer=fc_weight_init,
        biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = slim.batch_norm(features, scope="ball", reuse=reuse)
    feature_norm = tf.sqrt(
        tf.constant(1e-8, tf.float32) +
        tf.reduce_sum(tf.square(features), [1], keepdims=True))
    features = features / feature_norm
    return features, None


def _network_factory(weight_decay=1e-8):

    def factory_fn(image, reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=False):
            with slim.arg_scope([slim.conv2d, slim.fully_connected,
                                 slim.batch_norm, slim.layer_norm],
                                reuse=reuse):
                features, logits = _create_network(
                    image, reuse=reuse, weight_decay=weight_decay)
                return features, logits

    return factory_fn


def _preprocess(image):
    image = image[:, :, ::-1]  # BGR to RGB
    return image
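
# NOTE: the l2-normalization at the end of _create_network means the cosine
# distance between two output embeddings reduces to 1 - a.dot(b), which is
# what deep_sort's nn_matching cosine metric relies on. Standalone check
# (illustration only, not part of the original script):
#
#   import numpy as np
#   a = np.random.randn(128); a /= np.linalg.norm(a)
#   b = np.random.randn(128); b /= np.linalg.norm(b)
#   cosine_distance = 1.0 - float(a.dot(b))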


def parse_args():
    """Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Freeze old model")
    parser.add_argument(
        "--checkpoint_in",
        default="resources/networks/mars-small128.ckpt-68577",
        help="Path to checkpoint file")
    parser.add_argument(
        "--graphdef_out",
        default="resources/networks/mars-small128.pb",
        help="Path to the frozen graphdef output file")
    return parser.parse_args()


def main():
    args = parse_args()

    with tf.Session(graph=tf.Graph()) as session:
        input_var = tf.placeholder(
            tf.uint8, (None, 128, 64, 3), name="images")
        image_var = tf.map_fn(
            lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
            back_prop=False)

        factory_fn = _network_factory()
        features, _ = factory_fn(image_var, reuse=None)
        features = tf.identity(features, name="features")

        saver = tf.train.Saver(slim.get_variables_to_restore())
        saver.restore(session, args.checkpoint_in)

        output_graph_def = tf.graph_util.convert_variables_to_constants(
            session, tf.get_default_graph().as_graph_def(),
            [features.name.split(":")[0]])
        with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle:
            file_handle.write(output_graph_def.SerializeToString())


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
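Regenerating the frozen graph is a one-liner; the paths below are simply the script's own argparse defaults, so adjust them to your checkout:

```
python freeze_model.py \
    --checkpoint_in resources/networks/mars-small128.ckpt-68577 \
    --graphdef_out resources/networks/mars-small128.pb
```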
/src/tools/generate_detections.py:
--------------------------------------------------------------------------------
# vim: expandtab:ts=4:sw=4
import os
import errno
import argparse
import numpy as np
import cv2
import tensorflow as tf


def _run_in_batches(f, data_dict, out, batch_size):
    data_len = len(out)
    num_batches = int(data_len / batch_size)

    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)


def extract_image_patch(image, bbox, patch_shape):
    """Extract image patch from bounding box.

    Parameters
    ----------
    image : ndarray
        The full image.
    bbox : array_like
        The bounding box in format (x, y, width, height).
    patch_shape : Optional[array_like]
        This parameter can be used to enforce a desired patch shape
        (height, width). First, the `bbox` is adapted to the aspect ratio
        of the patch shape, then it is clipped at the image boundaries.
        If None, the shape is computed from :arg:`bbox`.

    Returns
    -------
    ndarray | NoneType
        An image patch showing the :arg:`bbox`, optionally reshaped to
        :arg:`patch_shape`.
        Returns None if the bounding box is empty or fully outside of the image
        boundaries.

    """
    bbox = np.array(bbox)
    if patch_shape is not None:
        # correct aspect ratio to patch shape
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # convert to top left, bottom right
    bbox[2:] += bbox[:2]
    bbox = bbox.astype(np.int)

    # clip at image boundaries
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    image = image[sy:ey, sx:ex]
    image = cv2.resize(image, tuple(patch_shape[::-1]))
    return image


class ImageEncoder(object):

    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        # Force CPU execution for the feature encoder.
        config = tf.ConfigProto(
            device_count={'GPU': 0}
        )
        self.session = tf.Session(config=config)
        with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf.import_graph_def(graph_def, name="net")
        self.input_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % input_name)
        self.output_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % output_name)

        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]

    def __call__(self, data_x, batch_size=32):
        out = np.zeros((len(data_x), self.feature_dim), np.float32)
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
        return out


def create_box_encoder(model_filename, input_name="images",
                       output_name="features", batch_size=32):
    image_encoder = ImageEncoder(model_filename, input_name, output_name)
    image_shape = image_encoder.image_shape

    def encoder(image, boxes):
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(image, box, image_shape[:2])
            if patch is None:
                print("WARNING: Failed to extract image patch: %s." % str(box))
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches)
        return image_encoder(image_patches, batch_size)

    return encoder
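
# Usage sketch (illustration only; "frame.jpg" is a hypothetical input):
#
#   encoder = create_box_encoder("model_data/mars-small128.pb", batch_size=32)
#   frame = cv2.imread("frame.jpg", cv2.IMREAD_COLOR)
#   boxes = [[50, 40, 80, 200], [300, 60, 70, 180]]   # rows of (x, y, w, h)
#   features = encoder(frame, boxes)                  # -> (2, 128) float32
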
def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
    """Generate detections with features.

    Parameters
    ----------
    encoder : Callable[image, ndarray] -> ndarray
        The encoder function takes as input a BGR color image and a matrix of
        bounding boxes in format `(x, y, w, h)` and returns a matrix of
        corresponding feature vectors.
    mot_dir : str
        Path to the MOTChallenge directory (can be either train or test).
    output_dir
        Path to the output directory. Will be created if it does not exist.
    detection_dir
        Path to custom detections. The directory structure should be the
        default MOTChallenge structure: `[sequence]/det/det.txt`. If None,
        uses the standard MOTChallenge detections.

    """
    if detection_dir is None:
        detection_dir = mot_dir
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
            pass
        else:
            raise ValueError(
                "Failed to create output directory '%s'" % output_dir)

    for sequence in os.listdir(mot_dir):
        print("Processing %s" % sequence)
        sequence_dir = os.path.join(mot_dir, sequence)

        image_dir = os.path.join(sequence_dir, "img1")
        image_filenames = {
            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
            for f in os.listdir(image_dir)}

        detection_file = os.path.join(
            detection_dir, sequence, "det/det.txt")
        detections_in = np.loadtxt(detection_file, delimiter=',')
        detections_out = []

        frame_indices = detections_in[:, 0].astype(np.int)
        min_frame_idx = frame_indices.min()
        max_frame_idx = frame_indices.max()
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
            mask = frame_indices == frame_idx
            rows = detections_in[mask]

            if frame_idx not in image_filenames:
                print("WARNING: could not find image for frame %d" % frame_idx)
                continue
            bgr_image = cv2.imread(
                image_filenames[frame_idx], cv2.IMREAD_COLOR)
            features = encoder(bgr_image, rows[:, 2:6].copy())
            detections_out += [np.r_[(row, feature)] for row, feature
                               in zip(rows, features)]

        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
        np.save(
            output_filename, np.asarray(detections_out), allow_pickle=False)


def parse_args():
    """Parse command line arguments.
    """
    parser = argparse.ArgumentParser(description="Re-ID feature extractor")
    parser.add_argument(
        "--model",
        default="resources/networks/mars-small128.pb",
        help="Path to frozen inference graph protobuf.")
    parser.add_argument(
        "--mot_dir", help="Path to MOTChallenge directory (train or test)",
        required=True)
    parser.add_argument(
        "--detection_dir", help="Path to custom detections. Defaults to "
        "standard MOT detections. Directory structure should be the default "
        "MOTChallenge structure: [sequence]/det/det.txt", default=None)
    parser.add_argument(
        "--output_dir", help="Output directory. Will be created if it does "
        "not exist.", default="detections")
    return parser.parse_args()


def main():
    args = parse_args()
    encoder = create_box_encoder(args.model, batch_size=32)
    generate_detections(encoder, args.mot_dir, args.output_dir,
                        args.detection_dir)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/src/tools/generate_detections.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/tools/generate_detections.pyc
--------------------------------------------------------------------------------
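Invocation mirrors the argparse definitions above; `--mot_dir` points at a MOTChallenge split (the path below is hypothetical):

```
python tools/generate_detections.py \
    --model resources/networks/mars-small128.pb \
    --mot_dir ./MOT16/train \
    --output_dir ./detections
```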
/src/utils.py:
--------------------------------------------------------------------------------
import numpy as np
from numba import jit
import itertools

def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2,s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)  # itertools.izip is Python 2 only; zip works on Python 3

def poses2boxes(poses):
    """
    Parameters
    ----------
    poses: ndarray of human 2D poses [People * BodyPart]
    Returns
    ----------
    boxes: ndarray of containing boxes [People * [x1,y1,x2,y2]]
    """
    global seen_bodyparts
    boxes = []
    for person in poses:
        # Keep only the body parts OpenPose actually detected (non-zero coordinates).
        seen_bodyparts = person[np.where((person[:,0] != 0) | (person[:,1] != 0))]
        # box = [ int(min(seen_bodyparts[:,0])), int(min(seen_bodyparts[:,1])),
        #         int(max(seen_bodyparts[:,0])), int(max(seen_bodyparts[:,1]))]
        mean = np.mean(seen_bodyparts, axis=0)
        deviation = np.std(seen_bodyparts, axis=0)
        box = [int(mean[0]-deviation[0]), int(mean[1]-deviation[1]),
               int(mean[0]+deviation[0]), int(mean[1]+deviation[1])]
        boxes.append(box)
    return np.array(boxes)

def distancia_midpoints(mid1, mid2):
    # Euclidean distance between two midpoints.
    return np.linalg.norm(np.array(mid1)-np.array(mid2))

def pose2midpoint(pose):
    """
    Parameters
    ----------
    pose: ndarray of human 2D pose [BodyPart]
    Returns
    ----------
    midpoint: pose midpoint [x,y]
    """
    box = poses2boxes([pose])[0]
    midpoint = [np.mean([box[0],box[2]]), np.mean([box[1],box[3]])]
    return np.array(midpoint)

@jit
def iou(bb_test, bb_gt):
    """
    Computes IOU between two bboxes in the form [x1,y1,x2,y2]
    """
    xx1 = np.maximum(bb_test[0], bb_gt[0])
    yy1 = np.maximum(bb_test[1], bb_gt[1])
    xx2 = np.minimum(bb_test[2], bb_gt[2])
    yy2 = np.minimum(bb_test[3], bb_gt[3])
    w = np.maximum(0., xx2 - xx1)
    h = np.maximum(0., yy2 - yy1)
    wh = w * h
    o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
        + (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
    return(o)
--------------------------------------------------------------------------------
/src/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ortegatron/liveposetracker/cbd74e6ec90e099e8368b80d537b452caed64203/src/utils.pyc
--------------------------------------------------------------------------------
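A worked example of `iou` with hand-checkable numbers:

```python
from utils import iou  # run from the src/ directory

a = [0, 0, 10, 10]
b = [5, 5, 15, 15]
# intersection: 5 * 5 = 25; union: 100 + 100 - 25 = 175
print(iou(a, b))  # 25 / 175 = 0.142857...
```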