├── .gitignore ├── README.md ├── objectTracker ├── __init__.py ├── data │ └── mars-small128.pb ├── deep_sort │ ├── detection.py │ ├── generate_detections.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py └── tracking.py ├── setup.py └── test_video_tracking.gif /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Custom Object Tracking 2 | ## Introduction 3 | This repo provides a function for tracking multiple objects in videos, given a trained object detection model and a source video file as inputs. The tracking approach used in this repo is [DeepSort](https://github.com/nwojke/deep_sort) - [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/pdf/1703.07402.pdf) 4 | 5 | ## Installation 6 | Please install [Tensorflow 2 Object Detection API](https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html) and add the path to your environment. 7 | 8 | ## Usage 9 | ### 1. Clone the github repository 10 | ```shell 11 | git clone https://github.com/sek788432/Custom_Object_Tracking.git 12 | ``` 13 | 14 | ### 2. Execution (In two ways) 15 | * saved_model_path: your own object detection model ckpt path 16 | * test_path: test video path 17 | * label_map_path: label_map.pbtxt path 18 | * deep_sort_model: deep sort model path 19 | * output_path: output video path 20 | * min_score_thresh: the minimum score threshold of object detection model 21 | 22 | 1. Run tracking.py 23 | * e.g.
24 | ```shell 25 | cd Custom_Object_Tracking/objectTracker/ 26 | python tracking.py \ 27 | --saved_model_path=exported-models/ssd_resnet50_119ckpt \ 28 | --test_path=test_video.mp4 \ 29 | --label_map_path=label_map.pbtxt \ 30 | --deep_sort_model=data/mars-small128.pb \ 31 | --output_path=test_video_tracking.mp4 \ 32 | --min_score_thresh=.5 33 | ``` 34 | 2. Call TrackVideo function 35 | * e.g. 36 | ```python 37 | from Custom_Object_Tracking.objectTracker.tracking import TrackVideo 38 | TrackVideo(label_path, model_path, video_path, 39 | output_path, threshold, deep_sort_model) 40 | ``` 41 | 42 | ## Result 43 | ### Tracking vehicle by our own model (SSD ResNet50 trained on Waymo Dataset) 44 | ![Vehicle Tracking](test_video_tracking.gif?raw=true "video") 45 | -------------------------------------------------------------------------------- /objectTracker/__init__.py: -------------------------------------------------------------------------------- 1 | # from .data import * 2 | # from .deep_sort import * 3 | # from .tracking import * 4 | -------------------------------------------------------------------------------- /objectTracker/data/mars-small128.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sek788432/Custom_Object_Tracking/976f794d1fe3217a074e967bd1e1e9d8bdc8339a/objectTracker/data/mars-small128.pb -------------------------------------------------------------------------------- /objectTracker/deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 
15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | class_name : ndarray 25 | Detector class. 26 | feature : ndarray | NoneType 27 | A feature vector that describes the object contained in this image. 28 | 29 | """ 30 | 31 | def __init__(self, tlwh, confidence, class_name, feature): 32 | self.tlwh = np.asarray(tlwh, dtype=np.float) 33 | self.confidence = float(confidence) 34 | self.class_name = class_name 35 | self.feature = np.asarray(feature, dtype=np.float32) 36 | 37 | def get_class(self): 38 | return self.class_name 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 
51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /objectTracker/deep_sort/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | 9 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 10 | if len(physical_devices) > 0: 11 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 12 | 13 | def _run_in_batches(f, data_dict, out, batch_size): 14 | data_len = len(out) 15 | num_batches = int(data_len / batch_size) 16 | 17 | s, e = 0, 0 18 | for i in range(num_batches): 19 | s, e = i * batch_size, (i + 1) * batch_size 20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 21 | out[s:e] = f(batch_data_dict) 22 | if e < len(out): 23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 24 | out[e:] = f(batch_data_dict) 25 | 26 | 27 | def extract_image_patch(image, bbox, patch_shape): 28 | """Extract image patch from bounding box. 29 | 30 | Parameters 31 | ---------- 32 | image : ndarray 33 | The full image. 34 | bbox : array_like 35 | The bounding box in format (x, y, width, height). 36 | patch_shape : Optional[array_like] 37 | This parameter can be used to enforce a desired patch shape 38 | (height, width). First, the `bbox` is adapted to the aspect ratio 39 | of the patch shape, then it is clipped at the image boundaries. 40 | If None, the shape is computed from :arg:`bbox`. 41 | 42 | Returns 43 | ------- 44 | ndarray | NoneType 45 | An image patch showing the :arg:`bbox`, optionally reshaped to 46 | :arg:`patch_shape`. 47 | Returns None if the bounding box is empty or fully outside of the image 48 | boundaries. 
49 | 50 | """ 51 | bbox = np.array(bbox) 52 | if patch_shape is not None: 53 | # correct aspect ratio to patch shape 54 | target_aspect = float(patch_shape[1]) / patch_shape[0] 55 | new_width = target_aspect * bbox[3] 56 | bbox[0] -= (new_width - bbox[2]) / 2 57 | bbox[2] = new_width 58 | 59 | # convert to top left, bottom right 60 | bbox[2:] += bbox[:2] 61 | bbox = bbox.astype(np.int) 62 | 63 | # clip at image boundaries 64 | bbox[:2] = np.maximum(0, bbox[:2]) 65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 66 | if np.any(bbox[:2] >= bbox[2:]): 67 | return None 68 | sx, sy, ex, ey = bbox 69 | image = image[sy:ey, sx:ex] 70 | image = cv2.resize(image, tuple(patch_shape[::-1])) 71 | return image 72 | 73 | 74 | class ImageEncoder(object): 75 | 76 | def __init__(self, checkpoint_filename, input_name="images", output_name="features"): 77 | self.session = tf.Session() 78 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 79 | graph_def = tf.GraphDef() 80 | graph_def.ParseFromString(file_handle.read()) 81 | tf.import_graph_def(graph_def) 82 | try: 83 | self.input_var = tf.get_default_graph().get_tensor_by_name(input_name) 84 | self.output_var = tf.get_default_graph().get_tensor_by_name(output_name) 85 | except KeyError: 86 | layers = [i.name for i in tf.get_default_graph().get_operations()] 87 | self.input_var = tf.get_default_graph().get_tensor_by_name(layers[0]+':0') 88 | self.output_var = tf.get_default_graph().get_tensor_by_name(layers[-1]+':0') 89 | 90 | assert len(self.output_var.get_shape()) == 2 91 | assert len(self.input_var.get_shape()) == 4 92 | self.feature_dim = self.output_var.get_shape().as_list()[-1] 93 | self.image_shape = self.input_var.get_shape().as_list()[1:] 94 | 95 | def __call__(self, data_x, batch_size=32): 96 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 97 | _run_in_batches( 98 | lambda x: self.session.run(self.output_var, feed_dict=x), 99 | {self.input_var: data_x}, out, batch_size) 100 
| return out 101 | 102 | 103 | def create_box_encoder(model_filename, input_name="images:0", output_name="features:0", batch_size=32): 104 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 105 | image_shape = image_encoder.image_shape 106 | 107 | def encoder(image, boxes): 108 | image_patches = [] 109 | for box in boxes: 110 | patch = extract_image_patch(image, box, image_shape[:2]) 111 | if patch is None: 112 | print("WARNING: Failed to extract image patch: %s." % str(box)) 113 | patch = np.random.uniform(0., 255., image_shape).astype(np.uint8) 114 | image_patches.append(patch) 115 | image_patches = np.asarray(image_patches) 116 | return image_encoder(image_patches, batch_size) 117 | 118 | return encoder 119 | 120 | 121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 122 | """Generate detections with features. 123 | 124 | Parameters 125 | ---------- 126 | encoder : Callable[image, ndarray] -> ndarray 127 | The encoder function takes as input a BGR color image and a matrix of 128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 129 | corresponding feature vectors. 130 | mot_dir : str 131 | Path to the MOTChallenge directory (can be either train or test). 132 | output_dir 133 | Path to the output directory. Will be created if it does not exist. 134 | detection_dir 135 | Path to custom detections. The directory structure should be the default 136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 137 | standard MOTChallenge detections. 
138 | 139 | """ 140 | if detection_dir is None: 141 | detection_dir = mot_dir 142 | try: 143 | os.makedirs(output_dir) 144 | except OSError as exception: 145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 146 | pass 147 | else: 148 | raise ValueError( 149 | "Failed to created output directory '%s'" % output_dir) 150 | 151 | for sequence in os.listdir(mot_dir): 152 | print("Processing %s" % sequence) 153 | sequence_dir = os.path.join(mot_dir, sequence) 154 | 155 | image_dir = os.path.join(sequence_dir, "img1") 156 | image_filenames = { 157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 158 | for f in os.listdir(image_dir)} 159 | 160 | detection_file = os.path.join( 161 | detection_dir, sequence, "det/det.txt") 162 | detections_in = np.loadtxt(detection_file, delimiter=',') 163 | detections_out = [] 164 | 165 | frame_indices = detections_in[:, 0].astype(np.int) 166 | min_frame_idx = frame_indices.astype(np.int).min() 167 | max_frame_idx = frame_indices.astype(np.int).max() 168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 170 | mask = frame_indices == frame_idx 171 | rows = detections_in[mask] 172 | 173 | if frame_idx not in image_filenames: 174 | print("WARNING could not find image for frame %d" % frame_idx) 175 | continue 176 | bgr_image = cv2.imread( 177 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 178 | features = encoder(bgr_image, rows[:, 2:6].copy()) 179 | detections_out += [np.r_[(row, feature)] for row, feature 180 | in zip(rows, features)] 181 | 182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 183 | np.save( 184 | output_filename, np.asarray(detections_out), allow_pickle=False) 185 | 186 | 187 | def parse_args(): 188 | """Parse command line arguments. 
189 | """ 190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 191 | parser.add_argument( 192 | "--model", 193 | default="resources/networks/mars-small128.pb", 194 | help="Path to freezed inference graph protobuf.") 195 | parser.add_argument( 196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 197 | required=True) 198 | parser.add_argument( 199 | "--detection_dir", help="Path to custom detections. Defaults to " 200 | "standard MOT detections Directory structure should be the default " 201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 202 | parser.add_argument( 203 | "--output_dir", help="Output directory. Will be created if it does not" 204 | " exist.", default="detections") 205 | return parser.parse_args() 206 | 207 | 208 | def main(): 209 | args = parse_args() 210 | encoder = create_box_encoder(args.model, batch_size=32) 211 | generate_detections(encoder, args.mot_dir, args.output_dir, 212 | args.detection_dir) 213 | 214 | 215 | if __name__ == "__main__": 216 | main() 217 | -------------------------------------------------------------------------------- /objectTracker/deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Compute intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate.
24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /objectTracker/deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. 
Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 
163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 
209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /objectTracker/deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 
29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | indices = linear_sum_assignment(cost_matrix) 59 | indices = np.asarray(indices) 60 | indices = np.transpose(indices) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in indices[:, 1]: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in indices[:, 0]: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in indices: 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def 
matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be set to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices.
def gate_cost_matrix(
        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False):
    """Overwrite cost-matrix entries whose gating distance is too large.

    For every listed track, the Kalman filter's gating distance to each
    listed detection is computed; entries whose distance exceeds the
    threshold looked up in `kalman_filter.chi2inv95` are replaced by
    `gated_cost`.

    Parameters
    ----------
    kf : The Kalman filter.
        Must provide `gating_distance(mean, covariance, measurements,
        only_position)`.
    cost_matrix : ndarray
        The NxM cost matrix, where row i belongs to
        `tracks[track_indices[i]]` and column j belongs to
        `detections[detection_indices[j]]`. It is modified in place.
    tracks : List[track.Track]
        A list of predicted tracks at the current time step.
    detections : List[detection.Detection]
        A list of detections at the current time step.
    track_indices : List[int]
        Maps rows of `cost_matrix` to entries of `tracks`.
    detection_indices : List[int]
        Maps columns of `cost_matrix` to entries of `detections`.
    gated_cost : Optional[float]
        Value written into infeasible entries. Defaults to `INFTY_COST`.
    only_position : Optional[bool]
        If True, the threshold for 2 degrees of freedom is used and the flag
        is forwarded to the Kalman filter; otherwise 4 degrees of freedom
        are used. Defaults to False.

    Returns
    -------
    ndarray
        The same `cost_matrix` object that was passed in, after gating.

    """
    if only_position:
        degrees_of_freedom = 2
    else:
        degrees_of_freedom = 4
    threshold = kalman_filter.chi2inv95[degrees_of_freedom]

    # One measurement row per selected detection, in column order.
    xyah = np.asarray([detections[d].to_xyah() for d in detection_indices])

    selected_tracks = (tracks[t] for t in track_indices)
    for row, trk in enumerate(selected_tracks):
        distances = kf.gating_distance(
            trk.mean, trk.covariance, xyah, only_position)
        infeasible = distances > threshold
        cost_matrix[row, infeasible] = gated_cost
    return cost_matrix
def _cosine_distance(a, b, data_is_normalized=False):
    """Compute pair-wise cosine distance between points in `a` and `b`.

    Parameters
    ----------
    a : array_like
        An NxM matrix of N samples of dimensionality M.
    b : array_like
        An LxM matrix of L samples of dimensionality M.
    data_is_normalized : Optional[bool]
        If True, assumes rows in a and b are unit length vectors.
        Otherwise, a and b are explicitly normalized to length 1.

    Returns
    -------
    ndarray
        Returns a matrix of size len(a), len(b) such that element (i, j)
        contains the cosine distance (1 - cosine similarity) between
        `a[i]` and `b[j]`.

    Notes
    -----
    Rows of zero norm are not special-cased: normalizing them divides by
    zero and yields NaN entries.

    """
    # Convert unconditionally so that plain lists also work when
    # `data_is_normalized` is True (a list has no `.T` attribute).
    a, b = np.asarray(a), np.asarray(b)
    if not data_is_normalized:
        a = a / np.linalg.norm(a, axis=1, keepdims=True)
        b = b / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)
85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
    """Suppress overlapping detections.

    Original code from [1]_ has been adapted to include confidence score.

    .. [1] http://www.pyimagesearch.com/2015/02/16/
           faster-non-maximum-suppression-python/

    Examples
    --------

        >>> boxes = [d.roi for d in detections]
        >>> scores = [d.confidence for d in detections]
        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
        >>> detections = [detections[i] for i in indices]

    Parameters
    ----------
    boxes : array_like
        Array of ROIs (x, y, width, height), one row per box.
    max_bbox_overlap : float
        ROIs that overlap more than this value are suppressed. The overlap
        of a candidate with an already picked box is the intersection area
        divided by the candidate's own area (not intersection-over-union).
    scores : Optional[array_like]
        Detector confidence score. When given, boxes are picked in order of
        decreasing score; otherwise in order of decreasing bottom edge.

    Returns
    -------
    List[int]
        Returns indices of detections that have survived non-maxima suppression.

    """
    if len(boxes) == 0:
        return []

    # `np.float` was removed in NumPy 1.24; the builtin `float` names the
    # same dtype. `np.asarray` also accepts a plain list of boxes.
    boxes = np.asarray(boxes, dtype=float)
    pick = []

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]

    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)

    while len(idxs) > 0:
        # The best remaining candidate sits at the end of the sorted order.
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # Intersection of the picked box with every remaining candidate.
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])

        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        overlap = (w * h) / area[idxs[:last]]

        # Drop the picked box and every candidate it overlaps too much.
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))

    return pick
class Track:
    """
    One tracked target. The state space is `(x, y, a, h)` plus the matching
    velocities, with `(x, y)` the bounding box center, `a` the aspect ratio
    and `h` the height.

    Parameters
    ----------
    mean : ndarray
        Mean vector of the initial state distribution.
    covariance : ndarray
        Covariance matrix of the initial state distribution.
    track_id : int
        A unique track identifier.
    n_init : int
        Number of measurement updates (`hits`) a tentative track needs
        before it is confirmed. A tentative track that is marked missed is
        set to `Deleted`.
    max_age : int
        A track marked missed while `time_since_update` exceeds this value
        is set to `Deleted`.
    feature : Optional[ndarray]
        Feature vector of the detection this track originates from. If not
        None, this feature is added to the `features` cache.
    class_name : Optional
        Class label stored on the track and returned by `get_class`.

    Attributes
    ----------
    mean : ndarray
        Mean vector of the current state distribution.
    covariance : ndarray
        Covariance matrix of the current state distribution.
    track_id : int
        A unique track identifier.
    hits : int
        Total number of measurement updates.
    age : int
        Total number of frames since first occurrence.
    time_since_update : int
        Total number of frames since last measurement update.
    state : TrackState
        The current track state.
    features : List[ndarray]
        A cache of features. On each measurement update, the associated
        feature vector is added to this list.
    class_name : Optional
        The label given at construction time.

    """

    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None, class_name=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id

        # Bookkeeping counters; see the Attributes section above.
        self.hits = 1
        self.age = 1
        self.time_since_update = 0

        self.state = TrackState.Tentative
        self.features = [] if feature is None else [feature]

        self._n_init = n_init
        self._max_age = max_age
        self.class_name = class_name

    def to_tlwh(self):
        """Return the current box as `(top left x, top left y, width,
        height)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        box = self.mean[:4].copy()
        # aspect ratio * height -> width
        box[2] = box[2] * box[3]
        # center -> top-left corner
        box[:2] -= box[2:] / 2
        return box

    def to_tlbr(self):
        """Return the current box as `(min x, min y, max x, max y)`.

        Returns
        -------
        ndarray
            The bounding box.

        """
        box = self.to_tlwh()
        # top-left + size -> bottom-right corner
        box[2:] = box[:2] + box[2:]
        return box

    def get_class(self):
        """Return the class label stored on this track."""
        return self.class_name

    def predict(self, kf):
        """Advance the state distribution by one Kalman prediction step and
        increment `age` and `time_since_update`.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.

        """
        predicted = kf.predict(self.mean, self.covariance)
        self.mean, self.covariance = predicted
        self.age += 1
        self.time_since_update += 1

    def update(self, kf, detection):
        """Apply a Kalman measurement update with `detection` and cache its
        feature vector.

        Parameters
        ----------
        kf : kalman_filter.KalmanFilter
            The Kalman filter.
        detection : Detection
            The associated detection.

        """
        corrected = kf.update(
            self.mean, self.covariance, detection.to_xyah())
        self.mean, self.covariance = corrected
        self.features.append(detection.feature)

        self.hits += 1
        self.time_since_update = 0

        # A tentative track is promoted once it has collected enough hits.
        if self.state != TrackState.Tentative:
            return
        if self.hits >= self._n_init:
            self.state = TrackState.Confirmed

    def mark_missed(self):
        """Mark this track as missed (no association at the current time
        step). Tentative tracks and tracks older than `max_age` since their
        last update become `Deleted`.
        """
        is_tentative = self.state == TrackState.Tentative
        if is_tentative or self.time_since_update > self._max_age:
            self.state = TrackState.Deleted

    def is_tentative(self):
        """Returns True if this track is tentative (unconfirmed)."""
        return self.state == TrackState.Tentative

    def is_confirmed(self):
        """Returns True if this track is confirmed."""
        return self.state == TrackState.Confirmed

    def is_deleted(self):
        """Returns True if this track is dead and should be deleted."""
        return self.state == TrackState.Deleted
The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 
def _match(self, detections):
    """Associate the current tracks with `detections`.

    Confirmed tracks are matched first through the matching cascade, using
    the appearance metric gated by the Kalman filter. Unconfirmed tracks,
    together with confirmed tracks left unmatched whose
    `time_since_update` is 1, are then matched against the remaining
    detections using the IOU cost.

    Parameters
    ----------
    detections : List[deep_sort.detection.Detection]
        A list of detections at the current time step.

    Returns
    -------
    (List[(int, int)], List[int], List[int])
        Matched (track index, detection index) pairs, unmatched track
        indices and unmatched detection indices.

    """

    def gated_metric(tracks, dets, track_indices, detection_indices):
        # Appearance cost between the selected tracks and detections,
        # gated afterwards by the Kalman filter.
        det_features = np.array(
            [dets[i].feature for i in detection_indices])
        track_ids = np.array(
            [tracks[i].track_id for i in track_indices])
        appearance_cost = self.metric.distance(det_features, track_ids)
        return linear_assignment.gate_cost_matrix(
            self.kf, appearance_cost, tracks, dets, track_indices,
            detection_indices)

    # Split track set into confirmed and unconfirmed tracks.
    confirmed_tracks, unconfirmed_tracks = [], []
    for idx, trk in enumerate(self.tracks):
        if trk.is_confirmed():
            confirmed_tracks.append(idx)
        else:
            unconfirmed_tracks.append(idx)

    # Associate confirmed tracks using appearance features.
    cascade_result = linear_assignment.matching_cascade(
        gated_metric, self.metric.matching_threshold, self.max_age,
        self.tracks, detections, confirmed_tracks)
    matches_a, leftover_tracks, unmatched_detections = cascade_result

    # Leftover tracks whose time_since_update is 1 get a second chance via
    # IOU; the others stay unmatched.
    recently_seen, unmatched_tracks_a = [], []
    for k in leftover_tracks:
        if self.tracks[k].time_since_update == 1:
            recently_seen.append(k)
        else:
            unmatched_tracks_a.append(k)
    iou_track_candidates = unconfirmed_tracks + recently_seen

    iou_result = linear_assignment.min_cost_matching(
        iou_matching.iou_cost, self.max_iou_distance, self.tracks,
        detections, iou_track_candidates, unmatched_detections)
    matches_b, unmatched_tracks_b, unmatched_detections = iou_result

    unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
    return matches_a + matches_b, unmatched_tracks, unmatched_detections
def TrackVideo(PATH_TO_LABELS, PATH_TO_SAVED_MODEL, PATH_TO_TEST_VIDEO,
               PATH_TO_OUTPUT_VIDEO, MIN_SCORE_THRESH, DEEP_SORT_MODEL):
    """Detect and track objects in a video and write an annotated copy.

    Every frame is passed through the saved detection model, detections
    scoring above `MIN_SCORE_THRESH` are encoded and handed to the tracker,
    and the boxes of confirmed tracks are drawn on the frame before it is
    written to the output video.

    Parameters
    ----------
    PATH_TO_LABELS : str
        Path to the label map file.
    PATH_TO_SAVED_MODEL : str
        Directory that contains the `saved_model` sub-directory.
    PATH_TO_TEST_VIDEO : str
        Path of the input video.
    PATH_TO_OUTPUT_VIDEO : str
        Path of the annotated output video.
    MIN_SCORE_THRESH : float
        Detections must score strictly above this value to be tracked.
    DEEP_SORT_MODEL : str
        Path to the model file handed to `gdet.create_box_encoder`.

    """
    PATH_TO_SAVED_MODEL = os.path.join(PATH_TO_SAVED_MODEL, "saved_model")
    category_index = label_map_util.create_category_index_from_labelmap(
        PATH_TO_LABELS, use_display_name=True)

    # Load saved model and build the detection function
    print('Loading model...', end='')
    start_time = time.time()
    detect_fn = tf.saved_model.load(PATH_TO_SAVED_MODEL)
    elapsed_time = time.time() - start_time
    print('Done! Took {} seconds'.format(elapsed_time))

    # Definition of the parameters
    max_cosine_distance = 0.7
    nms_max_overlap = 0.5
    nn_budget = None
    # Confirmed tracks not updated for more frames than this are not drawn.
    max_frames_since_update = 5
    # Maps tracker ids to consecutive display ids, in order of appearance.
    id_table = {}
    new_index = 1

    # Names that may be tracked; by default every name in the label map.
    # Loop-invariant, so it is built once instead of once per frame.
    track_only = {category_index[ID]['name'] for ID in category_index}

    # initialize deep sort object
    encoder = gdet.create_box_encoder(DEEP_SORT_MODEL, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # Video setting
    cap = cv2.VideoCapture(PATH_TO_TEST_VIDEO)
    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the duration of the output video.
        fps = cap.get(cv2.CAP_PROP_FPS)
        codec = cv2.VideoWriter_fourcc(*'XVID')
        # NOTE(review): the original comment said the output path must be
        # .mp4, while XVID is commonly paired with .avi -- confirm.
        out = cv2.VideoWriter(PATH_TO_OUTPUT_VIDEO, codec, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            image_np = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # The model expects a batch of image tensors, so convert the
            # frame and add a leading axis with `tf.newaxis`.
            input_tensor = tf.convert_to_tensor(image_np)[tf.newaxis, ...]
            detections = detect_fn(input_tensor)

            # All outputs are batched tensors. Convert to numpy arrays,
            # take index [0] to remove the batch dimension and keep only
            # the first num_detections entries.
            num_detections = int(detections['num_detections'])
            det_boxes = detections['detection_boxes'][0, :num_detections].numpy()
            det_scores = detections['detection_scores'][0, :num_detections].numpy()
            # detection_classes should be ints.
            det_classes = detections['detection_classes'][
                0, :num_detections].numpy().astype(np.int64)

            # filter score > MIN_SCORE_THRESH
            keep = det_scores > MIN_SCORE_THRESH
            det_boxes = det_boxes[keep]
            det_scores = det_scores[keep]
            det_classes = det_classes[keep]

            frame_height, frame_width = image_np.shape[:2]
            boxes, scores, names = [], [], []
            for (ymin, xmin, ymax, xmax), score, class_id in zip(
                    det_boxes, det_scores, det_classes):
                name = category_index[class_id]['name']
                if track_only and name not in track_only:
                    continue
                # Boxes are scaled by the frame size to get pixel values.
                left = int(xmin * frame_width)
                right = int(xmax * frame_width)
                top = int(ymin * frame_height)
                bottom = int(ymax * frame_height)
                boxes.append([left, top, right - left, bottom - top])
                scores.append(score)
                names.append(name)

            # Obtain all the detections for the given frame.
            boxes = np.array(boxes)
            names = np.array(names)
            scores = np.array(scores)
            features = np.array(encoder(image_np.copy(), boxes))
            track_detections = [
                Detection(bbox, score, class_name, feature)
                for bbox, score, class_name, feature
                in zip(boxes, scores, names, features)]

            # Run non-maxima suppression.
            nms_boxes = np.array([d.tlwh for d in track_detections])
            nms_scores = np.array([d.confidence for d in track_detections])
            indices = preprocessing.non_max_suppression(
                nms_boxes, nms_max_overlap, nms_scores)
            track_detections = [track_detections[i] for i in indices]

            # Pass detections to the deepsort object and obtain the track
            # information.
            tracker.predict()
            tracker.update(track_detections)

            # Obtain info from the tracks
            tracked_bboxes = []
            for track in tracker.tracks:
                if (not track.is_confirmed()
                        or track.time_since_update > max_frames_since_update):
                    continue

                tracking_id = track.track_id
                if tracking_id not in id_table:
                    id_table[tracking_id] = new_index
                    new_index += 1

                # Row layout expected by draw_boxes:
                # xmin, ymin, xmax, ymax, display id, class name.
                tracked_bboxes.append(
                    track.to_tlbr().tolist()
                    + [id_table[tracking_id], track.get_class()])

            image_np_with_detections = image_np.copy()
            draw_boxes(
                image_np_with_detections,
                tracked_bboxes,
                category_index
            )

            # Convert back to BGR for the writer; cvtColor returns a
            # contiguous array, unlike a reversed-channel slice.
            out.write(cv2.cvtColor(image_np_with_detections, cv2.COLOR_RGB2BGR))
    finally:
        # Always release the video handles so the output file is finalized
        # even if a frame fails to process.
        cap.release()
        if out is not None:
            out.release()
    print('Done')
parser.add_argument('--min_score_thresh', required=False, default=0.0, 192 | help='min score threshold') 193 | 194 | args = parser.parse_args() 195 | 196 | # Path definition 197 | PATH_TO_SAVED_MODEL = args.saved_model_path 198 | PATH_TO_TEST_VIDEO = args.test_path 199 | PATH_TO_OUTPUT_VIDEO = args.output_path 200 | PATH_TO_LABELS = args.label_map_path 201 | DEEP_SORT_MODEL = args.deep_sort_model 202 | MIN_SCORE_THRESH = float(args.min_score_thresh) 203 | 204 | TrackVideo(PATH_TO_LABELS, PATH_TO_SAVED_MODEL, PATH_TO_TEST_VIDEO, PATH_TO_OUTPUT_VIDEO, MIN_SCORE_THRESH, DEEP_SORT_MODEL) 205 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pipenv install twine --dev 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | # Package meta-data. 15 | NAME = 'objectTracker' 16 | DESCRIPTION = 'An object tracking API' 17 | URL = 'https://github.com/sek788432/Custom-Object-Tracking' 18 | EMAIL = 'jefflo.861104@gmail.com, gyes00205@gmail.com' 19 | AUTHOR = 'Jeff Lo, Chia-Lun Hsu' 20 | REQUIRES_PYTHON = '>=3.6.0' 21 | VERSION = '0.1.0' 22 | 23 | # What packages are required for this module to be executed? 24 | REQUIRED = [ 25 | # 'requests', 'maya', 'records', 26 | ] 27 | 28 | # What packages are optional? 29 | EXTRAS = { 30 | # 'fancy feature': ['django'], 31 | } 32 | 33 | # The rest you shouldn't have to touch too much :) 34 | # ------------------------------------------------ 35 | # Except, perhaps the License and Trove Classifiers! 36 | # If you do change the License, remember to change the Trove Classifier for that! 
class UploadCommand(Command):
    """Custom `upload` command: build the package, upload it with Twine and
    push a git tag for the current version."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Print `s` wrapped in the ANSI bold escape sequence."""
        message = '\033[1m{0}\033[0m'.format(s)
        print(message)

    def initialize_options(self):
        """No options to initialize."""

    def finalize_options(self):
        """No options to finalize."""

    def run(self):
        """Remove old builds, build, upload, tag, push, then exit."""
        # A missing or unremovable `dist` directory is deliberately ignored.
        try:
            self.status('Removing previous builds…')
            dist_dir = os.path.join(here, 'dist')
            rmtree(dist_dir)
        except OSError:
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        build_command = '{0} setup.py sdist bdist_wheel --universal'.format(
            sys.executable)
        os.system(build_command)

        self.status('Uploading the package to PyPI via Twine…')
        os.system('twine upload dist/*')

        self.status('Pushing git tags…')
        tag_command = 'git tag v{0}'.format(about['__version__'])
        os.system(tag_command)
        os.system('git push --tags')

        sys.exit()
| url=URL, 106 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), 107 | # If your package is a single module, use this instead of 'packages': 108 | # py_modules=['mypackage'], 109 | 110 | # entry_points={ 111 | # 'console_scripts': ['mycli=mymodule:cli'], 112 | # }, 113 | install_requires=REQUIRED, 114 | extras_require=EXTRAS, 115 | include_package_data=True, 116 | license='MIT', 117 | classifiers=[ 118 | # Trove classifiers 119 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 120 | 'License :: OSI Approved :: MIT License', 121 | 'Programming Language :: Python', 122 | 'Programming Language :: Python :: 3', 123 | 'Programming Language :: Python :: 3.6', 124 | 'Programming Language :: Python :: Implementation :: CPython', 125 | 'Programming Language :: Python :: Implementation :: PyPy' 126 | ], 127 | # $ setup.py publish support. 128 | cmdclass={ 129 | 'upload': UploadCommand, 130 | }, 131 | ) 132 | -------------------------------------------------------------------------------- /test_video_tracking.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sek788432/Custom_Object_Tracking/976f794d1fe3217a074e967bd1e1e9d8bdc8339a/test_video_tracking.gif --------------------------------------------------------------------------------