├── .gitignore ├── LICENSE ├── README.md ├── embed_all.py ├── fakenews.py ├── heatmap_2d_tracker.py ├── lib ├── __init__.py ├── dfext.py └── models │ ├── __init__.py │ ├── lunet2.py │ ├── lunet2b.py │ ├── lunet2c.py │ └── lunext.py ├── neural.py ├── requirements.txt ├── semifake.py ├── simple_2d_tracker_duke.py ├── simple_track_duke.py └── track.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Visual Computing Institute 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Towards a Principled Integration of Multi-Camera Re-Identification and Tracking through Optimal Bayes Filters 2 | 3 | This is the code for reproducing the experiments from our paper [Towards a Principled Integration of Multi-Camera Re-Identification and Tracking through Optimal Bayes Filters](https://arxiv.org/abs/1705.04608). 4 | If you end up using any of this in your publication or otherwise find it useful, please cite our work as: 5 | 6 | ``` 7 | @article{BeyerBreuers2017Arxiv, 8 | author = {Lucas Beyer and 9 | Stefan Breuers and 10 | Vitaly Kurin and 11 | Bastian Leibe}, 12 | title = {Towards a Principled Integration of Multi-Camera Re-Identification 13 | and Tracking through Optimal Bayes Filters}, 14 | journal = {arXiv preprint arXiv:1705.04608}, 15 | year = {2017}, 16 | } 17 | ``` 18 | 19 | Please note that this is very much research code, and the paper is a very exploratory one. 20 | It's made public for reference so that others can see what exactly we did, as the paper in no way can explain everything in enough detail. 21 | **It is not production-quality code**, rather it is nice code that got ever more messy as the deadline approached. 22 | 23 | Due to the nature of the code, many things might still be confusing and non-obvious to others, so feel free to ask us, either by opening an issue here on github (preferably), or shooting us an e-mail! 
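If you want to try running things, the main entry point is `heatmap_2d_tracker.py`. As a rough, illustrative sketch (all paths are placeholders you need to adapt to your setup; the flags are defined at the bottom of that script):

```
python3 heatmap_2d_tracker.py --model lunet2c \
    --weights /path/to/lunet2c-noscale-nobg-2to32-aug.pkl \
    --basedir /path/to/DukeMTMC --outdir /path/to/output \
    --cams 1,2,3,4,5,6,7,8 --vis 100
```

Passing `--model fake` instead runs the whole pipeline on ground-truth detections, without the network.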
24 | 25 | ## The neural networks 26 | 27 | The training code of the neural networks is not public yet as it's pending publication of the dependency at https://github.com/VisualComputingInstitute/triplet-reid. 28 | 29 | However, the code creating the models and loading the trained weights is included. 30 | It is based on a custom deep-learning library on top of Theano called [DeepFried2](https://github.com/lucasb-eyer/DeepFried2) and a small toolbox called [lbtoolbox](https://github.com/lucasb-eyer/lbtoolbox) that you'll need to install. 31 | This can be easily done using `pip install -e git+GITHUB_URL`, see the corresponding READMEs. 32 | 33 | The model we used for final experiments is `lunet2c` and [the weights we used can be downloaded here](https://omnomnom.vision.rwth-aachen.de/data/lunet2c-noscale-nobg-2to32-aug.pkl). 34 | 35 | ## The dataset and evaluation 36 | 37 | This experimental work has been evaluated on the [dukeMTMC dataset](http://vision.cs.duke.edu/DukeMTMC/). 38 | Please refer to this project page for the used images, annotations, evaluation script, etc. 39 | 40 | ## The run parameters 41 | 42 | The below settings correspond to Table 1 of the paper. Details on the parameters can be found in Section 4. 43 | 44 | ``` 45 | NN-KF 46 | DIST_THRESH = 200, det_init_thresh = 0.3, det_continue_thresh = 0.0 init_thresh = 3, delete_thresh = 5 47 | 48 | +GT init 49 | --gt_init 50 | DIST_THRESH = 200, DET_INIT_THRESH = 0.3, DET_CONTINUE_THRESH = -0.3, init_thresh=1, delete_thresh=90 51 | 52 | +ReID 53 | --gt_init --use_appearance 54 | DIST_THRESH = 200, APP_THRESH = 6, DET_INIT_THRESH = 0.3, DET_CONTINUE_THRESH = -0.3, init_thresh=1, delete_thresh=90 55 | 56 | only ReID 57 | --gt_init --use_appearance 58 | DIST_THRESH = 6, DET_INIT_THRESH = 0.3, DET_CONTINUE_THRESH = -0.3, init_thresh=1, delete_thresh=90 59 | 60 | Full 61 | --dist_thresh 6 --unmiss_thresh 2 62 | 63 | +entropy 64 | --dist_thresh 5.5 --ent_thresh 0.25 --maxlife 8000 --unmiss_thresh 5 65 | killed of age: 4 66 | ``` 67 | 68 | Final raw bounding box results [can be found here](https://omnomnom.vision.rwth-aachen.de/data/bbmtrack-results/). 69 | 70 | -------------------------------------------------------------------------------- /embed_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import argparse 3 | from importlib import import_module 4 | from os.path import splitext, join as pjoin 5 | 6 | import cv2 7 | import numpy as np 8 | import h5py 9 | 10 | import lib 11 | from lib.models import add_defaults 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser(description='Embed many images.') 16 | parser.add_argument('--basedir', default='.', 17 | help='Path to the folder containing all images.') 18 | parser.add_argument('--outfile', default='embeddings.h5', 19 | help='Name of the output hdf5 file in which to store the embeddings.') 20 | parser.add_argument('--model', default='lunet2', 21 | help='Name of the model to load. Corresponds to module names in lib/models. 
Or `fake`')
22 |     parser.add_argument('--weights', default='/work/breuers/dukeMTMC/models/lunet2-final.pkl',
23 |                         help='Name of the weights to load for the model (path to .pkl file).')
24 |     parser.add_argument('--scale', default=1.0, type=float,
25 |                         help='Scale factor to scale images before embedding them.')
26 |     parser.add_argument('--t0', type=int)
27 |     parser.add_argument('--t1', type=int)
28 |     args = parser.parse_args()
29 |     print(args)
30 | 
31 | 
32 |     mod = import_module('lib.models.' + args.model)
33 |     net = add_defaults(mod.mknet())
34 | 
35 |     try:
36 |         net.load(args.weights)
37 |     except ValueError:
38 |         print("!!!!!!!THE WEIGHTS YOU LOADED DON'T BELONG TO THE MODEL YOU'RE USING!!!!!!")
39 |         raise
40 | 
41 |     # Shares the weights, just replaces the avg-pooling layer.
42 |     net_hires = mod.hires_shared_twin(net)
43 |     net_hires.evaluate()
44 | 
45 |     if args.t0 is None or args.t1 is None:
46 |         all_files = lib.sane_listdir(args.basedir, sortkey=lambda f: int(splitext(f)[0]))  # sane_listdir lives in lib.
47 |     else:
48 |         all_files = ['{}.jpg'.format(i) for i in range(args.t0, args.t1+1)]
49 | 
50 |     print("Precompiling network...", end='', flush=True)
51 |     img = lib.imread(pjoin(args.basedir, all_files[0]))
52 |     img = lib.img2df(img, lib.scale_shape(img.shape, args.scale))
53 |     out = net_hires.forward(img[None])
54 |     print(" Done", flush=True)
55 | 
56 |     with h5py.File(args.outfile, 'w') as f_out:
57 |         ds = f_out.create_dataset('embs', shape=(len(all_files),) + out.shape[1:], dtype=out.dtype)
58 |         for i, fname in enumerate(all_files):
59 |             print("\r{} ({}/{})".format(fname, i+1, len(all_files)), end='', flush=True)
60 | 
61 |             img = lib.imread(pjoin(args.basedir, fname))
62 |             img = lib.img2df(img, lib.scale_shape(img.shape, args.scale))
63 |             ds[i] = net_hires.forward(img[None])
64 | 
65 |             if i % 100 == 0:
66 |                 f_out.flush()
67 | 
68 |     print(" Done")
69 | 
--------------------------------------------------------------------------------
/fakenews.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import lib
3 | 
4 | 
5 | class FakeNeuralNewsNetwork:
6 |     def __init__(self, dets, shape=(33, 60)):
7 |         self.already_tracked_ids = [[], [], [], [], [], [], [], []]
8 |         self.dets = dets
9 |         self.shape = shape
10 | 
11 | 
12 |     def tick(self, curr_frame):
13 |         self.curr_dets = lib.slice_all(self.dets, self.dets['GFIDs'] == curr_frame)
14 | 
15 | 
16 |     def fake_camera(self, icam):
17 |         self.curr_cam_dets = lib.slice_all(self.curr_dets, self.curr_dets['Cams'] == icam)
18 |         self.fake_curr_cam = icam
19 | 
20 | 
21 |     def embed_crop(self, crop, fake_id):
22 |         return fake_id
23 | 
24 | 
25 |     def embed_image(self, image):
26 |         return None  # e.g.
(30,60,128) 27 | 28 | 29 | def search_person(self, img_embs, person_emb, fake_track_id): 30 | id_det_boxes = self.curr_cam_dets['boxes'][self.curr_cam_dets['TIDs'] == fake_track_id] 31 | return self._heatmap_sampling_for_dets(id_det_boxes) 32 | 33 | 34 | def personness(self, image, known_embs, return_pose=False): 35 | already_tracked_ids = self.already_tracked_ids[self.fake_curr_cam - 1] 36 | new_det_indices = np.where(np.logical_not(np.in1d(self.curr_cam_dets['TIDs'], already_tracked_ids)))[0] 37 | new_heatmaps_and_ids = [] 38 | for each_det_idx in new_det_indices: 39 | det = self.curr_cam_dets['boxes'][each_det_idx] 40 | #new_heatmap = self._heatmap_sampling_for_dets([det]) 41 | new_heatmap = self._one_fake_gaussian(lib.rebox_centered(det, det[3]*2, det[2]*2, bounds=None)) 42 | if return_pose: 43 | new_heatmap = (new_heatmap, lib.box_center_xy(lib.box_rel2abs(det, h=self.shape[0], w=self.shape[1]))) 44 | new_id = self.curr_cam_dets['TIDs'][each_det_idx] 45 | already_tracked_ids.append(new_id) 46 | new_heatmaps_and_ids.append((new_heatmap, new_id)) 47 | return new_heatmaps_and_ids 48 | 49 | 50 | def _one_fake_gaussian(self, box_rel): 51 | # NOTE: There certainly is a better way, but I'm tired. 52 | heatmap = np.zeros(self.shape) 53 | box_abs = lib.box_rel2abs(box_rel, h=self.shape[0], w=self.shape[1]) 54 | x, y = lib.box_center_xy(box_abs) 55 | heatmap[int(round(y)),int(round(x))] = 1 56 | _, __, w, h = box_abs 57 | return lib.convolve_edge_zeropad(heatmap, lib.gauss2d_xy([[(w/4)**2, 0], [0, (h/4)**2]], nstd=2)) 58 | 59 | 60 | def _heatmap_sampling_for_dets(self, dets_boxes): 61 | heatmap = np.random.rand(*self.shape) 62 | for l, t, w, h in dets_boxes: 63 | # score is how many times more samples than pixels in the detection box. 64 | score = np.random.randint(1,5) 65 | add_idx = np.random.multivariate_normal([l+w/2, t+h/2], [[(w/6)**2, 0], [0, (h/6)**2]], int(np.prod(heatmap.shape)*h*w*score)) 66 | np.add.at(heatmap, [[int(np.clip(y, 0, 0.999)*self.shape[0]) for x,y in add_idx], 67 | [int(np.clip(x, 0, 0.999)*self.shape[1]) for x,y in add_idx]], 1) 68 | return lib.softmax(heatmap) 69 | 70 | 71 | def fix_shape(self, net_output, orig_shape, out_shape, fill_value=0): 72 | if net_output.shape == out_shape: 73 | return np.array(net_output) 74 | else: 75 | return lib.resize_map(net_output, out_shape, interp='bicubic') 76 | -------------------------------------------------------------------------------- /heatmap_2d_tracker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*-. 3 | 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import argparse 8 | from os.path import join as pjoin 9 | from os import makedirs 10 | import time, datetime 11 | 12 | # the usual suspects 13 | import h5py 14 | import numpy as np 15 | import matplotlib as mpl 16 | mpl.use('Agg') 17 | #mpl.use('GTK') 18 | import matplotlib.pyplot as plt 19 | import matplotlib.patches as patches 20 | from mpl_toolkits.axes_grid1 import ImageGrid 21 | 22 | #tracker stuff 23 | import lib 24 | from track import Track 25 | from fakenews import FakeNeuralNewsNetwork 26 | from semifake import SemiFakeNews 27 | #from neural import RealNews 28 | 29 | 30 | SEQ_FPS = 60.0 31 | SEQ_DT = 1./SEQ_FPS 32 | SEQ_SHAPE = (1080, 1920) 33 | STATE_SHAPE = (135, 240) # Heatmaps: (26, 58) -> (33, 60) 34 | STATE_PADDING = ((5,5), (10,10)) # state shape is this much larger on the sides, see np.pad. 
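# With this padding, the (135, 240) state becomes (135+5+5, 240+10+10) = (145, 260) internally.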
35 | 36 | 37 | g_frames = 0 # Global counter for correct FPS in all cases 38 | 39 | 40 | try: 41 | profile 42 | except NameError: 43 | def profile(f): 44 | return f 45 | 46 | 47 | def n_active_tracks(tracklist): 48 | return '{:2d} +{:2d} +{:2d} ={:2d}'.format( 49 | sum(t.status == 'matched' for t in tracklist), 50 | sum(t.status == 'missed' for t in tracklist), 51 | sum(t.status == 'init' for t in tracklist), 52 | len(tracklist), 53 | ) 54 | # from collections import Counter 55 | #return str(Counter(t.status for t in tracklist).most_common()) 56 | 57 | 58 | def shall_vis(args, curr_frame): 59 | return args.vis and (curr_frame - args.t0) % args.vis == 0 60 | 61 | 62 | @lib.lru_cache(maxsize=16) # In theory 1 is enough here, but whatever =) 63 | def get_image(basedir, icam, frame): 64 | #framedir = 'frames-0.5' if SCALE_FACTOR == 0.5 else 'frames' 65 | # TODO: Use basedir again, from args. 66 | return plt.imread(pjoin('/work3/beyer/', 'frames-0.5', 'camera{}/{}.jpg'.format(icam, lib.glob2loc(frame, icam)))) 67 | 68 | 69 | @profile 70 | def main(net, args): 71 | eval_path = pjoin(args.outdir, 'results/run_{:%Y-%m-%d_%H:%M:%S}.txt'.format(datetime.datetime.now())) 72 | 73 | debug_dir = None 74 | if args.debug: 75 | debug_dir = pjoin(args.outdir, 'debug/run_{:%Y-%m-%d_%H:%M:%S}'.format(datetime.datetime.now())) 76 | makedirs(pjoin(debug_dir, 'crops'), exist_ok=True) 77 | 78 | track_lists = [[] for _ in args.cams] 79 | track_id = 1 80 | 81 | # Open embedding cache 82 | if args.embcache is not None: 83 | embs_caches = [h5py.File(args.embcache.format(icam), 'r')['embs'] for icam in args.cams] 84 | else: 85 | embs_caches = [None]*len(args.cams) 86 | 87 | # ===Tracking fun begins: iterate over frames=== 88 | # TODO: global time (duke) 89 | for curr_frame in range(args.t0, args.t1+1): 90 | print("\rFrame {}, {} matched/missed/init/total tracks, {} total seen".format(curr_frame, ', '.join(map(n_active_tracks, track_lists)), sum(map(len, track_lists))), end='', flush=True) 91 | net.tick(curr_frame) 92 | 93 | for icam, track_list, embs_cache in zip(args.cams, track_lists, embs_caches): 94 | net.fake_camera(icam) 95 | 96 | image_getter = lambda: get_image(args.basedir, icam, curr_frame) 97 | 98 | # Either embed the image, or load embedding from cache. 99 | if embs_cache is not None: 100 | image_embedding = np.array(embs_cache[curr_frame-127720]) # That's where the cache starts! 101 | else: 102 | image_embedding = net.embed_images([image_getter()])[0] 103 | 104 | 105 | # ===visualization=== 106 | # First, plot what data we have before doing anything. 
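            # The four panels below mirror the stages of the Bayes filter per camera:
            # 'Prior' is the motion-predicted belief, 'All ID-specific' shows the
            # appearance (ID) heatmaps used as measurements, 'Posterior' is the belief
            # after the update step, and 'All Tracks' the resulting tracks on the image.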
107 | if shall_vis(args, curr_frame): 108 | #fig, axes = plt.subplots(3, 2, sharex=True, sharey=True, figsize=(20,12)) 109 | #(ax_tl, ax_tr), (ax_ml, ax_mr), (ax_bl, ax_br) = axes 110 | fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(20,12)) 111 | (ax_ml, ax_mr), (ax_bl, ax_br) = axes 112 | axes = axes.flatten() 113 | 114 | for ax in axes: 115 | ax.imshow(image_getter(), extent=[0, SEQ_SHAPE[1], SEQ_SHAPE[0], 0]) 116 | 117 | # plot (active) tracks 118 | #ax_tl.set_title('Raw Personness') 119 | #ax_tr.set_title('Filtered Personness') 120 | ax_ml.set_title('Prior') 121 | ax_mr.set_title('All ID-specific') 122 | ax_bl.set_title('Posterior') 123 | ax_br.set_title('All Tracks') 124 | # ===/visualization=== 125 | 126 | 127 | ### A) update existing tracks 128 | for itracker, track in enumerate(track_list): 129 | # ---PREDICT--- 130 | track.track_predict() 131 | if shall_vis(args, curr_frame): 132 | track.plot_pred_heatmap(ax_ml) 133 | 134 | # ---SEARCH--- 135 | id_distmap = net.search_person(image_embedding, track.embedding, T=1, 136 | fake_track_id=track.track_id) # Unused by real net. 137 | # FIXME: should be image.shape, or at least use scale-factor. 138 | id_distmap = net.fix_shape(id_distmap, (1080//2, 1920//2), STATE_SHAPE, fill_value=1/np.prod(STATE_SHAPE)) 139 | id_heatmap = lib.softmin(id_distmap, T=1) 140 | #id_heatmap /= np.sum(id_heatmap) 141 | 142 | # ---UPDATE--- 143 | track.track_update(id_heatmap, id_distmap, curr_frame, image_getter) 144 | 145 | if shall_vis(args, curr_frame): 146 | track.plot_id_heatmap(ax_mr) 147 | 148 | ### B) get new tracks from general heatmap 149 | viz_per_cam_personnesses = [] 150 | 151 | #known_embs = [track.embedding for track in track_lists[icam-1]] 152 | #personness = net.clear_known(image_personnesses[icam-1], image_embeddings[icam-1], known_embs=known_embs) 153 | #personness = net.fix_shape(personness, images[icam-1].shape, STATE_SHAPE, fill_value=0) 154 | #viz_per_cam_personnesses.append(personness) 155 | 156 | # B.1) COMMENT IN FOR SEMI-FAKE 157 | # TODO: Make semi-fake by generating heatmap and clearing out known_embs 158 | # TODO: use image instead of None for real one here 159 | for (new_heatmap, init_pose), new_id in net.personness(None, known_embs=None, return_pose=True): 160 | # TODO: get correct track_id (loop heatmap, instead of function call?# ) 161 | # TODO: get id_heatmap of that guy for init_heatmap 162 | # Don't fix shape yet, cuz we don't emulate the avg-pool shape screw-up. 
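                # Note: in this semi-fake branch, `new_id` is the ground-truth track id
                # of a so-far-untracked detection and is reused as the new track's id,
                # so the `track_id` counter is only needed by the real-news branch below.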
163 | #new_heatmap = net.fix_shape(new_heatmap, images[icam-1].shape, STATE_SHAPE, fill_value=0) 164 | #init_pose = lib.argmax2d_xy(new_heatmap) 165 | new_track = Track(net.embed_crops, 166 | curr_frame, init_pose, image_getter(), track_id=new_id, 167 | state_shape=STATE_SHAPE, state_pad=STATE_PADDING, output_shape=SEQ_SHAPE, 168 | dist_thresh=args.dist_thresh, entropy_thresh=args.ent_thresh, 169 | unmiss_thresh=args.unmiss_thresh, delete_thresh=args.delete_thresh, 170 | maxlife=args.maxlife, tp_hack=args.tp_hack, 171 | debug_out_dir=debug_dir) 172 | new_track.init_heatmap(new_heatmap) 173 | #new_track.init_heatmap(np.full(STATE_SHAPE, 1/np.prod(STATE_SHAPE))) 174 | track_list.append(new_track) 175 | 176 | # B.2) REAL NEWS 177 | # TODO: Missing non-max suppression 178 | # for y_idx, x_idx in zip(*np.where(personness>1.5)): 179 | # init_pose = [y_idx, x_idx] 180 | # new_track = Track(net.embed_crop, SEQ_DT, 181 | # curr_frame, init_pose, images[icam-1], track_id=track_id, 182 | # state_shape=STATE_SHAPE, output_shape=SEQ_SHAPE, 183 | # debug_out_dir=debug_dir) 184 | 185 | # # Embed around the initial pose and compute an initial heatmap. 186 | # id_heatmap = net.search_person(image_embeddings[icam-1], new_track.embedding) 187 | # id_heatmap = net.fix_shape(id_heatmap, images[icam-1].shape, STATE_SHAPE, fill_value=0) 188 | # new_track.init_heatmap(id_heatmap) 189 | # track_id += 1 190 | # track_list.append(new_track) 191 | 192 | if shall_vis(args, curr_frame): 193 | for track in track_list: 194 | track.plot_pos_heatmap(ax_bl) 195 | track.plot_track(ax_br, plot_past_trajectory=True, time_scale=args.vis) 196 | 197 | for ax in axes: 198 | # TODO: Flex 199 | ax.set_adjustable('box-forced') 200 | ax.set_xlim(0, SEQ_SHAPE[1]) 201 | ax.set_ylim(SEQ_SHAPE[0], 0) 202 | fig.savefig(pjoin(args.outdir, 'camera{}/res_img_{:06d}.jpg'.format(icam, curr_frame)), 203 | quality=80, bbox_inches='tight', pad_inches=0.2) 204 | plt.close() 205 | 206 | ### C) further track-management 207 | # delete tracks marked as 'deleted' in this tracking cycle #TODO: manage in other list for re-id 208 | track_list[:] = [i for i in track_list if i.status != 'deleted'] 209 | 210 | 211 | # ==evaluation=== 212 | with open(eval_path, 'a') as eval_file: 213 | for icam, track_list in zip(args.cams, track_lists): 214 | for track in track_list: 215 | track_eval_line = track.get_track_eval_line(cid=icam, frame=curr_frame) 216 | eval_file.write('{} {} {} {} {} {} {} {} {}\n'.format(*track_eval_line)) 217 | 218 | global g_frames 219 | g_frames += 1 220 | 221 | 222 | # Heavily adapted and fixed from http://robotics.usc.edu/~ampereir/wordpress/?p=626 223 | def savefig(fname, fig=None, orig_size=None, **kw): 224 | if fig is None: 225 | fig = plt.gcf() 226 | fig.patch.set_alpha(0) 227 | 228 | w, h = fig.get_size_inches() 229 | if orig_size is not None: # Aspect ratio scaling if required 230 | fw, fh = w, h 231 | w, h = orig_size 232 | fig.set_size_inches((fw, (fw/w)*h)) 233 | fig.set_dpi((fw/w)*fig.get_dpi()) 234 | 235 | ax = fig.gca() 236 | ax.set_frame_on(False) 237 | ax.set_xticks([]); ax.set_yticks([]) 238 | ax.set_axis_off() 239 | #ax.set_xlim(0, w); ax.set_ylim(h, 0) 240 | fig.savefig(fname, transparent=True, bbox_inches='tight', pad_inches=0, **kw) 241 | 242 | 243 | if __name__ == '__main__': 244 | 245 | parser = argparse.ArgumentParser(description='2D tracker test.') 246 | parser.add_argument('--basedir', nargs='?', default='/work/breuers/dukeMTMC/', 247 | help='Path to `train` folder of 2DMOT2015.') 248 | 
parser.add_argument('--outdir', nargs='?', default='/home/breuers/results/duke_mtmc/',
249 |                         help='Where to store generated output. Only needed if `--vis` is also passed.')
250 |     parser.add_argument('--model', default='lunet2c',
251 |                         help='Name of the model to load. Corresponds to module names in lib/models. Or `fake`')
252 |     parser.add_argument('--weights', default='/work/breuers/dukeMTMC/models/lunet2c-noscale-nobg-2to32-aug.pkl',
253 |                         help='Name of the weights to load for the model (path to .pkl file).')
254 |     parser.add_argument('--t0', default=127720, type=int,
255 |                         help='Time of first frame.')
256 |     parser.add_argument('--t1', default=187540, type=int,
257 |                         help='Time of last frame, inclusive.')
258 |     parser.add_argument('--large_gpu', action='store_true',
259 |                         help='Large GPU can forward more at once.')
260 |     parser.add_argument('--vis', default=0, type=int,
261 |                         help='Generate and save a visualization of the results every X-th frame.')
262 |     parser.add_argument('--debug', action='store_true',
263 |                         help='Generate many extra debugging outputs (in outdir).')
264 |     parser.add_argument('--cams', default='1,2,3,4,5,6,7,8',
265 |                         help='Comma-separated list of camera numbers (1-8) to consider.')
266 |     parser.add_argument('--embcache',
267 |                         help='Optional path to an embeddings-cache file for speeding things up. Put a `{}` as placeholder for the camera number.')
268 |     parser.add_argument('--dist_thresh', default=7, type=float,
269 |                         help='Distance threshold to evaluate measurement certainty.')
270 |     parser.add_argument('--ent_thresh', default=0.1, type=float,
271 |                         help='Entropy threshold to evaluate measurement certainty.')
272 |     parser.add_argument('--maxlife', type=int)
273 |     parser.add_argument('--tp_hack', type=float)
274 |     parser.add_argument('--unmiss_thresh', type=int, default=2)
275 |     parser.add_argument('--delete_thresh', type=int, default=90)
276 |     args = parser.parse_args()
277 |     args.cams = [int(c) for c in args.cams.split(',')]  # Parse the comma-separated list safely instead of eval'ing it.
278 |     print(args)
279 | 
280 |     # This is all for faking the network.
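    # `fake` runs purely on ground-truth detections (FakeNeuralNewsNetwork); any real
    # model name runs SemiFakeNews, i.e. real embeddings combined with ground-truth-based
    # proposals for new tracks. The fully real pipeline (RealNews) is commented out.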
281 |     if args.model == 'fake':
282 |         net = FakeNeuralNewsNetwork(lib.load_trainval(pjoin(args.basedir, 'ground_truth', 'trainval.mat'), time_range=[args.t0, args.t1]))
283 |     else:
284 |         #net = RealNews(
285 |         net = SemiFakeNews(
286 |             model=args.model,
287 |             weights=args.weights,
288 |             input_scale_factor=1.0,
289 |             fake_dets=lib.load_trainval(pjoin(args.basedir, 'ground_truth', 'trainval.mat'), time_range=[args.t0, args.t1]),
290 |             fake_shape=STATE_SHAPE,
291 |         )
292 | 
293 |     # Prepare output dirs
294 |     for icam in args.cams:
295 |         makedirs(pjoin(args.outdir, 'camera{}'.format(icam)), exist_ok=True)
296 |     makedirs(pjoin(args.outdir, 'results'), exist_ok=True)
297 | 
298 |     tstart = time.time()
299 |     try:
300 |         main(net, args)
301 |     except KeyboardInterrupt:
302 |         print()
303 | 
304 |     print('FPS: {:.3f}'.format(g_frames / (time.time() - tstart)))
305 | 
--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import json
3 | import os
4 | from os.path import join as pjoin
5 | 
6 | # Only for loading annotations
7 | import h5py
8 | from scipy.io import loadmat
9 | 
10 | from scipy.stats import multivariate_normal
11 | from scipy import signal
12 | import matplotlib.pyplot as plt
13 | from matplotlib.colors import ListedColormap
14 | 
15 | try:
16 |     from functools import lru_cache  # Needs at least Python 3.2
17 | except ImportError:
18 |     def lru_cache(*a, **kw): return lambda fn: fn  # No-op fallback; must be a decorator *factory* like the real one.
19 | 
20 | 
21 | START_TIMES = [5543, 3607, 27244, 31182, 1, 22402, 18968, 46766]
22 | TRAIN_START, TRAIN_END = 49700, 227540
23 | 
24 | 
25 | ###############################################################################
26 | # Generic utilities
27 | 
28 | 
29 | def scale_shape(shape, factors):
30 |     try:
31 |         len(factors)
32 |     except TypeError:
33 |         # It's a number
34 |         factors = (factors, factors)
35 | 
36 |     return int(shape[0]*factors[0]), int(shape[1]*factors[1])
37 | 
38 | 
39 | def argmax2d_xy(arr, smooth=None):
40 |     if smooth is not None:
41 |         sy, sx = smooth
42 |         arr = cv2.GaussianBlur(arr, (sx,sy), 0, borderType=cv2.BORDER_CONSTANT)
43 | 
44 |     idx = np.unravel_index(arr.argmax(), arr.shape)
45 |     return np.array([idx[1], idx[0]])
46 | 
47 | 
48 | def expected_xy(p, thresh=None, magic_thresh=None):
49 |     if magic_thresh is not None:  # Some weak-ass heuristic
50 |         T = magic_thresh
51 | 
52 |         # And reduce until we can afford it.
53 |         for _ in range(999):  # Avoids infinite loop.
54 |             thresh = T/np.prod(p.shape)
55 |             if np.sum(thresh <= p):  # Some mass survives the threshold.
56 |                 break
57 |             T = 1 + (T-1)/2
58 |         else:
59 |             print("Warning: expected_xy ran out of steps")
60 | 
61 |     if thresh is not None:
62 |         p = np.array(p)
63 |         p[p < thresh] = 0
64 |         p /= np.clip(np.sum(p), 1e-5, 1)  # Avoid NaNs
65 | 
66 |     return np.sum(np.mgrid[:p.shape[0],:p.shape[1]] * p, axis=(1,2))[::-1]
67 | 
68 | 
69 | def softmax(x, T=1):
70 |     x = x - np.max(x)
71 |     eh = np.exp(x/T)
72 |     return eh / np.sum(eh)
73 | 
74 | 
75 | def softmin(x, T=1):
76 |     return softmax(-x, T=T)
77 | 
78 | 
79 | def entropy(x):
80 |     return -np.sum(x*np.log2(np.clip(x, 1e-14, 1)))
81 | 
82 | 
83 | def entropy_avg(x):
84 |     return -np.mean(x*np.log2(np.clip(x, 1e-14, 1)))
85 | 
86 | 
87 | def entropy_score(x):
88 |     """ Returns a score between 0 and 1 that is inversely related to the entropy.
89 |     It is exactly 0 for (near) uniform distributions, and 1 for single peaks.
90 | """ 91 | e0 = entropy(softmax(np.zeros_like(x))) 92 | return (e0 - entropy(x))/e0 93 | 94 | 95 | def entropy_score_avg(x): 96 | """ Returns a score between 0 and 1 directly proportional to the entropy. 97 | It is exactly 0 for (near) uniform distributions, and 1 for single peaks. 98 | """ 99 | e0 = entropy_avg(np.full_like(x, 1/np.prod(x.shape))) 100 | x = np.array(x) 101 | x[x < 2/np.prod(x.shape)] = 0 102 | x /= np.sum(x) 103 | return (e0 - entropy_avg(x))/e0 104 | 105 | 106 | def xent(p, q): 107 | return -np.sum(p*np.log2(np.clip(q, 1e-14, 1))) 108 | 109 | 110 | def xent_avg(p, q): 111 | return -np.mean(p*np.log2(np.clip(q, 1e-14, 1))) 112 | 113 | 114 | def my_choice(candidates, n): 115 | return np.random.choice(candidates, n, len(candidates) < n) 116 | 117 | 118 | def randin(lo, hi): 119 | return lo + np.random.rand()*(hi-lo) 120 | 121 | 122 | def ramp(e, e0, v0, e1, v1): 123 | """ 124 | Return `v0` until `e` reaches `e0`, then linearly interpolate 125 | to `v1` when `e` reaches `e1` and return `v1` thereafter. 126 | 127 | Copyright (C) 2017 Lucas Beyer - http://lucasb.eyer.be =) 128 | """ 129 | if e < e0: 130 | return v0 131 | elif e < e1: 132 | return v0 + (v1-v0)*(e-e0)/(e1-e0) 133 | else: 134 | return v1 135 | 136 | 137 | def expdec(e, e0, v0, e1, v1): 138 | """ 139 | Return `v0` until `e` reaches `e0`, then exponentially decay 140 | to `v1` when `e` reaches `e1` and return `v1` thereafter. 141 | 142 | Copyright (C) 2017 Lucas Beyer - http://lucasb.eyer.be =) 143 | """ 144 | if e < e0: 145 | return v0 146 | elif e < e1: 147 | return v0 * (v1/v0)**((e-e0)/(e1-e0)) 148 | else: 149 | return v1 150 | 151 | 152 | def sane_listdir(where, ext='', sortkey=None): 153 | """ 154 | Intended for internal use. 155 | Like `os.listdir`, but: 156 | - Doesn't include hidden files, 157 | - Always returns results in a sorted order (pass `sortkey=int` for numeric sort), 158 | - Optionally only return entries whose name ends in `ext`. 159 | """ 160 | return sorted((i for i in os.listdir(where) if not i.startswith('.') and i.endswith(ext)), key=sortkey) 161 | 162 | 163 | def img2df(img, shape=None): 164 | """ 165 | Convert raw images into what's needed by DeepFried. 166 | This means: BGR->RGB, HWC->CHW and [0,255]->[0.0,1.0] 167 | 168 | Note that `shape` is (H,W). 169 | """ 170 | img = resize_img(img, shape=shape) 171 | img = np.rollaxis(img, 2, 0) # HWC to CHW 172 | img = img.astype(np.float32) / 255.0 173 | return img 174 | 175 | 176 | def gauss2d_xy(cov, nstd=2, mean=[0,0]): 177 | """ 178 | guaranteed to return filter of odd shape which also keeps probabilities as probabilities. 179 | """ 180 | sx, sy = np.sqrt(cov[0][0]), np.sqrt(cov[1][1]) 181 | dx = max(1, round(nstd*sx + abs(mean[0]))) 182 | dy = max(1, round(nstd*sy + abs(mean[1]))) 183 | x, y = np.mgrid[-dy:dy+1:1, -dx:dx+1:1] # This formulation ensures odd sizes. 184 | pos = np.dstack((y, x)) 185 | rv = multivariate_normal(mean, cov) 186 | filt = rv.pdf(pos) 187 | return filt/np.sum(filt) # Make sure it's a probability-preserving 188 | 189 | 190 | def paste_into_middle_2d(x, out_shape=None, fill_value=0, out=None): 191 | if out is None: 192 | out = np.full(out_shape, fill_value, dtype=x.dtype) 193 | 194 | out_shape = out.shape 195 | 196 | if x.shape == out_shape: 197 | return np.array(x) 198 | 199 | h, w = x.shape 200 | H, W = out_shape 201 | dy_out, dx_out = max(0, (H-h)/2), max(0, (W-w)/2) 202 | dy_in, dx_in = max(0, (h-H)/2), max(0, (w-W)/2) 203 | #assert 0 <= dy and 0 <= dx, "Something wrong with shape-fixing! 
204 | 
205 |     # Stick to bottom on ambiguity
206 |     #out[int(dy_out+0.5):H-int(dy_out),int(dx_out+0.5):W-int(dx_out)] = x[int(dy_in+0.5):h-int(dy_in),int(dx_in+0.5):w-int(dx_in)]
207 | 
208 |     # Stick to top on ambiguity
209 |     out[int(dy_out):H-int(dy_out+0.5),int(dx_out):W-int(dx_out+0.5)] = x[int(dy_in):h-int(dy_in+0.5),int(dx_in):w-int(dx_in+0.5)]
210 | 
211 |     # Doesn't work correctly on perfect ambiguity.
212 |     #out[round(dy_out):H-round(dy_out),round(dx_out):W-round(dx_out)] = x[round(dy_in):h-round(dy_in),round(dx_in):w-round(dx_in)]
213 | 
214 |     return out
215 | 
216 | 
217 | ###############################################################################
218 | # Video handling, only with OpenCV
219 | 
220 | try:
221 |     import cv2
222 | 
223 | 
224 |     def resize_img(img, shape=None, interp=None, is_chw=False):
225 |         if shape is None:
226 |             return np.array(img)
227 | 
228 |         if interp is None:
229 |             interp = cv2.INTER_AREA
230 |         elif interp == 'bicubic':  # `==`, not `is`: string identity is an implementation detail.
231 |             interp = cv2.INTER_CUBIC
232 |         else:
233 |             raise NotImplementedError("TODO: Interpolation {} in OpenCV".format(interp))
234 | 
235 |         if is_chw:
236 |             img = np.rollaxis(img, 0, 3)  # CHW to HWC
237 | 
238 |         img = cv2.resize(img, (shape[1], shape[0]), interpolation=interp)
239 | 
240 |         if is_chw:
241 |             img = np.rollaxis(img, 2, 0)  # HWC to CHW
242 | 
243 |         return img
244 | 
245 | 
246 |     def resize_map(img, shape, interp='bicubic'):
247 |         return resize_img(img, shape, interp)
248 | 
249 | 
250 |     def imread(fname):
251 |         f = cv2.imread(fname)
252 |         if f is None:
253 |             raise ValueError("Couldn't load file {}".format(fname))
254 |         return f[:,:,::-1]  # BGR -> RGB
255 | 
256 | 
257 |     def imwrite(fname, img):
258 |         cv2.imwrite(fname, img[:,:,::-1])  # RGB -> BGR
259 | 
260 | 
261 |     def convolve_edge_same(image, filt):
262 |         # 64F is actually faster than 32?!
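        # BORDER_REPLICATE repeats the edge pixels, i.e. a same-size convolution with
        # edge padding; the scipy fallback below mimics this with np.pad(mode='edge').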
263 | return cv2.filter2D(image, cv2.CV_64F, filt, borderType=cv2.BORDER_REPLICATE) 264 | 265 | 266 | def convolve_edge_zeropad(image, filt): 267 | dx1, dx2 = filt.shape[1]//2, filt.shape[1]//2 268 | dy1, dy2 = filt.shape[0]//2, filt.shape[0]//2 269 | x = cv2.copyMakeBorder(image, dy1, dy2, dx1, dx2, cv2.BORDER_CONSTANT) 270 | x = cv2.filter2D(x, -1, filt) 271 | return x[dy1:-dy2,dx1:-dx2] 272 | 273 | 274 | def video_or_open(video): 275 | # Because can't access cv2.VideoCapture type (only function exposed) 276 | if type(video).__name__ == 'VideoCapture': 277 | return video 278 | else: 279 | return cv2.VideoCapture(video) 280 | 281 | 282 | def vidframes(video): 283 | return int(video_or_open(video).get(cv2.CAP_PROP_FRAME_COUNT)) 284 | 285 | 286 | def itervid(video): 287 | video = video_or_open(video) 288 | 289 | while True: 290 | good, img = video.read() 291 | 292 | if not good: 293 | return 294 | 295 | yield img 296 | 297 | 298 | def vid2tensor(video, imgproc=lambda x: x, progress=None): 299 | video = video_or_open(video) 300 | 301 | T = vidframes(video) 302 | vid = None 303 | 304 | for t, img in enumerate(itervid(video)): 305 | img = imgproc(img) 306 | 307 | if vid is None: 308 | vid = np.empty((T,) + img.shape, img.dtype) 309 | 310 | vid[t] = img 311 | 312 | if progress is not None: 313 | progress(t, T) 314 | 315 | return vid 316 | 317 | 318 | def total_frames(basedir, ext='.MTS', subsample=1): 319 | T = 0 320 | for f in sane_listdir(basedir, ext=ext): 321 | T += vidframes(pjoin(basedir, f))//subsample 322 | 323 | return T 324 | 325 | 326 | except ImportError: 327 | import scipy 328 | 329 | 330 | def resize_img(img, shape=None, interp='bilinear'): 331 | if shape is None: 332 | return np.array(img) 333 | 334 | return scipy.misc.imresize(img, shape, interp=interp, mode='RGB') 335 | 336 | 337 | def resize_map(img, shape, interp='bicubic'): 338 | return scipy.misc.imresize(img, shape, interp=interp, mode='F') 339 | 340 | 341 | def imwrite(fname, img): 342 | scipy.misc.imsave(fname, img) 343 | 344 | 345 | def convolve_edge_same(image, filt): 346 | pad_width = int(filt.shape[1] / 2) 347 | pad_height = int(filt.shape[0] / 2) 348 | out_img = np.pad(image, ((pad_height, pad_height), (pad_width, pad_width)), mode='edge') 349 | out_img = signal.convolve2d(out_img, filt, mode='valid', boundary='fill', fillvalue=0) 350 | return out_img 351 | 352 | 353 | ############################################################################### 354 | # Box utils 355 | # Unittest see notebook 356 | 357 | 358 | def intersect(box1, box2): 359 | l1, t1, w1, h1 = box1 360 | l2, t2, w2, h2 = box2 361 | 362 | l3 = max(l1, l2) 363 | t3 = max(t1, t2) 364 | return l3, t3, min(l1+w1, l2+w2)-l3, min(t1+h1, t2+h2)-t3 365 | 366 | 367 | def iou(box1, box2): 368 | l1, t1, w1, h1 = box1 369 | l2, t2, w2, h2 = box2 370 | 371 | _, _, wi, hi = intersect(box1, box2) 372 | 373 | # They (practically) don't intersect. 374 | if wi < 1e-5 or hi < 1e-5: 375 | return 0.0 376 | 377 | i = wi*hi 378 | u = w1*h1 + w2*h2 - i 379 | return i/u 380 | 381 | 382 | def max_iou(r, others): 383 | if len(others) == 0: 384 | return 0 385 | return max(iou(r, o) for o in others) 386 | 387 | 388 | def argmax_iou(r, others): 389 | return np.argmax([iou(r, o) for o in others]) 390 | 391 | 392 | def sample_around(boxes, size, imsize=(1,1), nstd=3): 393 | H, W = imsize 394 | h, w = size 395 | 396 | # pick one box 397 | ml, mt, mw, mh = boxes[np.random.choice(len(boxes))] 398 | 399 | # Go around it but stay in image-space! 
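    # The sampler below draws from a normal centered on the middle of the valid
    # interval, with `nstd` standard deviations spanning half of it, and clips the
    # result so the sampled box always stays inside the image.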
400 | #rand = np.random.randint 401 | #rand = lambda m, M: m + np.random.rand()*(M-m) 402 | rand = lambda m, M: np.clip((m+M)/2 + np.random.randn()*(M-m)/(2*nstd), m, M) 403 | l = rand(max(ml-w, 0), min(ml+mw, W-w)) 404 | t = rand(max(mt-h, 0), min(mt+mh, H-h)) 405 | return l, t, w, h 406 | 407 | 408 | def sample_lonely(boxes, size, region=(0,0,1,1), thresh=1e-2): 409 | # NOTE: size is HW whereas boxes and region are LTWH 410 | # TODO: make smarter? 411 | H, W = size 412 | xmin, ymin = region[0], region[1] 413 | xmax, ymax = region[2] - W, region[3] - H 414 | x = xmin + (xmax-xmin)*np.random.rand() 415 | y = ymin + (ymax-ymin)*np.random.rand() 416 | while thresh < max_iou((x, y, W, H), boxes): 417 | x = xmin + (xmax-xmin)*np.random.rand() 418 | y = ymin + (ymax-ymin)*np.random.rand() 419 | return x, y, W, H 420 | 421 | 422 | def stick_to_bounds(box, bounds=(0,0,1,1)): 423 | """ 424 | Sticks the given `box`, which is a `(l, t, w, h)`-tuple to the given bounds 425 | which are also expressed as `(l, t, w, h)`-tuple. 426 | """ 427 | if bounds is None: 428 | return box 429 | 430 | l, t, w, h = box 431 | bl, bt, bw, bh = bounds 432 | 433 | l += max(bl - l, 0) 434 | l -= max((l+w) - (bl+bw), 0) 435 | 436 | t += max(bt - t, 0) 437 | t -= max((t+h) - (bt+bh), 0) 438 | 439 | return l, t, w, h 440 | 441 | 442 | def box_centered(cx, cy, h, w, bounds=(0, 0, 1, 1)): 443 | """ 444 | Returns a box of size `(h,w)` centered around `(cy,cx)`, but sticked to `bounds`. 445 | """ 446 | return stick_to_bounds((cx - w / 2, cy - h / 2, w, h), bounds) 447 | 448 | 449 | def rebox_centered(box, h, w, bounds=(0,0,1,1)): 450 | """ 451 | Returns a new box of size `(h,w)` centered around the same center as the 452 | given `box`, which is a `(l,t,w,h)`-tuple, and sticked to `bounds`. 453 | """ 454 | # box is l t w h 455 | # size is h w 456 | l, t, bw, bh = box 457 | cy, cx = t + bh/2, l + bw/2 458 | return stick_to_bounds((cx - w/2, cy - h/2, w, h), bounds) 459 | 460 | 461 | def wiggle_box(box, pct_move=None, factor_size=None): 462 | """ 463 | As usual, box is `(l,t,w,h)`. 464 | `pct_move` is max change, in % of boxsize: 0 means no change allowed, 1 means full box-size change allowed. 465 | `factor_size` is maximum factor of up/down scaling, so 2 means up to half/double bbox-size. 466 | """ 467 | l, t, w, h = box 468 | 469 | if pct_move is not None: 470 | try: 471 | len(pct_move) 472 | except TypeError: 473 | pct_move = (pct_move, pct_move) 474 | l += randin(-w*pct_move[1], w*pct_move[1]) 475 | t += randin(-h*pct_move[0], h*pct_move[0]) 476 | 477 | nw, nh = w, h 478 | if factor_size is not None: 479 | sf = randin(1, factor_size) 480 | if np.random.rand() < 0.5: 481 | sf = 1/sf 482 | nw *= sf 483 | nh *= sf 484 | 485 | return rebox_centered((l, t, w, h), nh, nw) 486 | 487 | 488 | def cutout_rel_chw(img, box): 489 | """ 490 | Returns a cut-out of `img` (which is CHW) at the *relative* `box` location. 491 | `box` is a `(l,t,w,h)`-tuple as usual, but in [0,1]-coordinates relative to 492 | the image size. 493 | """ 494 | _, H, W = img.shape 495 | l, t, w, h = box 496 | return img[:,int(t*H):int(t*H)+int(h*H) 497 | ,int(l*W):int(l*W)+int(w*W)] 498 | 499 | 500 | def cutout_abs_hwc(img, box): 501 | """ 502 | Returns a cut-out of `img` (which is HWC) at the *absolute* `box` location. 503 | `box` is a `(l,t,w,h)`-tuple as usual, in absolute coordinates. 
504 | """ 505 | l, t, w, h = map(int, box) 506 | return img[t:t+h,l:l+w] 507 | 508 | 509 | def box_rel2abs(box,h=1080,w=1920): 510 | return [box[0]*w, box[1]*h, box[2]*w, box[3]*h] 511 | 512 | 513 | def box_center_xy(box): 514 | return [box[0] + box[2] / 2., box[1] + box[3] / 2.] 515 | 516 | 517 | def ltrb_to_box(ltrb): 518 | l, t, r, b = ltrb[0], ltrb[1], ltrb[2], ltrb[3] 519 | return [l, t, r-l, b-t] 520 | 521 | ############################################################################### 522 | # Frame-switching 523 | 524 | 525 | def loc2glob(loc, cam): 526 | # Compute global frame numbers once. 527 | offset = START_TIMES[cam-1] - 1 528 | return loc + offset 529 | 530 | def glob2loc(glob, cam): 531 | # Compute global frame numbers once. 532 | offset = START_TIMES[cam-1] - 1 533 | return glob - offset 534 | 535 | assert loc2glob(1, 1) == 5543 536 | assert glob2loc(loc2glob(2,1),1) == 2 537 | 538 | 539 | 540 | ############################################################################### 541 | # Data-handling 542 | 543 | 544 | def slice_all(f, s): 545 | return {k: v[s] for k,v in f.items()} 546 | 547 | 548 | def load_trainval(fname, time_range=[TRAIN_START, TRAIN_END]): 549 | try: 550 | m = loadmat(fname)['trainData'] 551 | except NotImplementedError: 552 | with h5py.File(fname, 'r') as f: 553 | m = np.array(f['trainData']).T 554 | 555 | data = { 556 | 'Cams': np.array(m[:,0], dtype=int), 557 | 'TIDs': np.array(m[:,1], dtype=int), 558 | 'LFIDs': np.array(m[:,2], dtype=int), 559 | 'boxes': np.array(m[:,3:7], dtype=float), 560 | 'world': np.array(m[:,7:9]), 561 | 'feet': np.array(m[:,9:]), 562 | } 563 | 564 | # boxes are l t w h 565 | data['boxes'][:,0] /= 1920 566 | data['boxes'][:,1] /= 1080 567 | data['boxes'][:,2] /= 1920 568 | data['boxes'][:,3] /= 1080 569 | 570 | # Compute global frame numbers once. 
571 | data['GFIDs'] = np.array(data['LFIDs']) 572 | for icam, t0 in zip(range(1,9), START_TIMES): 573 | data['GFIDs'][data['Cams'] == icam] += t0 - 1 574 | 575 | #return data 576 | return slice_all(data, (time_range[0] <= data['GFIDs']) & (data['GFIDs'] <= time_range[1])) 577 | 578 | 579 | def load_dat(basename): 580 | desc = json.load(open(basename + '.json', 'r')) 581 | dtype, shape = desc['dtype'], tuple(desc['shape']) 582 | Xm = np.memmap(basename, mode='r', dtype=dtype, shape=shape) 583 | Xa = np.ndarray.__new__(np.ndarray, dtype=dtype, shape=shape, buffer=Xm) 584 | return Xa 585 | 586 | ############################################################################### 587 | # Plotting 588 | def get_transparent_colormap(cmap=plt.cm.inferno): 589 | out_cmap = cmap(np.arange(cmap.N)) 590 | out_cmap[:, -1] = np.linspace(0, 1, cmap.N) 591 | out_cmap = ListedColormap(out_cmap) 592 | return out_cmap 593 | -------------------------------------------------------------------------------- /lib/dfext.py: -------------------------------------------------------------------------------- 1 | import DeepFried2 as df 2 | 3 | 4 | def resblock(chan_in, chan_out=None, chan_mid=None, stride=1, 5 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 6 | mknl=lambda: df.ReLU()): 7 | chan_out = chan_out or chan_in 8 | chan_mid = chan_mid or chan_in 9 | return df.Sequential( 10 | df.RepeatInput( 11 | df.Sequential( 12 | mkbn(chan_in), mknl(), 13 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (3,3), border='same', stride=stride, init=df.init.prelu(), bias=False), 14 | mkbn(chan_mid), mknl(), 15 | df.SpatialConvolutionCUDNN(chan_mid, chan_out, (3,3), border='same', init=df.init.prelu()), 16 | ), 17 | df.Identity() if chan_in == chan_out else df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride) 18 | ), 19 | df.zoo.resnet.Add() 20 | ) 21 | 22 | 23 | def resblock2(chan_in, chan_out=None, chan_mid=None, stride=1, 24 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 25 | mknl=lambda: df.ReLU()): 26 | chan_out = chan_out or chan_in 27 | chan_mid = chan_mid or chan_in 28 | 29 | identity_or_projection = df.Identity() 30 | if chan_in != chan_out: 31 | identity_or_projection = df.Sequential( 32 | mkbn(chan_in), mknl(), 33 | df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride, init=df.init.prelu()), 34 | ) 35 | 36 | return df.Sequential( 37 | df.RepeatInput( 38 | df.Sequential( 39 | mkbn(chan_in), mknl(), 40 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (3,3), border='same', stride=stride, init=df.init.prelu(), bias=False), 41 | mkbn(chan_mid), mknl(), 42 | df.SpatialConvolutionCUDNN(chan_mid, chan_out, (3,3), border='same', init=df.init.prelu()), 43 | ), 44 | identity_or_projection, 45 | ), 46 | df.zoo.resnet.Add() 47 | ) 48 | 49 | 50 | def resblock_bottle(chan_in, chan_out=None, chan_mid=None, stride=1, 51 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 52 | mknl=lambda: df.ReLU()): 53 | chan_out = chan_out or chan_in 54 | chan_mid = chan_mid or chan_out//4 55 | return df.Sequential( 56 | df.RepeatInput( 57 | df.Sequential( 58 | mkbn(chan_in), mknl(), 59 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (1,1), stride=stride, init=df.init.prelu(), bias=False), 60 | 61 | mkbn(chan_mid), mknl(), 62 | df.SpatialConvolutionCUDNN(chan_mid, chan_mid, (3,3), border='same', init=df.init.prelu(), bias=False), 63 | 64 | mkbn(chan_mid), mknl(), 65 | df.SpatialConvolutionCUDNN(chan_mid, chan_out, (1,1), init=df.init.prelu()), 66 | ), 67 | df.Identity() if chan_in == chan_out else 
df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride) 68 | ), 69 | df.zoo.resnet.Add() 70 | ) 71 | 72 | 73 | def resblock_bottle2(chan_in, chan_out=None, chan_mid=None, stride=1, 74 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 75 | mknl=lambda: df.ReLU()): 76 | chan_out = chan_out or chan_in 77 | chan_mid = chan_mid or chan_out//4 78 | 79 | identity_or_projection = df.Identity() 80 | if chan_in != chan_out: 81 | identity_or_projection = df.Sequential( 82 | mkbn(chan_in), mknl(), 83 | df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride, init=df.init.prelu()), 84 | ) 85 | 86 | return df.Sequential( 87 | df.RepeatInput( 88 | df.Sequential( 89 | mkbn(chan_in), mknl(), 90 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (1,1), init=df.init.prelu(), bias=False), 91 | 92 | mkbn(chan_mid), mknl(), 93 | df.SpatialConvolutionCUDNN(chan_mid, chan_mid, (3,3), stride=stride, border='same', init=df.init.prelu(), bias=False), 94 | 95 | mkbn(chan_mid), mknl(), 96 | df.SpatialConvolutionCUDNN(chan_mid, chan_out, (1,1), init=df.init.prelu()), 97 | ), 98 | identity_or_projection, 99 | ), 100 | df.zoo.resnet.Add() 101 | ) 102 | 103 | 104 | def repeat_apply_merge(modules, merger, *tail): 105 | return df.Sequential(df.RepeatInput(*modules), merger, *tail) 106 | 107 | 108 | def nextblock_a(chan_in, cardin, chan_out=None, chan_mid=None, stride=1, 109 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 110 | mknl=lambda: df.ReLU()): 111 | chan_out = chan_out or chan_in 112 | chan_mid = chan_mid or chan_out//cardin//2 113 | 114 | identity_or_projection = df.Identity() 115 | if chan_in != chan_out: 116 | identity_or_projection = df.Sequential( 117 | df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride, init=df.init.prelu()), 118 | mkbn(chan_out), 119 | ) 120 | 121 | return repeat_apply_merge([ 122 | repeat_apply_merge([ 123 | df.Sequential( 124 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (1,1), init=df.init.prelu(), bias=False), 125 | mkbn(chan_mid), mknl(), 126 | 127 | df.SpatialConvolutionCUDNN(chan_mid, chan_mid, (3,3), init=df.init.prelu(), bias=False, 128 | stride=stride, border='same'), 129 | mkbn(chan_mid), mknl(), 130 | 131 | df.SpatialConvolutionCUDNN(chan_mid, chan_out, (1,1), init=df.init.prelu(), bias=False), 132 | ) for _ in range(cardin) 133 | ], df.zoo.resnet.Add(), mkbn(chan_out)), 134 | identity_or_projection 135 | ], df.zoo.resnet.Add(), mknl()) 136 | 137 | 138 | def nextblock_b(chan_in, cardin, chan_out=None, chan_mid=None, stride=1, 139 | mkbn=lambda chan: df.BatchNormalization(chan, 0.95), 140 | mknl=lambda: df.ReLU()): 141 | chan_out = chan_out or chan_in 142 | chan_mid = chan_mid or chan_out//cardin//2 143 | 144 | identity_or_projection = df.Identity() 145 | if chan_in != chan_out: 146 | identity_or_projection = df.Sequential( 147 | df.SpatialConvolutionCUDNN(chan_in, chan_out, (1,1), stride=stride, init=df.init.prelu()), 148 | mkbn(chan_out), 149 | ) 150 | 151 | return repeat_apply_merge([ 152 | repeat_apply_merge([ 153 | df.Sequential( 154 | df.SpatialConvolutionCUDNN(chan_in, chan_mid, (1,1), init=df.init.prelu(), bias=False), 155 | mkbn(chan_mid), mknl(), 156 | 157 | df.SpatialConvolutionCUDNN(chan_mid, chan_mid, (3,3), init=df.init.prelu(), bias=False, 158 | stride=stride, border='same'), 159 | mkbn(chan_mid), mknl(), 160 | ) for _ in range(cardin) 161 | ], 162 | df.Concat(), 163 | df.SpatialConvolutionCUDNN(chan_mid*cardin, chan_out, (1,1), init=df.init.prelu(), bias=False), 164 | mkbn(chan_out) 165 | ), 166 | 
identity_or_projection 167 | ], df.zoo.resnet.Add(), mknl()) 168 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | 4 | def _raise_fn(ex): 5 | def fn(*a, **kw): 6 | raise ex 7 | return fn 8 | 9 | 10 | def add_defaults(net): 11 | #net.embed = lambda f, idxs: np.concatenate([net.forward(net.raw2df(get(f['X'], ib))) for ib in batched(128, idxs)]) 12 | if not hasattr(net, 'load'): 13 | net.load = lambda fname: net.__setstate__(pickle.load(open(fname, 'rb'))) 14 | return net 15 | -------------------------------------------------------------------------------- /lib/models/lunet2.py: -------------------------------------------------------------------------------- 1 | import DeepFried2 as df 2 | from .. import dfext 3 | 4 | 5 | def mknet(mkbn=lambda chan: df.BatchNormalization(chan, 0.95)): 6 | kw = dict(mkbn=mkbn) 7 | 8 | net = df.Sequential( 9 | # -> 128x48 10 | df.SpatialConvolutionCUDNN(3, 64, (7,7), border='same', bias=None), 11 | dfext.resblock(64, **kw), 12 | df.PoolingCUDNN((2,2)), # -> 64x24 13 | dfext.resblock(64, **kw), 14 | dfext.resblock(64, **kw), 15 | dfext.resblock(64, 96, **kw), 16 | df.PoolingCUDNN((2,2)), # -> 32x12 17 | dfext.resblock(96, **kw), 18 | dfext.resblock(96, **kw), 19 | df.PoolingCUDNN((2,2)), # -> 16x6 20 | dfext.resblock(96, **kw), 21 | dfext.resblock(96, **kw), 22 | dfext.resblock(96, 128, **kw), 23 | df.PoolingCUDNN((2,2)), # -> 8x3 24 | dfext.resblock(128, **kw), 25 | dfext.resblock(128, **kw), 26 | df.PoolingCUDNN((2,3)), # -> 4x1 27 | dfext.resblock(128, **kw), 28 | 29 | # Eq. to flatten + linear 30 | df.SpatialConvolutionCUDNN(128, 256, (4,1), bias=None), 31 | mkbn(256), df.ReLU(), 32 | 33 | df.StoreOut(df.SpatialConvolutionCUDNN(256, 128, (1,1))) 34 | ) 35 | 36 | net.emb_mod = net[-1] 37 | net.in_shape = (128, 48) 38 | net.scale_factor = (2*2*2*2*2, 2*2*2*2*3) 39 | 40 | print("Net has {:.2f}M params".format(df.utils.count_params(net)/1000/1000), flush=True) 41 | return net 42 | 43 | 44 | def add_piou(lunet2): 45 | newnet = lunet2[:-1] 46 | newnet.emb_mod = lunet2[-1] 47 | newnet.iou_mod = df.StoreOut(df.Sequential(df.SpatialConvolutionCUDNN(256, 1, (1,1)), df.Sigmoid())) 48 | newnet.add(df.RepeatInput(newnet.emb_mod, newnet.iou_mod)) 49 | 50 | newnet.embs_from_out = lambda out: out[0] 51 | newnet.ious_from_out = lambda out: out[1][:,0] # Also remove the first size-1 dimension. 52 | 53 | newnet.in_shape = lunet2.in_shape 54 | newnet.scale_factor = lunet2.scale_factor 55 | 56 | print("Added {:.2f}k params".format(df.utils.count_params(newnet.iou_mod)/1000), flush=True) 57 | return newnet 58 | -------------------------------------------------------------------------------- /lib/models/lunet2b.py: -------------------------------------------------------------------------------- 1 | import DeepFried2 as df 2 | from .. 
import dfext 3 | 4 | 5 | def mknet(mkbn=lambda chan: df.BatchNormalization(chan, 0.95), avg=True, initlast=df.init.xavier()): 6 | kw = dict(mkbn=mkbn) 7 | 8 | net = df.Sequential( 9 | # -> 128x48 10 | df.SpatialConvolutionCUDNN(3, 64, (7,7), border='same', bias=None), 11 | dfext.resblock2(64, **kw), 12 | df.PoolingCUDNN((2,2)), # -> 64x24 13 | dfext.resblock2(64, **kw), 14 | dfext.resblock2(64, **kw), 15 | dfext.resblock2(64, 96, **kw), 16 | df.PoolingCUDNN((2,2)), # -> 32x12 17 | dfext.resblock2(96, **kw), 18 | dfext.resblock2(96, **kw), 19 | dfext.resblock2(96, 128, **kw), 20 | df.PoolingCUDNN((2,2)), # -> 16x6 21 | dfext.resblock2(128, **kw), 22 | dfext.resblock2(128, **kw), 23 | dfext.resblock2(128, 192, **kw), 24 | df.PoolingCUDNN((2,2)), # -> 8x3 25 | dfext.resblock2(192, **kw), 26 | dfext.resblock2(192, **kw), 27 | ) 28 | 29 | if avg: 30 | net.add(dfext.resblock2(192, 256, **kw)) 31 | net.add(mkbn(256)) 32 | net.add(df.ReLU()) 33 | net.add(df.PoolingCUDNN((8,3), mode='avg')) # -> 1x1 34 | 35 | net.add(df.SpatialConvolutionCUDNN(256, 192, (1,1), bias=None, init=df.init.prelu())) 36 | net.add(mkbn(192)) 37 | net.add(df.ReLU()) 38 | else: 39 | net.add(df.PoolingCUDNN((2,3))) # -> 4x1 40 | net.add(dfext.resblock2(128, **kw)) 41 | net.add(mkbn(128)) 42 | net.add(df.ReLU()) 43 | net.add(df.SpatialConvolutionCUDNN(128, 256, (4,1), bias=None, init=df.init.prelu())) 44 | net.add(mkbn(256)) 45 | net.add(df.ReLU()) 46 | 47 | net.add(df.StoreOut(df.SpatialConvolutionCUDNN(256, 128, (1,1), init=initlast))) 48 | 49 | net.emb_mod = net[-1] 50 | net.in_shape = (128, 48) 51 | net.scale_factor = None # TODO (2*2*2*2*2, 2*2*2*2*3) 52 | 53 | print("Net has {:.2f}M params".format(df.utils.count_params(net)/1000/1000), flush=True) 54 | return net 55 | -------------------------------------------------------------------------------- /lib/models/lunet2c.py: -------------------------------------------------------------------------------- 1 | import DeepFried2 as df 2 | from .. 
import dfext 3 | 4 | 5 | def mknet(): 6 | net = df.Sequential( 7 | # -> 128x48 8 | df.SpatialConvolutionCUDNN(3, 128, (7,7), border='same', bias=None), 9 | df.BatchNormalization(128, 0.95), df.ReLU(), 10 | 11 | dfext.resblock_bottle(128), 12 | df.PoolingCUDNN((2,2)), # -> 64x24 13 | dfext.resblock_bottle(128), 14 | dfext.resblock_bottle(128), 15 | dfext.resblock_bottle(128, chan_out=256), 16 | df.PoolingCUDNN((2,2)), # -> 32x12 17 | dfext.resblock_bottle(256), 18 | dfext.resblock_bottle(256), 19 | df.PoolingCUDNN((2,2)), # -> 16x6 20 | dfext.resblock_bottle(256), 21 | dfext.resblock_bottle(256), 22 | dfext.resblock_bottle(256, chan_out=512), 23 | df.PoolingCUDNN((2,2)), # -> 8x3 24 | dfext.resblock_bottle(512), 25 | dfext.resblock_bottle(512), 26 | df.PoolingCUDNN((8,3), mode='avg'), 27 | df.SpatialConvolutionCUDNN(512, 256, (1,1), bias=None), 28 | df.BatchNormalization(256, 0.95), df.ReLU(), 29 | df.StoreOut(df.SpatialConvolutionCUDNN(256, 128, (1,1))), 30 | ) 31 | 32 | net.emb_mod = net[-1] 33 | net.in_shape = (128, 48) 34 | net.scale_factor = (2*2*2*2, 2*2*2*2) 35 | 36 | print("Net has {:.2f}M params".format(df.utils.count_params(net)/1000/1000), flush=True) 37 | return net 38 | 39 | 40 | def hires_shared_twin(net): 41 | new_net = net[:] 42 | 43 | assert isinstance(new_net.modules[-5], df.PoolingCUDNN) 44 | new_net.modules[-5] = df.PoolingCUDNN((8,3), mode='average_exc_pad', stride=(1,1), padding=(4,1)) 45 | 46 | return new_net 47 | 48 | 49 | class Restrict(df.Module): 50 | def symb_forward(self, x): 51 | return x[:,:,1:,1:] 52 | 53 | 54 | def ultrahires_shared_twin(net_hires): 55 | new_net = net_hires[:] 56 | 57 | assert isinstance(new_net.modules[-5], df.PoolingCUDNN) 58 | 59 | new_net.modules[4] = df.Sequential(df.PoolingCUDNN((2,2), stride=(1,1), padding=(1,1)), Restrict(), df.SpatialOverfeatRoll()) 60 | new_net.modules[8] = df.Sequential(df.PoolingCUDNN((2,2), stride=(1,1), padding=(1,1)), Restrict(), df.SpatialOverfeatRoll()) 61 | new_net.modules[11] = df.Sequential(df.PoolingCUDNN((2,2), stride=(1,1), padding=(1,1)), Restrict(), df.SpatialOverfeatRoll()) 62 | new_net.modules[15] = df.Sequential(df.PoolingCUDNN((2,2), stride=(1,1), padding=(1,1)), Restrict(), df.SpatialOverfeatRoll()) 63 | new_net.add(df.SpatialOverfeatUnroll()) 64 | new_net.add(df.SpatialOverfeatUnroll()) 65 | new_net.add(df.SpatialOverfeatUnroll()) 66 | new_net.add(df.SpatialOverfeatUnroll()) 67 | 68 | return new_net 69 | -------------------------------------------------------------------------------- /lib/models/lunext.py: -------------------------------------------------------------------------------- 1 | import DeepFried2 as df 2 | from .. 
import dfext 3 | 4 | 5 | def mknet(): 6 | net = df.Sequential( 7 | # -> 128x48 8 | df.SpatialConvolutionCUDNN(3, 128, (7,7), border='same', bias=None, init=df.init.prelu()), 9 | df.BatchNormalization(128, 0.95), df.ReLU(), 10 | 11 | dfext.nextblock_b(128, cardin=16, chan_mid=4), 12 | df.PoolingCUDNN((2,2)), # -> 64x24 13 | dfext.nextblock_b(128, cardin=16, chan_mid=4), 14 | dfext.nextblock_b(128, cardin=16, chan_mid=4), 15 | dfext.nextblock_b(128, cardin=16, chan_mid=4, chan_out=256), 16 | df.PoolingCUDNN((2,2)), # -> 32x12 17 | dfext.nextblock_b(256, cardin=16, chan_mid=8), 18 | dfext.nextblock_b(256, cardin=16, chan_mid=8), 19 | df.PoolingCUDNN((2,2)), # -> 16x6 20 | dfext.nextblock_b(256, cardin=16, chan_mid=8), 21 | dfext.nextblock_b(256, cardin=16, chan_mid=8), 22 | dfext.nextblock_b(256, cardin=16, chan_mid=8, chan_out=512), 23 | df.PoolingCUDNN((2,2)), # -> 8x3 24 | dfext.nextblock_b(512, cardin=16, chan_mid=16), 25 | dfext.nextblock_b(512, cardin=16, chan_mid=16), 26 | df.PoolingCUDNN((8,3), mode='avg'), 27 | df.SpatialConvolutionCUDNN(512, 256, (1,1), bias=None, init=df.init.prelu()), 28 | df.BatchNormalization(256, 0.95), df.ReLU(), 29 | df.StoreOut(df.SpatialConvolutionCUDNN(256, 128, (1,1))) 30 | ) 31 | 32 | net.emb_mod = net[-1] 33 | net.in_shape = (128, 48) 34 | net.scale_factor = None # TODO 35 | 36 | print("Net has {:.2f}M params".format(df.utils.count_params(net)/1000/1000), flush=True) 37 | return net 38 | -------------------------------------------------------------------------------- /neural.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | 3 | import numpy as np 4 | import DeepFried2 as df 5 | 6 | import lib 7 | from lib.models import add_defaults 8 | from fakenews import FakeNeuralNewsNetwork 9 | 10 | 11 | class RealNews: 12 | def __init__(self, model, weights, scale_factor): 13 | self.scale_factor = scale_factor 14 | 15 | mod = import_module('lib.models.' + model) 16 | self.net = add_defaults(mod.add_piou(mod.mknet())) 17 | 18 | try: 19 | self.net.load(weights) 20 | except ValueError: 21 | print("!!!!!!!THE WEIGHTS YOU LOADED DON'T BELONG TO THE MODEL YOU'RE USING!!!!!!") 22 | raise 23 | 24 | self.net.evaluate() 25 | 26 | print("Precompiling network...", end='', flush=True) 27 | #self.net.forward(np.zeros((1,3) + self.net.in_shape, df.floatX)) 28 | self.net.forward(np.zeros((1,3,int(1080*scale_factor),int(1920*scale_factor)), df.floatX)) 29 | print("Done", flush=True) 30 | 31 | 32 | def tick(self, curr_frame): 33 | pass # Not needed for real network. 34 | 35 | 36 | def fake_camera(self, *fakea, **fakekw): 37 | pass # Note needed for real network. 38 | 39 | 40 | def embed_crop(self, crop, *fakea, **fakekw): 41 | assert (crop.shape[0]*self.scale_factor, crop.shape[1]*self.scale_factor) == self.net.in_shape 42 | X = lib.img2df(crop, shape=self.net.in_shape) 43 | return self.net.embs_from_out(self.net.forward(X[None]))[0,:,0,0] 44 | 45 | 46 | def embed_image(self, image): 47 | print("You better use `embed_and_personness_multi`, you lazy bastard") 48 | return self.embed_and_personness_multi([image])[0][0] 49 | 50 | 51 | def search_person(self, img_embs, person_emb, *fakea, **fakekw): 52 | # compute distance between embeddings and person's embedding. 53 | d = np.sqrt(np.sum((img_embs - person_emb[:,None,None])**2, axis=0)) 54 | 55 | # Convert distance to probability. 56 | # TODO: Might be better to fit a sigmoid or something. 
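        # softmin is a softmax over negated distances: it turns the distance map into
        # a probability map (non-negative, sums to one) where a small embedding
        # distance means a high probability of being that person.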
57 | return lib.softmin(d) 58 | #return = 1/(0.01+d) 59 | 60 | 61 | def fix_shape(self, net_output, orig_shape, out_shape, fill_value=0): 62 | orig_shape = (orig_shape[0]*self.scale_factor, orig_shape[1]*self.scale_factor) 63 | 64 | # Scale to `out_shape` but keeping correct aspect ratio. 65 | h = int(self.net.scale_factor[0]/orig_shape[0]*net_output.shape[0]*out_shape[0]) 66 | w = int(self.net.scale_factor[1]/orig_shape[1]*net_output.shape[1]*out_shape[1]) 67 | scaled_out = lib.resize_map(net_output, (h, w)) 68 | 69 | # Paste into the middle. 70 | out = np.full(out_shape, fill_value, dtype=net_output.dtype) 71 | dy, dx = (out.shape[0]-h)//2, (out.shape[1]-w)//2 72 | 73 | # TODO: Is there a better way? 'cause :-0 fails. I guess do shape[0]-dx? 74 | if 0 < dy and 0 < dx: 75 | out[dy:-dy,dx:-dx] = scaled_out 76 | elif dx == 0: 77 | out[dy:-dy,:] = scaled_out 78 | elif dy == 0: 79 | out[:,dx:-dx] = scaled_out 80 | else: 81 | print("{} = ({}-{})//2".format(dy, out.shape[0], h)) 82 | print("{} = ({}-{})//2".format(dx, out.shape[1], w)) 83 | assert False, "Something wrong with shape-fixing, see above!" 84 | 85 | return out 86 | 87 | 88 | def personness(self, image, known_embs): 89 | raise NotImplementedError("TODO. Use `embed_and_personness_multi` instead, don't be wasteful!") 90 | 91 | 92 | def embed_and_personness_multi(self, images, batch=True): 93 | H, W, _ = images[0].shape 94 | 95 | if batch: 96 | out = self.net.forward(np.array([lib.img2df(img, shape=(int(H*self.scale_factor), int(W*self.scale_factor))) for img in images])) 97 | return self.net.embs_from_out(out), self.net.ious_from_out(out) 98 | else: 99 | embs, ious = [], [] 100 | for img in images: 101 | out = self.net.forward(lib.img2df(img, shape=(int(H * self.scale_factor), int(W * self.scale_factor)))[None]) 102 | embs.append(self.net.embs_from_out(out)[0]) 103 | ious.append(self.net.ious_from_out(out)[0]) 104 | return np.array(embs), np.array(ious) 105 | 106 | 107 | def clear_known(self, image_personness, image_embs, known_embs): 108 | p_iou = np.array(image_personness) 109 | for emb in known_embs: 110 | p_emb = self.search_person(image_embs, emb) 111 | p_iou *= 1-p_emb 112 | return p_iou 113 | 114 | 115 | class SemiFakeNews: 116 | def __init__(self, model, weights, scale_factor, fake_dets): 117 | self.real = RealNews(model, weights, scale_factor) 118 | 119 | out = self.real.embed_image(np.zeros((3,1080,1920), df.floatX)) 120 | self.fake = FakeNeuralNewsNetwork(fake_dets, fake_shape=out.shape[2:]) 121 | 122 | 123 | def tick(self, *a, **kw): 124 | self.real.tick(*a, **kw) 125 | self.fake.tick(*a, **kw) 126 | 127 | 128 | def fake_camera(self, *a, **kw): 129 | self.real.fake_camera(*a, **kw) 130 | self.fake.fake_camera(*a, **kw) 131 | 132 | 133 | def embed_crop(self, crop, *fakea, **fakekw): 134 | return self.real.embed_crop(crop) 135 | 136 | 137 | def embed_image(self, image): 138 | return self.real.embed_image(image) 139 | 140 | 141 | def search_person(self, img_embs, person_emb, *fakea, **fakekw): 142 | return self.real.search_person(img_embs, person_emb) 143 | 144 | 145 | def fix_shape(self, net_output, orig_shape, out_shape, fill_value=0): 146 | return self.real.fix_shape(net_output, orig_shape, out_shape) 147 | 148 | 149 | def personness(self, image, known_embs): 150 | return self.fake.personness(image, known_embs) 151 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | filterpy 2 | munkres 3 | 
pillow 4 | h5py -------------------------------------------------------------------------------- /semifake.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | 3 | import numpy as np 4 | import DeepFried2 as df 5 | from scipy.spatial.distance import cdist 6 | 7 | from lbtoolbox.util import batched 8 | 9 | import lib 10 | from lib.models import add_defaults 11 | from fakenews import FakeNeuralNewsNetwork 12 | 13 | DIST_THRESH = 7 14 | 15 | 16 | class SemiFakeNews: 17 | def __init__(self, model, weights, input_scale_factor, fake_shape, fake_dets, debug_skip_full_image=False): 18 | self.input_scale_factor = input_scale_factor 19 | 20 | mod = import_module('lib.models.' + model) 21 | 22 | self.net = mod.mknet() 23 | add_defaults(self.net) 24 | 25 | try: 26 | self.net.load(weights) 27 | except ValueError: 28 | print("!!!!!!!THE WEIGHTS YOU LOADED DON'T BELONG TO THE MODEL YOU'RE USING!!!!!!") 29 | raise 30 | 31 | # Shares the weights, just replaces the avg-pooling layer. 32 | self.net_hires = mod.hires_shared_twin(self.net) 33 | add_defaults(self.net_hires) 34 | 35 | self.net.evaluate() 36 | self.net_hires.evaluate() 37 | 38 | print("Precompiling network... 1/2", end='', flush=True) 39 | #self.net.forward(np.zeros((1,3) + self.net.in_shape, df.floatX)) 40 | print("\rPrecompiling network... 2/2", end='', flush=True) 41 | #if not (debug_skip_full_image and fake_dets is None): 42 | #out = self.net_hires.forward(np.zeros((1,3,1080//2,1920//2), df.floatX)) 43 | print(" Done", flush=True) 44 | 45 | #fake_shape = out.shape[2:] # We didn't fake the avg-pool effect yet, so don't! 46 | self.fake = FakeNeuralNewsNetwork(fake_dets, shape=fake_shape) if fake_dets is not None else None 47 | 48 | 49 | def _scale_input_shape(self, shape): 50 | return lib.scale_shape(shape, self.input_scale_factor) 51 | 52 | 53 | # Only for fake 54 | def tick(self, *a, **kw): 55 | if self.fake is not None: 56 | self.fake.tick(*a, **kw) 57 | 58 | 59 | # Only for fake 60 | def fake_camera(self, *a, **kw): 61 | if self.fake is not None: 62 | self.fake.fake_camera(*a, **kw) 63 | 64 | 65 | def embed_crops(self, crops, *fakea, batchsize=32, **fakekw): 66 | assert all(self._scale_input_shape(crop.shape) == self.net.in_shape for crop in crops) 67 | 68 | X = np.array([lib.img2df(crop, shape=self.net.in_shape) for crop in crops]) 69 | out = np.concatenate([self.net.forward(Xb) for Xb in batched(batchsize, X)]) 70 | return out[:,:,0,0] # Output is Dx1x1 71 | 72 | 73 | def embeddings_cdist(self, embsA, embsB): 74 | return cdist(embsA, embsB) 75 | 76 | 77 | #@profile 78 | def embed_images(self, images, batch=True): 79 | # TODO: batch=False 80 | X = np.array([lib.img2df(img, shape=self._scale_input_shape(img.shape)) for img in images]) 81 | return self.net_hires.forward(X) 82 | 83 | 84 | def search_person(self, img_embs, person_emb, *fakea, **fakekw): 85 | # compute distance between embeddings and person's embedding. 86 | return np.sqrt(np.sum((img_embs - person_emb[:,None,None])**2, axis=0)) 87 | 88 | #d[d > DIST_THRESH] = 9999 # Will go to zero/uniform in the softmin 89 | 90 | # Convert distance to probability. 91 | #return lib.softmin(d, T), d # TODO: Might be better to fit a sigmoid or something. 92 | 93 | 94 | def fix_shape(self, net_output, orig_shape, out_shape, fill_value=0): 95 | orig_shape = self._scale_input_shape(orig_shape) 96 | 97 | # Scale to `out_shape` but keeping correct aspect ratio. 
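# Illustrative note on the computation below: the resblock model above
# (lunet2c.mknet) sets net.scale_factor = (2*2*2*2, 2*2*2*2), one factor 2 per
# pooling, so net_output.shape * scale_factor recovers the network's input
# resolution. Dividing by `orig_shape` and multiplying by `out_shape` then maps
# the network output into out_shape units while keeping the input's aspect
# ratio; the centered paste below handles any leftover border with fill_value.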
98 | h = net_output.shape[0]*self.net.scale_factor[0] /orig_shape[0]*out_shape[0] 99 | w = net_output.shape[1]*self.net.scale_factor[1] /orig_shape[1]*out_shape[1] 100 | 101 | return lib.paste_into_middle_2d(lib.resize_map(net_output, (int(h), int(w))), out_shape, fill_value) 102 | 103 | 104 | # THIS IS THE ONLY THING FAKE :( 105 | # TODO: Make semi-fake, by clearing out known_embs. 106 | def personness(self, image, known_embs, return_pose=False): 107 | assert self.fake is not None, "The world doesn't work that way my friend!" 108 | return self.fake.personness(image, known_embs, return_pose) 109 | -------------------------------------------------------------------------------- /simple_2d_tracker_duke.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import argparse 8 | from os.path import join as pjoin 9 | from os import makedirs 10 | import time, datetime 11 | 12 | # the usual suspects 13 | import numpy as np 14 | import matplotlib as mpl 15 | #mpl.use('Agg') 16 | #mpl.use('GTK') 17 | import matplotlib.pyplot as plt 18 | import matplotlib.patches as patches 19 | from mpl_toolkits.axes_grid1 import ImageGrid 20 | from scipy.linalg import block_diag, inv 21 | from scipy.io import loadmat 22 | 23 | #tracker stuff 24 | import lib 25 | from simple_track_duke import Track 26 | import h5py 27 | from scipy.spatial.distance import euclidean,mahalanobis 28 | from munkres import Munkres, print_matrix 29 | from semifake import SemiFakeNews 30 | 31 | SEQ_FPS = 60.0 32 | SEQ_DT = 1./SEQ_FPS 33 | SEQ_SHAPE = (1080, 1920) 34 | STATE_SHAPE = (270, 480) 35 | HOT_CMAP = lib.get_transparent_colormap() 36 | #NUM_CAMS = 2 # which cam to consider (from 1 to NUM_CAMS), max: 8 37 | SCALE_FACTOR = 0.5 38 | 39 | 40 | g_frames = 0 # Global counter for correct FPS in all cases 41 | 42 | def n_active_tracks(tracklist): 43 | return '{:2d} +{:2d} +{:2d} ={:2d}'.format( 44 | sum(t.status == 'matched' for t in tracklist), 45 | sum(t.status == 'missed' for t in tracklist), 46 | sum(t.status == 'init' for t in tracklist), 47 | len(tracklist), 48 | ) 49 | # from collections import Counter 50 | #return str(Counter(t.status for t in tracklist).most_common()) 51 | 52 | 53 | def shall_vis(args, curr_frame): 54 | return args.vis and (curr_frame - args.t0) % args.vis == 0 55 | 56 | 57 | def embed_crops_at(net, image, xys, debug_out_dir=None, debug_cam=None, debug_curr_frame=None): 58 | H, W, _ = image.shape 59 | crops = [lib.cutout_abs_hwc(image, lib.box_centered(xy[0]*SCALE_FACTOR, xy[1]*SCALE_FACTOR, 60 | h=128*2*SCALE_FACTOR, w=48*2*SCALE_FACTOR, bounds=(0, 0, W, H))) for xy in xys] 61 | 62 | if debug_out_dir is not None: 63 | for icrop, crop in enumerate(crops): 64 | lib.imwrite(pjoin(debug_out_dir, 'crops', 'cam{}-frame{}-{}.jpg'.format(debug_cam, debug_curr_frame, icrop)), crop) 65 | 66 | return net.embed_crops(crops) 67 | 68 | 69 | def load_or_reuse(image, args, icam, frame): 70 | if image is not None: 71 | return image 72 | framedir = 'frames-0.5' if SCALE_FACTOR == 0.5 else 'frames' 73 | return plt.imread(pjoin(args.basedir, framedir, 'camera{}/{}.jpg'.format(icam, lib.glob2loc(frame, icam)))) 74 | 75 | 76 | #@profile 77 | def main(net, args): 78 | eval_path = pjoin(args.outdir, 'results/run_{:%Y-%m-%d_%H:%M:%S}.txt'.format(datetime.datetime.now())) 79 | if args.debug: 80 | debug_dir = pjoin(args.outdir, 
'debug/run_{:%Y-%m-%d_%H:%M:%S}'.format(datetime.datetime.now())) 81 | makedirs(pjoin(debug_dir, 'crops'), exist_ok=True) 82 | else: 83 | debug_dir = None 84 | 85 | 86 | CAMS = args.cams 87 | 88 | track_lists = [[] for _ in range(len(CAMS))] 89 | already_tracked_gids = [[] for _ in range(len(CAMS))] 90 | track_id = 1 91 | det_lists = read_detections(CAMS) 92 | gt_list = load_trainval(pjoin(args.basedir, 'ground_truth/trainval.mat'),time_range=[127720, 187540]) #train_val_mini 93 | APP_THRESH = 6 #7 for ReID embeddings, 200 for euclidean pixel distance 94 | DIST_THRESH = 200 # 7 for ReID embeddings, 200 for euclidean pixel distance 95 | DET_INIT_THRESH = 0.3 96 | DET_CONTINUE_THRESH = -0.3 97 | m = Munkres() 98 | 99 | per_cam_gts = [lib.slice_all(gt_list, gt_list['Cams'] == icam) for icam in CAMS] 100 | 101 | # ===Tracking fun begins: iterate over frames=== 102 | # TODO: global time (duke) 103 | for curr_frame in range(args.t0, args.t1+1): 104 | print("\rFrame {}, {} matched/missed/init/total tracks, {} total seen".format(curr_frame, ', '.join(map(n_active_tracks, track_lists)), sum(map(len, track_lists))), end='', flush=True) 105 | 106 | for icam, det_list, gt_list, track_list, already_tracked in zip(CAMS, det_lists, per_cam_gts, track_lists, already_tracked_gids): 107 | image = None 108 | 109 | curr_dets = det_list[np.where(det_list[:,1] == lib.glob2loc(curr_frame, icam))[0]] 110 | curr_dets = curr_dets[curr_dets[:,-1] > DET_CONTINUE_THRESH] 111 | 112 | curr_gts = lib.slice_all(gt_list, gt_list['GFIDs'] == curr_frame) 113 | 114 | 115 | # ===visualization=== 116 | # First, plot what data we have before doing anything. 117 | if shall_vis(args, curr_frame): 118 | fig, axes = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(20, 12)) 119 | (ax_tl, ax_tr), (ax_bl, ax_br) = axes 120 | axes = axes.flatten() 121 | 122 | for ax in axes: 123 | image = load_or_reuse(image, args, icam, curr_frame) 124 | ax.imshow(image, extent=[0, 1920, 1080, 0]) 125 | 126 | # plot (active) tracks 127 | ax_tl.set_title('Groundtruth') 128 | ax_tr.set_title('Filtered Groundtruth') 129 | ax_bl.set_title('Thresholded Detections') 130 | ax_br.set_title('All Tracks') 131 | 132 | for det in curr_dets: 133 | ax_bl.add_patch(patches.Rectangle((det[2], det[3]), det[4] - det[2], det[5] - det[3], 134 | fill=False, linewidth=det[-1] + 1.5, edgecolor="red")) 135 | 136 | for tid, box in zip(curr_gts['TIDs'], curr_gts['boxes']): 137 | vis_box = lib.box_rel2abs(box) 138 | ax_tl.add_patch(patches.Rectangle((vis_box[0], vis_box[1]), vis_box[2], vis_box[3], 139 | fill=False, linewidth=2.0, edgecolor="blue")) 140 | # ===/visualization=== 141 | 142 | # ---PREDICT--- 143 | for track in track_list: 144 | track.track_predict() 145 | 146 | num_curr_dets = len(curr_dets) 147 | if num_curr_dets > 0 and len(track_list) > 0: 148 | if args.use_appearance: 149 | track_embs = np.array([track.embedding for track in track_list]) 150 | det_xys = [lib.box_center_xy(lib.ltrb_to_box(det[2:])) for det in curr_dets] 151 | image = load_or_reuse(image, args, icam, curr_frame) 152 | det_embs = embed_crops_at(net, image, det_xys, 153 | debug_out_dir=debug_dir, debug_cam=icam, debug_curr_frame=curr_frame) 154 | dist_matrix = net.embeddings_cdist(track_embs, det_embs) 155 | #print() 156 | #print("dists-pct: {} | {} | {}".format(*np.percentile(dist_matrix.flatten(), [0, 50, 100]))) 157 | #print("dists-top: " + " | ".join(map(str, np.sort(dist_matrix, axis=None)[:5]))) 158 | 159 | # apply dist threshold here to keep munkres from finding strange compromises 160 
| dist_matrix = dist_matrix / APP_THRESH 161 | dist_matrix[dist_matrix > 1.0] = 999999 162 | 163 | # * Euclidean dist! 164 | #dist_matrix_euc = np.zeros((len(track_list), num_curr_dets)) 165 | #for itrack, track in enumerate(track_list): 166 | # dist_matrix_euc[itrack] = [euclidean(track.KF.x[::2], lib.box_center_xy(lib.ltrb_to_box(det[2:]))) for det in curr_dets] 167 | #dist_matrix_euc = dist_matrix_euc/DIST_THRESH 168 | #dist_matrix_euc[dist_matrix_euc > 1.0] = 999999 169 | 170 | dist_matrix = dist_matrix#*dist_matrix_euc 171 | 172 | else: 173 | dist_matrix = np.zeros((len(track_list), num_curr_dets)) 174 | 175 | for itrack, track in enumerate(track_list): 176 | # ---BUILD DISTANCE MATRIX--- 177 | # TODO: IoU (outsource distance measure) 178 | # #dist_matrix = [euclidean(tracker.x[0::2],curr_dets[i][2:4]) for i in range(len(curr_dets))] 179 | #inv_P = inv(each_tracker.KF.P[::2,::2]) 180 | dist_matrix[itrack] = [euclidean(track.KF.x[::2], lib.box_center_xy(lib.ltrb_to_box(det[2:]))) for det in curr_dets] 181 | # #dist_matrix_line = np.array([mahalanobis(each_tracker.KF.x[::2], 182 | # (curr_dets[i][2]+curr_dets[i][4]/2., 183 | # curr_dets[i][3]+curr_dets[i][5]/2.), 184 | # inv_P) for i in range(len(curr_dets))]) 185 | # apply the threshold here (munkres apparently can't deal 100% with inf, so use 999999) 186 | # dist_matrix_line[np.where(dist_matrix_line>dist_thresh)] = 999999 187 | # dist_matrix.append(dist_matrix_line.tolist()) 188 | 189 | # apply dist threshold here to keep munkres from finding strange compromises 190 | dist_matrix = dist_matrix / DIST_THRESH 191 | dist_matrix[dist_matrix > 1.0] = 999999 192 | 193 | # Do the Munkres! (Hungarian algo) to find the best-matching track<->detection assignment 194 | # at first, all detections (if any) are unassigned 195 | unassigned_dets = set(range(num_curr_dets)) 196 | 197 | nn_indexes = m.compute(dist_matrix.tolist()) 198 | # perform the update step for each match (check the threshold to see if it's actually a miss) 199 | for nn_match_idx in range(len(nn_indexes)): 200 | # ---UPDATE--- 201 | if (dist_matrix[nn_indexes[nn_match_idx][0]][nn_indexes[nn_match_idx][1]] <= 1.0): 202 | nn_det = curr_dets[nn_indexes[nn_match_idx][1]] # each entry of nn_indexes is (track_idx, det_idx) 203 | track_list[nn_indexes[nn_match_idx][0]].track_update(lib.box_center_xy(lib.ltrb_to_box(nn_det[2:]))) 204 | track_list[nn_indexes[nn_match_idx][0]].track_is_matched(curr_frame) 205 | # remove detection from being unassigned 206 | unassigned_dets.remove(nn_indexes[nn_match_idx][1]) 207 | else: 208 | track_list[nn_indexes[nn_match_idx][0]].track_is_missed(curr_frame) 209 | 210 | # set tracks without any match to missed 211 | for miss_idx in list(set(range(len(track_list))) - set([i[0] for i in nn_indexes])): 212 | track_list[miss_idx].track_is_missed(curr_frame) 213 | 214 | else: # No dets => all missed 215 | for track in track_list: 216 | track.track_is_missed(curr_frame) 217 | 218 | 219 | if not args.gt_init: 220 | ### B) 1: get new tracks from unassigned detections 221 | for unassigned_det_idx in unassigned_dets: 222 | if curr_dets[unassigned_det_idx][-1] > DET_INIT_THRESH: 223 | init_pose = lib.box_center_xy(lib.ltrb_to_box(curr_dets[unassigned_det_idx][2:])) 224 | image = load_or_reuse(image, args, icam, curr_frame) 225 | new_track = Track(SEQ_DT, curr_frame, init_pose, track_id=track_id, 226 | embedding=embed_crops_at(net, image, [init_pose])[0] if args.use_appearance else None) 227 | track_id = track_id + 1 228 | track_list.append(new_track) 229 | else: 230 | ### B) 2: new tracks from
(unassigned) ground truth 231 | for tid, box in zip(curr_gts['TIDs'],curr_gts['boxes']): 232 | if tid in already_tracked: 233 | continue 234 | abs_box = lib.box_rel2abs(box) 235 | init_pose = lib.box_center_xy(abs_box) 236 | image = load_or_reuse(image, args, icam, curr_frame) 237 | new_track = Track(SEQ_DT, curr_frame, init_pose, track_id=tid, 238 | embedding=embed_crops_at(net, image, [init_pose])[0] if args.use_appearance else None, 239 | init_thresh=1,delete_thresh=90) 240 | track_list.append(new_track) 241 | already_tracked.append(tid) 242 | 243 | if shall_vis(args, curr_frame): 244 | ax_tr.add_patch(patches.Rectangle((abs_box[0], abs_box[1]), abs_box[2], abs_box[3], 245 | fill=False, linewidth=2.0, edgecolor="lime")) 246 | 247 | ### C) further track-management 248 | # delete tracks marked as 'deleted' in this tracking cycle 249 | # Modifies track_list in-place, like de-referencing a pointer in C 250 | track_list[:] = [i for i in track_list if i.status != 'deleted'] 251 | 252 | # ===visualization=== 253 | ### Plot the current state of tracks. 254 | if shall_vis(args, curr_frame): 255 | for tracker in track_list: 256 | tracker.plot_track(ax_br, plot_past_trajectory=True) 257 | # plt.gca().add_patch(patches.Rectangle((tracker.KF.x[0]-50, tracker.KF.x[2]-200), 100, 200, 258 | # fill=False, linewidth=3, edgecolor=tracker.color)) 259 | 260 | for ax in axes: 261 | ax.set_adjustable('box-forced') 262 | ax.set_xlim(0, 1920) 263 | ax.set_ylim(1080, 0) 264 | 265 | # plt.imshow(curr_heatmap,alpha=0.5,interpolation='none',cmap='hot',extent=[0,curr_image.shape[1],curr_image.shape[0],0],clim=(0, 10)) 266 | # savefig(pjoin(args.outdir, 'camera{}/res_img_{:06d}.jpg'.format(icam, curr_frame)), quality=80) 267 | fig.savefig(pjoin(args.outdir, 'camera{}/res_img_{:06d}.jpg'.format(icam, curr_frame)), 268 | quality=80, bbox_inches='tight', pad_inches=0.2) 269 | # plt.show() 270 | # fig.close() 271 | plt.close() 272 | 273 | 274 | # ===evaluation=== 275 | if True: 276 | with open(eval_path, 'a') as eval_file: 277 | for icam, track_list in zip(CAMS, track_lists): 278 | for tracker in track_list: 279 | track_eval_line = tracker.get_track_eval_line(cid=icam,frame=curr_frame) 280 | if track_eval_line is not None: 281 | eval_file.write('{} {} {} {} {} {} {} {} {}\n'.format(*track_eval_line)) 282 | 283 | global g_frames 284 | g_frames += 1 285 | 286 | 287 | # Heavily adapted and fixed from http://robotics.usc.edu/~ampereir/wordpress/?p=626 288 | def savefig(fname, fig=None, orig_size=None, **kw): 289 | if fig is None: 290 | fig = plt.gcf() 291 | fig.patch.set_alpha(0) 292 | 293 | w, h = fig.get_size_inches() 294 | if orig_size is not None: # Aspect ratio scaling if required 295 | fw, fh = w, h 296 | w, h = orig_size 297 | fig.set_size_inches((fw, (fw/w)*h)) 298 | fig.set_dpi((fw/w)*fig.get_dpi()) 299 | 300 | ax = fig.gca() 301 | ax.set_frame_on(False) 302 | ax.set_xticks([]); ax.set_yticks([]) 303 | ax.set_axis_off() 304 | #ax.set_xlim(0, w); ax.set_ylim(h, 0) 305 | fig.savefig(fname, transparent=True, bbox_inches='tight', pad_inches=0, **kw) 306 | 307 | def read_detections(cams): 308 | print("Reading detections...") 309 | det_list = [[] for _ in range(len(cams))] 310 | for icam in cams: 311 | print("Camera {}...".format(icam)) 312 | fname = pjoin(args.basedir, 'detections/camera{}_trainval-mini.mat'.format(icam)) 313 | try: 314 | det_list[cams.index(icam)] = loadmat(fname)['detections'] 315 | except NotImplementedError: 316 | with h5py.File(fname, 'r') as det_file: 317 | det_list[cams.index(icam)] =
np.array(det_file['detections']).T 318 | # ===setup list of all detections (dukeMTMC format)=== 319 | #with h5py.File(fname, 'r') as det_file: 320 | # det_list[CAMS.index(icam)] = np.array(det_file['detections']).T 321 | print("done!") 322 | return det_list 323 | 324 | 325 | def slice_all(f, s): 326 | return {k: v[s] for k,v in f.items()} 327 | 328 | def load_trainval(fname, time_range=[49700, 227540]): 329 | try: 330 | m = loadmat(fname)['trainData'] 331 | except NotImplementedError: 332 | with h5py.File(fname, 'r') as f: 333 | m = np.array(f['trainData']).T 334 | 335 | data = { 336 | 'Cams': np.array(m[:,0], dtype=int), 337 | 'TIDs': np.array(m[:,1], dtype=int), 338 | 'LFIDs': np.array(m[:,2], dtype=int), 339 | 'boxes': np.array(m[:,3:7], dtype=float), 340 | 'world': np.array(m[:,7:9]), 341 | 'feet': np.array(m[:,9:]), 342 | } 343 | 344 | # boxes are l t w h 345 | data['boxes'][:,0] /= 1920 346 | data['boxes'][:,1] /= 1080 347 | data['boxes'][:,2] /= 1920 348 | data['boxes'][:,3] /= 1080 349 | 350 | # Compute global frame numbers once. 351 | start_times = [5543, 3607, 27244, 31182, 1, 22402, 18968, 46766] 352 | data['GFIDs'] = np.array(data['LFIDs']) 353 | for icam, t0 in zip(range(1,9), start_times): 354 | data['GFIDs'][data['Cams'] == icam] += t0 - 1 355 | 356 | #return data 357 | return slice_all(data, (time_range[0] <= data['GFIDs']) & (data['GFIDs'] <= time_range[1])) 358 | 359 | 360 | if __name__ == '__main__': 361 | 362 | parser = argparse.ArgumentParser(description='2D tracker test.') 363 | parser.add_argument('--basedir', nargs='?', default='/work/breuers/dukeMTMC/', 364 | help='Path to the dukeMTMC base folder (expects detections/, frames/ or frames-0.5/, and ground_truth/ inside).') 365 | parser.add_argument('--outdir', nargs='?', default='/work/breuers/dukeMTMC/results/', 366 | help='Where to store generated output. Only needed if `--vis` is also passed.') 367 | parser.add_argument('--use_appearance', action='store_true', 368 | help='Whether or not to use the deep net as appearance model.') 369 | parser.add_argument('--model', default='lunet2c', 370 | help='Name of the model to load. Corresponds to module names in lib/models. Or `fake`') 371 | parser.add_argument('--weights', default='/work/breuers/dukeMTMC/models/lunet2c-noscale-nobg-2to32-aug.pkl', 372 | help='Name of the weights to load for the model (path to .pkl file).') 373 | parser.add_argument('--t0', default=127720, type=int, 374 | help='Time of first frame.') 375 | parser.add_argument('--t1', default=187540, type=int, 376 | help='Time of last frame, inclusive.') 377 | parser.add_argument('--vis', default=0, type=int, 378 | help='Generate and save visualization of the results, every X frames.') 379 | parser.add_argument('--debug', action='store_true', 380 | help='Generate extra many debugging outputs (in outdir).') 381 | parser.add_argument('--gt_init', action='store_true', 382 | help='Use first groundtruth to init tracks.') 383 | parser.add_argument('--cams', default='1,2,3,4,5,6,7,8', 384 | help='Comma-separated list of camera numbers (1-8) to consider.') 385 | args = parser.parse_args() 386 | args.cams = [int(c) for c in args.cams.split(',')] 387 | print(args) 388 | 389 | # This is all for faking the network. 390 | net = SemiFakeNews(model=args.model, weights=args.weights, 391 | input_scale_factor=1.0 if SCALE_FACTOR==0.5 else 0.5, # ASK LUCAS 392 | debug_skip_full_image=True, # Goes with the above.
393 | fake_dets=None, 394 | fake_shape=None, 395 | ) if args.use_appearance else None 396 | 397 | # Prepare output dirs 398 | for icam in args.cams: 399 | makedirs(pjoin(args.outdir, 'camera{}'.format(icam)), exist_ok=True) 400 | makedirs(pjoin(args.outdir, 'results'), exist_ok=True) 401 | 402 | tstart = time.time() 403 | try: 404 | main(net, args) 405 | except KeyboardInterrupt: 406 | print() 407 | 408 | print('FPS: {:.3f}'.format(g_frames / (time.time() - tstart))) 409 | -------------------------------------------------------------------------------- /simple_track_duke.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | #TODO: comments/doc 3 | 4 | import numpy as np 5 | from filterpy.kalman import KalmanFilter 6 | import scipy 7 | from scipy import ndimage 8 | from scipy import signal 9 | from scipy.linalg import block_diag,inv 10 | from filterpy.common import Q_discrete_white_noise 11 | from filterpy.stats import plot_covariance_ellipse 12 | import matplotlib.pyplot as plt 13 | from os.path import join as pjoin 14 | 15 | import lib 16 | import lbtoolbox.plotting as lbplt 17 | 18 | # all_bs for bbox regression 19 | all_bs = np.array([[256.3190, -0.0207, 136.6533, 0.1978], 20 | [212.9634, 0.0055, 126.0157, 0.2036], 21 | [277.3869, -0.0154, 5.2019, 0.4442], 22 | [-296.1867, 0.3356, 54.3528, 0.3093], 23 | [258.1709, -0.0258, 144.2437, 0.2030], 24 | [152.2878, 0.0296, -271.9162, 0.6985], 25 | [208.9894, 0.0349, -298.6897, 0.7266], 26 | [170.6156, 0.0128, 81.8043, 0.1659]]) 27 | 28 | HOT_CMAP = lib.get_transparent_colormap() 29 | 30 | 31 | class Track(object): 32 | 33 | """ Implements a track (not a tracker, a track). 34 | With KalmanFilter and some other stuff like status for track management 35 | 36 | Attributes 37 | ---------- 38 | TODO 39 | 40 | """ 41 | 42 | def __init__(self, dt, curr_frame, init_pose, track_dim=4, det_dim=2, track_id=-1, 43 | embedding=None, debug_out_dir=None, init_thresh=3, delete_thresh=5,): 44 | self.debug_out_dir = debug_out_dir 45 | 46 | init_x = [init_pose[0], 0.0, init_pose[1], 0.0] 47 | init_P = [[200.0, 0, 0, 0], [0, 100.0, 0, 0], [0, 0, 200.0, 0], [0, 0, 0, 100.0]] 48 | 49 | self.track_id = track_id 50 | self.color = np.random.rand(3) 51 | self.xs=[init_x] 52 | self.Ps=[init_P] 53 | 54 | self.KF = KalmanFilter(dim_x=track_dim, dim_z=det_dim) 55 | self.KF.F = np.array([[1, dt, 0, 0], 56 | [0, 1, 0, 0], 57 | [0, 0, 1, dt], 58 | [0, 0, 0, 1]], dtype=np.float64) 59 | q = Q_discrete_white_noise(dim=2, dt=dt, var=50.) 60 | self.KF.Q = block_diag(q, q) 61 | self.KF.H = np.array([[1, 0, 0, 0], 62 | [0, 0, 1, 0]], dtype=np.float64) 63 | self.KF.R = np.array([[50.0, 0], 64 | [0, 50.0]], dtype=np.float64) 65 | self.KF.x = init_x 66 | self.KF.P = init_P 67 | 68 | self.missed_for = 0 69 | self.deleted_at = 0 70 | self.last_matched_at = curr_frame 71 | self.created_at = curr_frame 72 | 73 | self.age = 1 #age in frames 74 | 75 | #missed for [delete_thresh] times? delete! 
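# For illustration, with the defaults above (init_thresh=3, delete_thresh=5):
# a new track stays in 'init' and is not reported until it has been matched on
# 3 consecutive frames, and an established track is deleted once it has been
# missed on 5 frames in a row.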
76 | self.delete_thresh = delete_thresh #240=4 seconds ("occluded by car"-scenario in cam1) 77 | self.init_thresh = init_thresh #number of consecutive detection responses before reporting this track 78 | # set status: {init, matched, missed, deleted} 79 | if self.init_thresh == 1: 80 | self.status='matched' 81 | else: 82 | self.status='init' 83 | 84 | self.poses=[init_pose] 85 | 86 | #only if ReID is used for DA 87 | self.embedding = embedding 88 | 89 | # ==Track state== 90 | def track_predict(self): 91 | # standard KF 92 | self.KF.predict() 93 | 94 | def track_update(self, z): 95 | self.KF.update(z) 96 | 97 | # ==Track status management== 98 | def track_is_missed(self,curr_frame): 99 | self.missed_for += 1 100 | was_init = (self.status == 'init'); self.status = 'missed'  # remember pre-miss status *before* overwriting it: a track missed while still 'init' dies immediately 101 | if (self.missed_for >= self.delete_thresh) or was_init: 102 | self.track_is_deleted(curr_frame) 103 | else: 104 | self.age += 1 105 | self.xs.append(self.KF.x) 106 | self.Ps.append(self.KF.P) 107 | self.poses.append([self.KF.x[0],self.KF.x[2]]) 108 | 109 | def track_is_matched(self,curr_frame): 110 | self.last_matched_at = curr_frame 111 | self.missed_for = 0 112 | self.age += 1 113 | self.xs.append(self.KF.x) 114 | self.Ps.append(self.KF.P) 115 | self.poses.append([self.KF.x[0],self.KF.x[2]]) 116 | if ((self.status=='init') and (curr_frame-self.created_at+1 < self.init_thresh)): 117 | pass # stay in init as long as threshold not exceeded 118 | else: 119 | self.status = 'matched' # in all other cases, go to matched 120 | 121 | def track_is_deleted(self,curr_frame): 122 | self.deleted_at = curr_frame 123 | self.status = 'deleted' 124 | 125 | # ==Evaluation== 126 | def get_track_eval_line(self,cid=1,frame=0): 127 | if (self.status == 'deleted' or self.status == 'init'): 128 | return None 129 | 130 | #pymot format 131 | #[height,width,id,y,x,z] 132 | #return {"height": 0, "width": 0, "id": self.track_id, "y": self.KF.x[2], "x": self.KF.x[0], "z": 0} 133 | #motchallenge format 134 | #TODO 135 | #dukeMTMC format 136 | #[cam, ID, frame, left, top, width, height, worldX, worldY] 137 | cX,cY = self.poses[-1] 138 | h = int(((all_bs[cid-1][0]+all_bs[cid-1][1]*cX) + (all_bs[cid-1][2]+all_bs[cid-1][3]*cY))/2) 139 | w = int(0.4*h) 140 | l = int(cX-w/2) 141 | t = int(cY-h/2) 142 | # id-shift-quick-hack for multi-cam eval.
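# For illustration: the cid*100000 offset below makes per-camera track IDs
# globally unique for the multi-camera evaluation, e.g. local track 7 in
# camera 3 is reported as 3*100000 + 7 = 300007, so equal local IDs from
# different cameras can never collide.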
143 | return [cid, self.track_id+cid*100000, lib.glob2loc(frame,cid), l, t, w, h, -1, -1] 144 | 145 | 146 | # ==Visualization== 147 | def plot_track(self, ax, plot_past_trajectory=False, output_shape=None): 148 | if (self.status == 'deleted' or self.status == 'init'): 149 | return 150 | 151 | #plot_covariance_ellipse((self.KF.x[0], self.KF.x[2]), self.KF.P, fc=self.color, alpha=0.4, std=[1,2,3]) 152 | #print(self.poses) 153 | cX, vX, cY, vY = self.xs[-1] 154 | #print('vX: {}, vY: {}'.format(vX,vY)) 155 | ax.plot(cX, cY, color=self.color, marker='o') 156 | ax.arrow(cX, cY, vX, vY, head_width=50, head_length=20, fc=self.color, ec=self.color) 157 | plot_covariance_ellipse((cX+vX, cY+vY), self.KF.P[1::2,1::2], fc=self.color, alpha=0.5, std=[3]) 158 | plot_covariance_ellipse((cX, cY), self.KF.P[::2,::2], fc=self.color, alpha=0.5, std=[1, 2, 3]) 159 | #plt.text(*self.state_to_output(*self.poses[-1], output_shape=output_shape), s='{}'.format(self.track_id)) 160 | if plot_past_trajectory and len(self.poses)>1: 161 | outputs_xy = np.array(self.poses) 162 | ax.plot(*outputs_xy.T, linewidth=2.0, color=self.color) 163 | -------------------------------------------------------------------------------- /track.py: -------------------------------------------------------------------------------- 1 | #TODO: comments/doc 2 | 3 | import numpy as np 4 | from filterpy.kalman import KalmanFilter 5 | import scipy 6 | from scipy import ndimage 7 | from scipy import signal 8 | from scipy.linalg import block_diag,inv 9 | from filterpy.common import Q_discrete_white_noise 10 | from filterpy.stats import plot_covariance_ellipse 11 | import matplotlib.pyplot as plt 12 | from os.path import join as pjoin 13 | 14 | import lib 15 | import lbtoolbox.plotting as lbplt 16 | 17 | # all_bs for bbox regression 18 | all_bs = np.array([[256.3190, -0.0207, 136.6533, 0.1978], 19 | [212.9634, 0.0055, 126.0157, 0.2036], 20 | [277.3869, -0.0154, 5.2019, 0.4442], 21 | [-296.1867, 0.3356, 54.3528, 0.3093], 22 | [258.1709, -0.0258, 144.2437, 0.2030], 23 | [152.2878, 0.0296, -271.9162, 0.6985], 24 | [208.9894, 0.0349, -298.6897, 0.7266], 25 | [170.6156, 0.0128, 81.8043, 0.1659]]) 26 | 27 | HOT_CMAP = lib.get_transparent_colormap() 28 | 29 | 30 | class Track(object): 31 | 32 | """ Implements a track (not a tracker, a track). 33 | With KalmanFilter and some other stuff like status for track management 34 | 35 | Attributes 36 | ---------- 37 | TODO: Move to time using dt 38 | 39 | """ 40 | 41 | def __init__(self, embed_crops_fn, curr_frame, init_pose, image, 42 | state_shape, state_pad, output_shape, track_id=-1, 43 | dist_thresh=7, entropy_thresh=0.10, 44 | unmiss_thresh=2, delete_thresh=90, 45 | tp_hack=None, maxlife=None, 46 | debug_out_dir=None): 47 | self.embed_crops_fn = embed_crops_fn 48 | self.debug_out_dir = debug_out_dir 49 | 50 | init_x = [0.0, 0.0] 51 | #self.init_P_scale = 200.0 52 | #self.init_P_scale = 5.0 53 | self.init_P_scale = 5.0**2 54 | 55 | self.DIST_THRESH = dist_thresh 56 | self.ENT_THRESH = entropy_thresh 57 | #self.VEL_MEAS_CERT_THRESH = 0.015 58 | 59 | self.KF = KalmanFilter(dim_x=2, dim_z=2) 60 | self.KF.F = np.array([[1, 0], 61 | [0, 1]], dtype=np.float64) 62 | #q = Q_discrete_white_noise(dim=2, dt=dt, var=200.) 63 | #self.KF.Q = block_diag(q, q) # TODO: matrix design for all the filters 64 | #self.KF.Q = q # heatmap v only 65 | # 0.02 66 | #self.KF.Q = 0.02*np.eye(2) # Process noise. Always added to prediction. 
Higher = uncertainty grows faster when no measurement 67 | self.KF.Q = 0.3**2*np.eye(2) # Process noise. Always added to prediction. Higher = uncertainty grows faster when no measurement 68 | self.KF.H = np.array([[1, 0], 69 | [0, 1]], dtype=np.float64) 70 | #self.KF.R = 100.0*np.eye(2) # Measurement variance. Lower: jump more to measurement 71 | self.KF.R = 20.0**2*np.eye(2) # Lower: jump more to measurement 72 | self.KF.x = init_x 73 | self.KF.P = self.init_P_scale*np.eye(2) 74 | 75 | self.track_id = track_id 76 | self.color = np.random.rand(3) 77 | self.hm_colormap = lbplt.linear_map((1,1,1), self.color) 78 | self.hm_colormap = lib.get_transparent_colormap(self.hm_colormap) 79 | self.xs=[self.KF.x] 80 | self.Ps=[self.KF.P] 81 | 82 | self.missed_for = 0 83 | self.missed_sightings = 0 84 | self.deleted_at = 0 85 | self.last_matched_at = curr_frame 86 | self.created_at = curr_frame 87 | self.n_exits = 0 88 | 89 | self.status = 'matched' # matched, missed, deleted 90 | self.age = 1 #age in frames 91 | self.MAXLIFE = maxlife 92 | self.TP_HACK = tp_hack 93 | 94 | #missed for [delete_thresh] times? delete! 95 | #self.DELETE_THRESH = 300 #90 # 1.5s 96 | self.DELETE_THRESH = delete_thresh # 1.5s 97 | 98 | # How many times do I need to see him while he's missing to un-miss him? 99 | self.UNMISS_THRESH = unmiss_thresh 100 | 101 | self.state_shape = state_shape 102 | self.state_pad = state_pad 103 | self.output_shape = output_shape 104 | 105 | pad_y, pad_x = state_pad[0][0], state_pad[1][0] 106 | self.poses=[np.array([init_pose[0]+pad_x, init_pose[1]+pad_y])] 107 | 108 | self.embedding = None 109 | self.update_embedding(self.get_embedding_at_current_pos(image, curr_frame)) 110 | 111 | def init_heatmap(self, heatmap): 112 | #self.pos_heatmap = self.resize_map_to_state(np.full_like(heatmap, 1/np.prod(heatmap.shape))) 113 | self.pos_heatmap = self.resize_map_to_state(heatmap) 114 | self.old_heatmap = None 115 | #self.id_heatmap = np.full_like(heatmap, 1/np.prod(self.pos_heatmap.shape)) 116 | self.id_heatmap = self.resize_map_to_state(np.full_like(heatmap, 1/np.prod(heatmap.shape))) 117 | 118 | self.idmap_ent = 0.0 #lib.entropy_score_avg(self.id_heatmap) 119 | self.idmap_score = 9999 # np.min(id_distmap) 120 | self.this_map_good = False #self.idmap_score < self.DIST_THRESH and self.ENT_THRESH < self.idmap_ent 121 | 122 | # ==Heatmap stuff== 123 | def resize_map_to_state(self, heatmap, keep_sum=True): 124 | assert heatmap.shape == self.state_shape, "Lying Lucas giving me a heatmap that's not state-shaped!" 
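# Note on the padding below: mode='edge' replicates the border values, so
# probability mass of a person at (or walking out of) the image border extends
# into the padded region instead of being clipped; since the heatmaps used
# here are normalized to sum to 1, the keep_sum rescaling keeps the padded map
# a valid probability distribution.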
125 | #hm = np.pad(heatmap, self.state_pad, mode='constant', constant_values=1/np.prod(heatmap.shape)) 126 | hm = np.pad(heatmap, self.state_pad, mode='edge') 127 | if keep_sum: 128 | hm /= np.sum(hm)*np.sum(heatmap) 129 | return hm 130 | #return lib.resize_map(heatmap, self.state_shape, interp='bicubic') 131 | 132 | def unpad_state_map(self, statemap): 133 | return statemap[self.state_pad[0][0]:-self.state_pad[0][1], 134 | self.state_pad[1][0]:-self.state_pad[1][1]] 135 | 136 | def get_crop_at_pos(self,pos,image): 137 | # TODO: fix bb: 128x48 138 | x, y = pos 139 | box_c = lib.box_centered(x, y, 128, 48, bounds=(0,0,image.shape[1],image.shape[0])) 140 | crop = lib.cutout_abs_hwc(image, box_c) 141 | return crop 142 | 143 | def get_embedding_at_current_pos(self, image, debug_curr_frame): 144 | crop = self.get_crop_at_pos( 145 | self.state_to_output(*self.poses[-1], output_shape=(image.shape[0], image.shape[1])), 146 | image 147 | ) 148 | if self.debug_out_dir is not None: 149 | lib.imwrite(pjoin(self.debug_out_dir, 'crops', '{}-{}.jpg'.format(self.track_id, debug_curr_frame)), crop) 150 | return self.embed_crops_fn(crop[None], fake_id=self.track_id)[0] 151 | 152 | def update_embedding(self, new_embedding): 153 | if self.embedding is None: 154 | self.embedding = new_embedding 155 | self.n_embs_seen = 1 156 | else: 157 | return # For this paper, we ignore new embeddings as the first is almost perfect. 158 | #self.embedding = self.embedding*self.n_embs_seen + new_embedding 159 | #self.n_embs_seen += 1 160 | #self.embedding /= self.n_embs_seen 161 | 162 | # ==Track state== 163 | def state_to_output(self, x, y, output_shape=None, ignore_padding=False): 164 | """ 165 | The optional `output_shape` is in (H,W) format. 166 | """ 167 | if output_shape is None: 168 | output_shape = self.output_shape 169 | 170 | if not ignore_padding: 171 | x = x - self.state_pad[1][0] 172 | y = y - self.state_pad[0][0] 173 | 174 | return np.array([ 175 | x/self.state_shape[1]*output_shape[1], 176 | y/self.state_shape[0]*output_shape[0] 177 | ]) 178 | 179 | 180 | def states_to_outputs(self, xy, output_shape, ignore_padding=False): 181 | # xy is of shape (N,2) 182 | if output_shape is None: 183 | output_shape = self.output_shape 184 | 185 | if not ignore_padding: 186 | xy = xy - np.array([[self.state_pad[1][0], self.state_pad[0][0]]]) 187 | 188 | factors = [output_shape[1]/self.state_shape[1], 189 | output_shape[0]/self.state_shape[0]] 190 | return xy*factors 191 | 192 | def estimate_peak_xy(self, heatmap): 193 | #return lib.argmax2d_xy(heatmap) 194 | return lib.expected_xy(heatmap, magic_thresh=2) 195 | 196 | def get_velocity_estimate(self, old_heatmap, pos_heatmap): 197 | old_peak = self.estimate_peak_xy(old_heatmap) 198 | new_peak = self.estimate_peak_xy(pos_heatmap) 199 | return new_peak - old_peak 200 | 201 | def track_predict(self): 202 | vx, vy = self.KF.x 203 | #self.pred_heatmap = scipy.ndimage.shift(self.pos_heatmap, [vy, vx]) 204 | gaussian = lib.gauss2d_xy(np.clip(self.KF.P, 1e-5, self.init_P_scale), nstd=2, mean=[-vx, -vy]) 205 | self.pred_heatmap = lib.convolve_edge_same(self.pos_heatmap, gaussian) 206 | self.pred_heatmap /= np.sum(self.pred_heatmap) # Re-normalize to probabilities 207 | 208 | # standard KF 209 | self.KF.predict() 210 | 211 | def track_update(self, id_heatmap, id_distmap, curr_frame, image_getter): 212 | self.age += 1 213 | 214 | # Hard rule for pathological cases. 
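# (MAXLIFE is an upper bound on the track's age in frames; anything older is
# force-deleted below, regardless of how well it still matches.)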
215 | if self.MAXLIFE is not None and self.MAXLIFE < self.age: 216 | print("WARNING: Killing one of age.") 217 | return self.track_is_deleted(curr_frame) 218 | 219 | self.old_heatmap = self.pos_heatmap 220 | self.old_map_good = self.this_map_good 221 | 222 | self.id_heatmap = self.resize_map_to_state(id_heatmap) 223 | 224 | self.idmap_ent = lib.entropy_score_avg(self.id_heatmap) 225 | self.idmap_score = np.min(id_distmap) 226 | self.this_map_good = self.idmap_score < self.DIST_THRESH and self.ENT_THRESH < self.idmap_ent 227 | 228 | if self.this_map_good: 229 | self.pos_heatmap = self.pred_heatmap*self.id_heatmap 230 | self.pos_heatmap /= np.sum(self.pos_heatmap) # Re-normalize to probabilities 231 | 232 | # Discard impossible jumps. TODO: It's a hack 233 | if self.TP_HACK is not None: 234 | xy = self.estimate_peak_xy(self.pos_heatmap) 235 | tpdist = np.sqrt(np.sum((self.poses[-1] - xy)**2)) 236 | if tpdist > self.TP_HACK: 237 | self.pos_heatmap = self.pred_heatmap 238 | self.this_map_good = False 239 | else: 240 | self.pos_heatmap = self.pred_heatmap 241 | #self.pos_heatmap = self.pred_heatmap*lib.softmax(self.id_heatmap, T=10) 242 | #self.pos_heatmap /= np.sum(self.pos_heatmap) # Re-normalize to probabilities 243 | #self.pos_heatmap = self.pred_heatmap*self.id_heatmap 244 | #self.pos_heatmap /= np.sum(self.pos_heatmap) # Re-normalize to probabilities 245 | 246 | # Compute a velocity measurement from previous and current peaks in heatmap. 247 | # The certainty of the velocity measurement is a function of the certainties of 248 | # both position "measurements", i.e. how peaky both heatmaps are. 249 | #self.vel_meas_certainty = lib.entropy_score_avg(self.old_heatmap)*lib.entropy_score_avg(self.pos_heatmap) 250 | #self.vel_meas_certainty = prev_id_heatmap_ent*this_id_heatmap_ent 251 | #if self.VEL_MEAS_CERT_THRESH < self.vel_meas_certainty: 252 | if self.old_map_good and self.this_map_good: 253 | vel_measurement = self.get_velocity_estimate(self.old_heatmap, self.pos_heatmap) 254 | #self.KF.R = ... 255 | self.KF.update(vel_measurement) 256 | 257 | self.xs.append(self.KF.x) 258 | self.Ps.append(self.KF.P) 259 | self.poses.append(self.estimate_peak_xy(self.pos_heatmap)) 260 | 261 | if self.this_map_good: 262 | self.track_is_matched(curr_frame) 263 | 264 | # update embedding. Needs to happen after the above, as that updates current_pos. 265 | # TODO: Future work. Currently we only keep initial one. 266 | #self.update_embedding(self.get_embedding_at_current_pos(image_getter(), curr_frame)) 267 | else: 268 | self.track_is_missed(curr_frame) 269 | 270 | # ==Track status management== 271 | def track_is_missed(self, curr_frame): 272 | self.missed_for += 1 273 | self.status = 'missed' 274 | if self.missed_for >= self.DELETE_THRESH: # or self.n_exits > 10: 275 | self.track_is_deleted(curr_frame) 276 | else: 277 | pass 278 | # TODO: Such "exit zones" are a workaround, a larger-than-image map would be better. 279 | #x, y = self.poses[-1] 280 | #vx, vy = self.xs[-1] 281 | #if (x == 0 and vx < 0) or \ 282 | # (x == self.pos_heatmap.shape[1]-1 and 0 < vx) or \ 283 | # (y == 0 and vy < 0) or \ 284 | # (y == self.pos_heatmap.shape[0]-1 and 0 < vy): 285 | # self.n_exits += 1 286 | 287 | def track_is_matched(self, curr_frame): 288 | if 0 < self.missed_for: 289 | # Been missing until now, but... 290 | self.missed_sightings += 1 291 | 292 | # ...Only revive if seen enough times! 
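# For illustration, with the default unmiss_thresh=2: the first sighting of a
# 'missed' track only bumps missed_sightings to 1 and returns early; the
# second sighting reaches the threshold and the code below revives the track
# (status 'matched', miss counters reset).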
293 | if self.missed_sightings < self.UNMISS_THRESH: 294 | return 295 | 296 | self.last_matched_at = curr_frame 297 | self.status = 'matched' 298 | self.missed_for = 0 299 | self.missed_sightings = 0 300 | self.n_exits = 0 301 | 302 | def track_is_deleted(self,curr_frame): 303 | self.deleted_at = curr_frame 304 | self.status = 'deleted' 305 | 306 | # ==Evaluation== 307 | def get_track_eval_line(self, cid, frame): 308 | #dukeMTMC format 309 | #[cam, ID, frame, left, top, width, height, worldX, worldY] 310 | cX, cY = self.state_to_output(*self.poses[-1]) 311 | h = int(((all_bs[cid-1][0]+all_bs[cid-1][1]*cX) + (all_bs[cid-1][2]+all_bs[cid-1][3]*cY))/2) 312 | w = int(0.4*h) 313 | l = int(cX-w/2) 314 | t = int(cY-h/2) 315 | # id-shift-quick-hack for multi-cam eval. 316 | return [cid, self.track_id+cid*100000, lib.glob2loc(frame, cid), l, t, w, h, -1, -1] 317 | 318 | 319 | 320 | # ==Visualization== 321 | def plot_track(self, ax, plot_past_trajectory=False, output_shape=None, time_scale=1): 322 | if output_shape is None: 323 | output_shape = self.output_shape 324 | 325 | if self.status == 'deleted': 326 | return 327 | 328 | #plot_covariance_ellipse((self.KF.x[0], self.KF.x[2]), self.KF.P, fc=self.color, alpha=0.4, std=[1,2,3]) 329 | #print(self.poses) 330 | cX, cY = self.state_to_output(*self.poses[-1], output_shape=output_shape) 331 | vX, vY = self.state_to_output(*self.xs[-1], output_shape=output_shape, ignore_padding=True)*time_scale 332 | #print('vX: {}, vY: {}'.format(vX,vY)) 333 | ax.plot(cX, cY, color=self.color, marker='o') 334 | ax.arrow(cX, cY, vX, vY, head_width=20, head_length=7, fc=self.color, ec=self.color, linestyle='--') 335 | # TODO: The cov is not in output space! 336 | #plot_covariance_ellipse((cX+vX, cY+vY), self.Ps[-1], fc=self.color, alpha=0.5, std=[1, 2, 3]) 337 | #plt.text(*self.state_to_output(*self.poses[-1], output_shape=output_shape), s='{}'.format(self.embedding)) 338 | if plot_past_trajectory and len(self.poses)>1: 339 | outputs_xy = self.states_to_outputs(np.array(self.poses), output_shape) 340 | ax.plot(*outputs_xy.T, linewidth=2.0, color=self.color) 341 | 342 | 343 | def _plot_heatmap(self, ax, hm, output_shape=None): 344 | if self.status == 'deleted': 345 | return 346 | 347 | if output_shape is None: 348 | output_shape = self.output_shape 349 | 350 | return ax.imshow(self.unpad_state_map(hm), interpolation='none', cmap=self.hm_colormap, 351 | #clim=(0, lib.ramp(lib.entropy_score(hm), 0.2, 1, 0.8, np.max(hm))), #alpha=0.5, 352 | extent=[0, output_shape[1], output_shape[0], 0]) 353 | 354 | def plot_pos_heatmap(self, ax, output_shape=None): 355 | hm = self._plot_heatmap(ax, self.pos_heatmap, output_shape) 356 | vX, vY = self.state_to_output(*self.xs[-1], output_shape=output_shape, ignore_padding=True) 357 | ax.text(*self.state_to_output(*self.poses[-1], output_shape=output_shape), s='{:.2f} ({:.2f}, {:.2f})'.format(np.sqrt(vX*vX + vY*vY), vX, vY)) 358 | return hm 359 | 360 | def plot_pred_heatmap(self, ax, output_shape=None): 361 | hm = self._plot_heatmap(ax, self.pred_heatmap, output_shape) 362 | if hasattr(self, 'vel_meas_certainty'): 363 | ax.text(*self.state_to_output(*self.poses[-1], output_shape=output_shape), s='{:.8f}'.format(self.vel_meas_certainty)) 364 | return hm 365 | 366 | def plot_id_heatmap(self, ax, output_shape=None): 367 | hm = self._plot_heatmap(ax, self.id_heatmap, output_shape) 368 | if hasattr(self, 'idmap_score'): 369 | ax.text(*self.state_to_output(*self.poses[-1], output_shape=output_shape), s='{:.2f} | {:.3f}'.format(self.idmap_score, 
self.idmap_ent)) 370 | return hm 371 | --------------------------------------------------------------------------------