├── data
│   └── brno_homographies.zip
├── models
│   └── trained_transformer.pt
├── datasets
│   ├── __init__.py
│   ├── video_dataset.py
│   ├── video.py
│   └── brnocompspeed.py
├── archs
│   ├── __init__.py
│   └── transformer.py
├── tools
│   ├── tracker
│   │   ├── bbox.py
│   │   └── tracker.py
│   ├── common.py
│   ├── geometry.py
│   ├── speed.py
│   └── tracks.py
├── NOTICE
├── run_brnocompspeed.py
├── README.md
└── LICENSE
/data/brno_homographies.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naver/cctv/main/data/brno_homographies.zip
--------------------------------------------------------------------------------
/models/trained_transformer.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/naver/cctv/main/models/trained_transformer.pt
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | from .video_dataset import collection
6 | from .brnocompspeed import BrnoCompSpeed
7 | 
--------------------------------------------------------------------------------
/archs/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | from pdb import set_trace as bb
6 | import torch
7 | from tools import common
8 | from .transformer import Transformer
9 | 
10 | 
11 | def load_net( model_path ):
12 |     print(f"Loading network from {model_path} ...")
13 |     model = torch.load(model_path, map_location=torch.device('cpu'))
14 |     # model['arch'] stores the constructor call as a string, e.g. "Transformer(...)": complete it with fixed dims
15 |     arch = f"{model['arch'][:-1]}, embdim=64, outdim=8)"
16 |     print(f">> Building network = {arch}")
17 |     net = eval(arch)
18 |     net.load_state_dict(model['weights'])
19 |     print(f" model has {common.model_size(net)/10**6:.1f}M parameters")
20 | 
21 |     return net
22 | 
--------------------------------------------------------------------------------
/tools/tracker/bbox.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | import numpy as np
6 | 
7 | 
8 | def rect_area( bb ):
9 |     (lef,top,rig,bot) = bb
10 |     return (rig-lef) * (bot-top)
11 | 
12 | 
13 | def inter_over_union(a, b):
14 |     ''' intersection over union
15 |         intersection(a,b) / union(a, b)
16 |     '''
17 |     inter = intersection_area(a, b)
18 |     return inter / (rect_area(a) + rect_area(b) - inter)
19 | 
20 | 
21 | def intersection_area(a, b):
22 |     ''' area of intersection(a, b)
23 |     '''
24 |     return intersection_line(a[0:4:2], b[0:4:2]) * intersection_line(a[1:4:2], b[1:4:2])
25 | 
26 | 
27 | def intersection_line( a, b ):
28 |     # overlap length between two 1d segments a=(a1,a2) and b=(b1,b2)
29 |     (a1,a2), (b1,b2) = a, b
30 |     return (np.minimum(a2,b2) - np.maximum(a1,b1)).clip(min=0)
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | CCTV
2 | Copyright 2021-present NAVER Corp.
3 | 
4 | This project contains subcomponents with separate copyright notices and license terms.
5 | Your use of the source code for these subcomponents is subject to the terms and conditions of the following licenses.
6 | 
7 | =====
8 | 
9 | scanner-research/scanner
10 | https://github.com/scanner-research/scanner
11 | 
12 | 
13 | Licensed under the Apache License, Version 2.0 (the "License");
14 | you may not use this file except in compliance with the License.
15 | You may obtain a copy of the License at
16 | 
17 |     http://www.apache.org/licenses/LICENSE-2.0
18 | 
19 | Unless required by applicable law or agreed to in writing, software
20 | distributed under the License is distributed on an "AS IS" BASIS,
21 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | See the License for the specific language governing permissions and
23 | limitations under the License.
24 | 
25 | =====
--------------------------------------------------------------------------------
/archs/transformer.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | from pdb import set_trace as bb
6 | 
7 | import torch
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | 
11 | 
12 | class Transformer (nn.Module):
13 |     """ Input shape  = (batch_size, seq_len, inchan)
14 |         Output shape = (batch_size, seq_len, outchan)
15 |     """
16 |     def __init__(self, embdim, outdim, nlayers=2, nhead=2, nhid=256, dropout=0.0):
17 |         super().__init__()
18 |         trf_layers = nn.TransformerEncoderLayer(embdim, nhead, nhid, dropout)
19 |         self.transformer = nn.TransformerEncoder(trf_layers, nlayers)
20 |         self.decoder = nn.Linear(embdim, outdim)
21 |         self.src_mask = None
22 |         self.embdim = embdim
23 | 
24 |     def forward(self, src, **kw):
25 |         self.src_mask = None
26 |         src = zero_pad(src, self.embdim)
27 |         # nn.TransformerEncoder expects (seq_len, batch_size, embdim), hence the transposes
28 |         output = self.transformer(src.transpose(0,1), self.src_mask)
29 |         output = self.decoder(output)
30 |         return output.transpose(0,1)
31 | 
32 | 
33 | def zero_pad( vec, embdim ):
34 |     D = vec.shape[-1]
35 |     assert D <= embdim
36 |     res = torch.zeros(vec.shape[:-1]+(embdim,), dtype=torch.float32, device=vec.device)
37 |     res[..., :D] = vec
38 |     return res
39 | 
--------------------------------------------------------------------------------
/tools/common.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | import os
6 | import numpy as np
7 | import torch
8 | 
9 | 
10 | def mkdir_for( file_path ):
11 |     os.makedirs(os.path.split(file_path)[0], exist_ok=True)
12 |     return file_path
13 | 
14 | 
15 | def model_size(model):
16 |     ''' Computes the number of parameters of the model
17 |     '''
18 |     size = 0
19 |     for weights in model.state_dict().values():
20 |         size += np.prod(weights.shape)
21 |     return size
22 | 
23 | 
24 | def select_device( gpu_idx ):
25 |     """ set gpu_idx = -1 for CPU only, otherwise gpu_idx >= 0 represents the GPU index.
26 |     """
27 |     gpus = [gpu_idx]
28 |     cuda = any(gpu>=0 for gpu in gpus)
29 |     if cuda:
30 |         assert all(gpu>=0 for gpu in gpus), 'cannot mix CPU and GPU devices'
31 | 
32 |         os.environ['CUDA_VISIBLE_DEVICES'] = ','.join([str(gpu) for gpu in gpus])
33 |         assert torch.cuda.is_available(), "%s has GPUs %s unavailable" % (
34 |             os.environ.get('HOSTNAME','localhost'), os.environ['CUDA_VISIBLE_DEVICES'])
35 | 
36 |         torch.backends.cudnn.benchmark = False # set True to let cudnn auto-tune kernels for constant shapes
37 |         torch.backends.cudnn.fastest = True # even more speed-up?
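        # (note: once CUDA_VISIBLE_DEVICES is set above, the selected GPU is always
        #  seen by torch as 'cuda:0', so the device returned below needs no explicit index)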
38 | 39 | print( 'Launching on GPUs #' + os.environ['CUDA_VISIBLE_DEVICES'] ) 40 | else: 41 | print( 'Launching on CPU only' ) 42 | 43 | return torch.device('cuda' if cuda else 'cpu') 44 | 45 | 46 | def todevice( obj, device ): 47 | """ Transfer an object to another device (i.e. GPU, CPU:torch, CPU:numpy). 48 | 49 | obj: list, tuple, dict of tensors or other things 50 | device: pytorch device or 'numpy' 51 | """ 52 | if isinstance(obj, dict): 53 | return {k:todevice(v, device) for k,v in obj.items()} 54 | 55 | if isinstance(obj, (tuple,list)): 56 | return type(obj)(todevice(x, device) for x in obj) 57 | 58 | if device == 'numpy': 59 | if isinstance(obj, torch.Tensor): 60 | obj = obj.detach().cpu().numpy() 61 | elif obj is not None: 62 | if isinstance(obj, np.ndarray): 63 | obj = torch.from_numpy(obj) 64 | obj = obj.to(device) 65 | return obj 66 | -------------------------------------------------------------------------------- /run_brnocompspeed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-present NAVER Corp. 2 | # CC BY-NC-SA 3.0 3 | # Available only for non-commercial use 4 | 5 | import os 6 | import argparse 7 | import numpy as np 8 | 9 | from datasets import collection, BrnoCompSpeed 10 | from tools import common, tracks, speed 11 | from archs import load_net 12 | 13 | 14 | parser = argparse.ArgumentParser('Run experiments on BrnoCompSpeed') 15 | 16 | parser.add_argument('todo', choices=[ 17 | 'extract_tracks', 18 | 'compute_homographies', 19 | 'evaluate_homographies', 20 | 'evaluate_speeds', 21 | 'export_json']) 22 | parser.add_argument('--dataset-dir', type=str, required=True, help='BrnoCompSpeed directory') 23 | parser.add_argument('--num-frames', type=int, default=0, help='crop video to N frames') 24 | 25 | parser.add_argument('--model-path', type=str, default=None, help='Path to model') 26 | 27 | args = parser.parse_args() 28 | 29 | 30 | # Create dataset object. Each element is a video. 
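# (the collection is lazy: dataset[i] builds a BrnoCompSpeed video on demand, and derived
#  data is looked up from paths encoding the options -- e.g. with frame_step=2 and
#  --num-frames 5000, dataset[0].tracks loads
#  <dataset-dir>/tracks/framestep2_nframes5000/session0_left.npz)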
31 | dataset = collection(BrnoCompSpeed, root=args.dataset_dir, frame_step=2, nframes=args.num_frames) 32 | 33 | if args.todo == 'extract_tracks': 34 | tracks.extract(dataset) 35 | 36 | if args.todo == 'compute_homographies': 37 | assert args.model_path 38 | model = load_net(args.model_path) 39 | speed.compute(dataset, model) 40 | 41 | if args.todo == 'evaluate_homographies': 42 | speed.evaluate( dataset ) 43 | 44 | if args.todo == 'export_json': 45 | import json 46 | name = 'transformer' 47 | 48 | def get_tracks( video ): 49 | from tools.tracks import sort_tracks, filter_tracks, box_bottom_center 50 | tracks = sort_tracks(filter_tracks(video, video.tracks)) 51 | tracks['centers'] = box_bottom_center(tracks['boxes']) 52 | return tracks 53 | 54 | def tracks_to_json( tt, H_from_px, scale=1 ): 55 | res = {} 56 | # road geometry 57 | res["camera_calibration"] = dict(H_from_px = H_from_px.ravel().tolist(), scale=scale) 58 | 59 | # car detections 60 | res["cars"] = cars = [] 61 | for cid, track in tracks.enumerate_tracks(tt, dic=True): 62 | cx, cy = track['centers'].T.tolist() 63 | cars.append(dict(id=cid, frames = track['timestamps'].tolist(), posX = cx, posY = cy)) 64 | return res 65 | 66 | for video in dataset: 67 | json_path = os.path.join(args.dataset_dir, 'results', video.session, 'system_'+name+'.json') 68 | print(f'>> Exporting {json_path}') 69 | if os.path.isfile(json_path): raise IOError('File exists: '+json_path) 70 | 71 | car_tracks = get_tracks(video) 72 | car_tracks['timestamps'] *= video.frame_step 73 | H_from_px = video.homography['H_from_px'] 74 | 75 | data = tracks_to_json(car_tracks, H_from_px) 76 | with open(common.mkdir_for(json_path), 'w') as f: 77 | f.write(json.dumps(data)) 78 | 79 | print(f"cd {args.dataset_dir}/code && python eval.py -rc") 80 | -------------------------------------------------------------------------------- /datasets/video_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-present NAVER Corp. 2 | # CC BY-NC-SA 3.0 3 | # Available only for non-commercial use 4 | 5 | import os 6 | from os.path import * 7 | import numpy as np 8 | 9 | from .video import Video 10 | 11 | 12 | class VideoDataset (Video): 13 | """ Each instance is a single video, but the collection can be accessed as: 14 | 15 | >>> for video in collection(MyVideoClass, **options): 16 | >>> ... 
17 |     """
18 |     VIDEO_FOLDER = 'videos'
19 |     _join_path = lambda self, *args: join(*args)
20 | 
21 |     def __init__(self, video_num, video_name, root=None, **video_options):
22 |         self.root = root
23 |         self.video_num = video_num
24 |         self.video_name = video_name
25 |         video_path = self._join_path(root, self.VIDEO_FOLDER, video_name)
26 |         Video.__init__(self, video_path, **video_options)
27 |         self.set_video_options(**video_options)
28 | 
29 |     def set_video_options(self, **options):
30 |         assert self.frame_step == options.get('frame_step',self.frame_step), 'not implemented'
31 |         for k,v in options.items(): setattr(self,k,v)
32 |         opt = ''
33 |         if self.frame_step != 1: opt += f'_framestep{self.frame_step}'
34 |         if self.nframes: opt += f'_nframes{self.nframes}'
35 |         if self.start_frame: opt += f'_startframe{self.start_frame}'
36 |         self.options_str = opt[1:]
37 |         self._set_paths(self.video_name)
38 | 
39 |     def _set_paths(self, video_name):
40 |         self.detections_path = self._join_path(self.root, 'detections', self.options_str, video_name+'.npz')
41 |         self.tracks_path = self._join_path(self.root, 'tracks', self.options_str, video_name+'.npz')
42 |         self.homography_path = self._join_path(self.root, 'homography', video_name+'.npz')
43 | 
44 |     @property
45 |     def detections(self):
46 |         return self._load_dic(self.detections_path)
47 | 
48 |     @property
49 |     def tracks(self):
50 |         return self._load_dic(self.tracks_path)
51 | 
52 |     @property
53 |     def homography(self):
54 |         return self._load_dic(self.homography_path)
55 | 
56 |     def _load_dic(self, path):
57 |         return dict(np.load(path))
58 | 
59 | 
60 | class collection:
61 |     """ Iterate over the full collection. Videos are instantiated lazily,
62 |         i.e. only when accessed via `collection[idx]`.
63 |     """
64 |     def __init__(self, cls, **options):
65 |         self.cls = cls
66 |         self.video_nums = cls.COLLECTION
67 |         self.options = options
68 | 
69 |     def __repr__(self):
70 |         return f"collection of {len(self)} {self.cls.__name__}({', '.join(f'{k}={v}' for k,v in self.options.items())})"
71 | 
72 |     def __len__(self):
73 |         return len(self.video_nums)
74 | 
75 |     def __getitem__(self, idx):
76 |         video = self.cls(self.video_nums[idx], **self.options)
77 |         video.__cmd__ = f"{self.cls.__name__}({self.video_nums[idx]},{','.join(str(k)+'='+str(v) for k,v in self.options.items())})".replace(',)',')')
78 |         return video
79 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Robust Automatic Monocular Vehicle Speed Estimation for Traffic Surveillance #
2 | This repository contains the implementation of the following [paper](https://europe.naverlabs.com/research/publications/robust-and-automatic-monocular-vehicle-speed-estimation-for-traffic-surveillance/):
3 | 
4 | ```text
5 | @inproceedings{icctv,
6 |   author    = {Jerome Revaud and Martin Humenberger},
7 |   title     = {Robust Automatic Monocular Vehicle Speed Estimation for Traffic Surveillance},
8 |   booktitle = {ICCV},
9 |   year      = {2021},
10 | }
11 | ```
12 | 
13 | License
14 | -------
15 | Our code is released under the Creative Commons BY-NC-SA 3.0 (see [LICENSE](LICENSE) for more details), available only for non-commercial use.
16 | 
17 | 
18 | Requirements
19 | ------------
20 | You need
21 | - Python 3.8+ equipped with standard scientific packages and PyTorch / TorchVision:
22 | ```
23 | tqdm >= 4
24 | PIL >= 8.1.1
25 | numpy >= 1.19
26 | scipy >= 1.6.1
27 | torch >= 1.8.0
28 | torchvision >= 0.9.0
29 | cv2 >= 4.5.1
30 | filterpy >= 1.4.5
31 | ```
32 | - An object tracker. In the [ICCV paper](https://europe.naverlabs.com/research/publications/robust-and-automatic-monocular-vehicle-speed-estimation-for-traffic-surveillance/),
33 |   we used the [SORT](https://github.com/abewley/sort) tracker, but any object tracker of your choice will do.
34 | 
35 | 
36 | Reproducing results on the BrnoCompSpeed dataset
37 | ------------------------------------------------
38 | 
39 | *Note*: Since we cannot share the 3D car models from the Unity library due to license issues,
40 | this code only reproduces results for the learned method, given a model pretrained
41 | on these 3D car models.
42 | 
43 | 1. Download the BrnoCompSpeed dataset and evaluation code as explained on [JakubSochor's GitHub](https://github.com/JakubSochor/BrnoCompSpeed).
44 | 
45 | 2. Extract car tracks.
46 | 
47 |    `python run_brnocompspeed.py extract_tracks --dataset-dir /path/to/brnocompspeed --num-frames 5000`
48 | 
49 |    Here we limit the extraction to the first 5000 frames.
50 |    It will save car detections (boxes) in `/path/to/brnocompspeed/detections`
51 |    and tracks in `/path/to/brnocompspeed/tracks`.
52 | 
53 | 
54 | 3. Compute homographies given a pretrained model.
55 | 
56 |    `python run_brnocompspeed.py compute_homographies --dataset-dir /path/to/brnocompspeed --num-frames 5000 --model-path models/trained_transformer.pt`
57 | 
58 |    This will write all homographies in `/path/to/brnocompspeed/homography/`.
59 | 
60 |    *Note*: Since this process involves randomness due to RANSAC, you may obtain slightly different
61 |    results compared to those published in the paper.
62 |    Therefore, we provide the homographies used in the paper in `data/brno_homographies.zip`.
63 | 
64 | 
65 | 4. Optionally, you can run a quick evaluation of the homographies.
66 | 
67 |    `python run_brnocompspeed.py evaluate_homographies --dataset-dir /path/to/brnocompspeed`
68 | 
69 |    *Note*: these results are computed based on **ground-truth boxes**,
70 |    hence they do not reflect the actual accuracy of the full system
71 |    (i.e. detector, tracker and speed estimator jointly). Rather, they provide a quick
72 |    estimate of how good the estimated homographies are, based on ground-truth tracks.
73 | 
74 | 
75 | 5. Evaluate results using BrnoCompSpeed's evaluation code.
76 | 
77 |    ```bash
78 |    # re-compute tracks, this time for entire videos (it will take a while)
79 |    python run_brnocompspeed.py extract_tracks --dataset-dir /path/to/brnocompspeed
80 |    # export homographies and tracks in json format
81 |    python run_brnocompspeed.py export_json --dataset-dir /path/to/brnocompspeed
82 |    ```
83 | 
84 |    Then run the [evaluation code](https://github.com/JakubSochor/BrnoCompSpeed) (caution: it is written in Python 2).
85 |    - First, set `RUN_FOR_SYSTEMS = ["transformer"]` in `/path/to/brnocompspeed/code/config.py`.
86 |    - Then, execute the evaluation code:
87 |      `cd /path/to/brnocompspeed/code && python eval.py -rc`
88 | 
89 | 
90 | CCTV dataset
91 | ------------
92 | We are currently working to release the CCTV dataset proposed in the paper.
93 | In the meantime, please reach out to us if you need this dataset.
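
Using the estimated homographies
--------------------------------
For reference, here is a minimal sketch (not part of the original pipeline) of how to load a
homography produced by `compute_homographies` (or unzipped from `data/brno_homographies.zip`,
assuming the same per-session `.npz` layout) and map a pixel to road-plane coordinates:

```python
import numpy as np
from tools.geometry import applyh

# one .npz file per session, named following datasets/brnocompspeed.py
H = np.load('/path/to/brnocompspeed/homography/session0_right.npz')['H_from_px']

# map a pixel (x, y) to road-plane coordinates (in meters)
xy_m = applyh(H, np.float32([640, 360]))
print(xy_m)
```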
94 | 
--------------------------------------------------------------------------------
/datasets/video.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | from pdb import set_trace as bb
6 | from os.path import join
7 | 
8 | import numpy as np
9 | import cv2
10 | from torchvision import transforms
11 | 
12 | 
13 | class Video:
14 |     """ Video object. Frames can be accessed (almost) randomly:
15 | 
16 |     >>> video = Video(path)
17 |     >>> frame_20 = video[20] --> torch.FloatTensor
18 |     >>> frame_10 = video[10] --> torch.FloatTensor
19 |     """
20 |     def __init__(self, video_path, nframes=0, frame_step=1, start_frame=0,
21 |                        cache_size=256):
22 |         assert isinstance(nframes, int)
23 |         assert isinstance(start_frame, int)
24 |         assert isinstance(frame_step, int)
25 |         self.video_path = video_path
26 |         self.frame_step = frame_step
27 |         self.start_frame = start_frame
28 |         self.nframes = nframes
29 |         self.transform = transforms.ToTensor()
30 | 
31 |         self._init_video(cache_size)
32 | 
33 |     def _init_video(self, cache_size):
34 |         self.video = cv2.VideoCapture(self.video_path)
35 |         self._video_nframes = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
36 |         self.fps = self.video.get(cv2.CAP_PROP_FPS) / self.frame_step
37 |         self._cache_size = cache_size or float('inf')
38 |         self._cached_frames = {}
39 |         self._cur_frame = 0
40 | 
41 |     def __repr__(self):
42 |         return f"Video('{self.video_path}', {len(self)} frames)"
43 | 
44 |     def __len__(self):
45 |         return max(0, min(self.nframes or 999999, 1 + (self._video_nframes - 1) // self.frame_step - self.start_frame))
46 | 
47 |     @property
48 |     def imsize(self):
49 |         # returns (width, height)
50 |         return self.shape[1::-1]
51 | 
52 |     @property
53 |     def shape(self):
54 |         # returns (height, width, #channels)
55 |         if not self._cached_frames: self[0] # access the first frame
56 |         frame = next(iter(self._cached_frames.values()))
57 |         return frame.shape
58 | 
59 |     @property
60 |     def imcenter(self):
61 |         return (np.float32(self.imsize) - 1) / 2
62 | 
63 |     def __getitem__(self, idx):
64 |         if not(0 <= idx < (self.nframes or 999999)):
65 |             raise IndexError()
66 |         idx += self.start_frame
67 |         if idx * self.frame_step > self._video_nframes:
68 |             raise IndexError()
69 |         if idx not in self._cached_frames:
70 |             self._fill_cache( idx )
71 |         frame = self._cached_frames[idx]
72 |         return self.transform(frame)
73 | 
74 |     def _fill_cache(self, idx):
75 |         if self._cached_frames and idx < next(iter(self._cached_frames)):
76 |             raise ValueError(f'Cannot rewind video more than {self._cache_size} frames')
77 |         # THIS IS BROKEN:
78 |         #self.video.set(cv2.CAP_PROP_POS_FRAMES, idx-1)
79 |         #self._cur_frame = idx-1
80 | 
81 |         for _ in range(self._cur_frame*self.frame_step, self.start_frame*self.frame_step):
82 |             flag, frame = self.video.read()
83 |             if not flag: return # should not happen: start_frame > video_nframes
84 |         self._cur_frame = max(self._cur_frame, self.start_frame)
85 | 
86 |         while self._cur_frame <= idx:
87 |             for _ in range(self.frame_step):
88 |                 flag, frame = self.video.read()
89 |                 if not flag: # no more frames, return the last one
90 |                     frame = self._cached_frames[max(self._cached_frames)]
91 |                     break
92 |             # delete old frames so that cache size stays reasonable
93 |             if len(self._cached_frames) >= self._cache_size:
94 |                 del self._cached_frames[ next(iter(self._cached_frames)) ]
95 |             self._cached_frames[self._cur_frame] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
96 |             self._cur_frame += 1
97 | 
98 | 
99 | if __name__ == '__main__':
100 |     db = Video('/local/cctv/BrnoCompSpeed/dataset/session0_right/video.avi')
101 |     print(db)
102 | 
103 |     from matplotlib import pyplot as pl
104 |     for frame in range(100):
105 |         pl.clf()
106 |         pl.imshow(db[frame].permute(1,2,0))
107 |         pl.pause(0.001)
--------------------------------------------------------------------------------
/tools/geometry.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | 
5 | import pdb; bb = pdb.set_trace
6 | import numpy as np
7 | import torch  # needed by the isinstance() check in applyh() below
8 | 
9 | def normed( vec, **kw ):
10 |     return vec / np.linalg.norm(vec, keepdims=True, **kw)
11 | 
12 | 
13 | def line_intersection_2d(line1, line2):
14 |     ''' Intersection between 2 lines in 2d space.
15 |         Each line is defined as (a,b,c) with a*x+b*y+c==0
16 |     '''
17 |     assert line1.shape == (3,) and line2.shape == (3,)
18 |     res = np.cross(line1, line2)
19 |     return res[:2] / res[2]
20 | 
21 | 
22 | def pointToLineDistance(p, l):
23 |     return abs(np.dot(l,p/p[2]))/np.linalg.norm(l[0:2])
24 | 
25 | 
26 | def pointToLineProjection(l, p):
27 |     p = p/p[-1]
28 |     c = p[0]*l[1] - p[1]*l[0]
29 |     perpendicularLine = np.array([-l[1], l[0], c])
30 |     intersection = np.cross(l, perpendicularLine)
31 |     return intersection/intersection[-1]
32 | 
33 | 
34 | def isPointBetweenLines(p, l1, l2):
35 |     return np.dot(p,l1)*np.dot(p,l2)*np.dot(l1[0:2],l2[0:2]) <= 0
36 | 
37 | 
38 | def getLaneForPoint(p, lines):
39 |     for i in range(len(lines)-1):
40 |         if isPointBetweenLines(p, lines[i], lines[i+1]):
41 |             return i
42 |     return -1
43 | 
44 | 
45 | def applyh(H, p, ncol=2, norm=True, front=False):
46 |     """ Apply the homography to a list of 2d points in homogeneous coordinates.
47 | 
48 |     H: 3x3 matrix = Homography
49 |     p: numpy/torch/tuple of coordinates. Shape must be (...,2) or (...,3)
50 | 
51 |     ncol: int. number of columns of the result (2 or 3)
52 |     norm: boolean. if True, the result is projected on the z=1 plane.
53 |     front: boolean or float. if not False, points that are behind the camera plane z=front are removed.
54 | 
55 |     Returns an array of projected 2d points.
56 | """ 57 | if isinstance(H, np.ndarray): 58 | p = np.asarray(p) 59 | elif isinstance(H, torch.Tensor): 60 | p = torch.as_tensor(p, dtype=H.dtype) 61 | 62 | if p.shape[-1]+1 == H.shape[-1]: 63 | p = p @ H[:,:-1].T + H[:,-1] 64 | else: 65 | p = H @ p.T 66 | if p.ndim >= 2: p = p.swapaxes(-1,-2) 67 | if front is not False: 68 | p = p[p[...,-1] > front] 69 | if norm: 70 | p = p / p[...,-1:] 71 | return p[...,:ncol] 72 | 73 | 74 | def jacobianh(H, p): 75 | """ H is an homography that maps: f_H(x,y) --> (f_1, f_2) 76 | So the Jacobian J_H evaluated at p=(x,y) is a 2x2 matrix 77 | Output shape = (2, 2, N) = (f_, xy, N) 78 | 79 | Example of derivative: 80 | numx a*X + b*Y + c*Z 81 | since x = ----- = --------------- 82 | denom u*X + v*Y + w*Z 83 | 84 | numx' * denom - denom' * numx a*denom - u*numx 85 | dx/dX = ----------------------------- = ---------------- 86 | denom**2 denom**2 87 | """ 88 | (a, b, c), (d, e, f), (u, v, w) = H 89 | numx, numy, denom = applyh(H, p, ncol=3, norm=False).T 90 | 91 | # column x column x 92 | J = np.float32(((a*denom - u*numx, b*denom - v*numx), # row f_1 93 | (d*denom - u*numy, e*denom - v*numy))) # row f_2 94 | return J / np.where(denom, denom*denom, np.nan) 95 | 96 | 97 | def recover_homography_from_derivatives(p1, p2, j1, j2): 98 | """ p1, p2: 2 different points 99 | j1, j2: (transposed) jacobian at these points. 100 | Normally j1[0,1] == j2[0,1] == 0 (moving on x in the image => no Y motion, i.e horizon is horizontal) 101 | 102 | Example: 103 | H = H_from_px # from pixels to meters 104 | j1, j2 = jacobianh(H, (p1,p2)).T[:,:2] 105 | H_ = recover_homography_from_derivatives(p1, p2, j1, j2) 106 | assert np.allclose(jacobianh(H_, (p1,p2)).T[:,:2], (j1,j2)) 107 | """ 108 | assert j1[0,1] == j2[0,1] == 0, "Horizon should be horizontal" # try calling upright_homography before 109 | x1,y1 = p1 110 | x2,y2 = p2 111 | K1 = j1[0,0] 112 | K2 = j2[0,0] 113 | v = (K2-K1) / (K1*y1 - K2*y2 + 1e-16) 114 | a = K2 + K2*y2*v 115 | e = j1[1,1] * (v*y1 + 1)**2 116 | b = j1[1,0] * (v*y1 + 1)**2 + x1 * a * v 117 | return np.float32((a,b,0,0,e,0,0,v,1)).reshape(3,3) 118 | -------------------------------------------------------------------------------- /datasets/brnocompspeed.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-present NAVER Corp. 2 | # CC BY-NC-SA 3.0 3 | # Available only for non-commercial use 4 | 5 | from pdb import set_trace as bb 6 | from os.path import * 7 | from collections import defaultdict 8 | import pickle 9 | from PIL import Image 10 | import numpy as np 11 | 12 | from .video_dataset import VideoDataset 13 | from tools.geometry import * 14 | 15 | 16 | class BrnoCompSpeed (VideoDataset): 17 | """ Annotation file contains: 18 | - fps: float 19 | 20 | - invalidLanes: set() 21 | - laneDivLines: list of 2d lines 22 | 23 | - distanceMeasurement: list of point pairs + distance 24 | - measurementLines: list of 2d lines 25 | 26 | - cars: list of annotated cars with their true speed and passage points 27 | """ 28 | SESSIONS = [0,1,2,3,4,5,6] 29 | SIDES = ['left','center', 'right'] 30 | COLLECTION = list(range(len(SESSIONS) * len(SIDES))) 31 | 32 | def __init__(self, video_num, root, **kw): 33 | super().__init__(video_num, 'video.avi', root=root, **kw) 34 | # load annotations 35 | with open(join(split(self.video_path)[0], "gt_data.pkl"), 'rb') as af: 36 | self.annots = pickle.load(af, encoding='latin-1') 37 | # override with the true values (video codec is broken?) 
38 | true_fps = self.annots['fps'] / self.frame_step 39 | if self.__dict__.get('fps',0): self._video_nframes = int(self._video_nframes * true_fps // self.fps) 40 | self.fps = true_fps 41 | 42 | def _join_path(self, root, folder, *args): 43 | self.session = f"session{self.SESSIONS[self.video_num // 3]}_{self.SIDES[self.video_num % 3]}" 44 | if folder == 'videos': 45 | return join(root, "dataset", self.session, *args) 46 | else: 47 | ext = splitext(args[-1])[1] 48 | return join(root, folder, *args[:-1], self.session+ext ) 49 | 50 | def __repr__(self): 51 | session = self.video_path.split('/')[-2] 52 | W,H = self.imsize 53 | return f"BrnoCompSpeed( #{self.video_num}={session}, {len(self)} frames, {W}x{H} pixels, {self.fps} fps )" 54 | 55 | @property 56 | def screen(self): 57 | return Image.open(self._join_path(self.root, 'videos', 'screen.png')) 58 | 59 | @property 60 | def video_mask(self): 61 | return np.array(Image.open(self._join_path(self.root, 'videos', 'video_mask.png'))) != 0 62 | 63 | @property 64 | def groundtruth_tracks(self): 65 | # prepare boxes in advance 66 | nLineIds = len(self.annots['measurementLines']) 67 | nLanes = len(self.annots['laneDivLines']) - 1 68 | rects = np.empty((nLineIds, nLanes, 4), dtype=np.int32) 69 | front = np.empty((nLineIds, nLanes, 2), dtype=np.int32) 70 | R = 0.6 * np.float32([[0,-1],[1, 0]]) 71 | for lid,line in enumerate(self.annots['measurementLines']): 72 | pts = [line_intersection_2d(line, lane) for lane in self.annots['laneDivLines']] 73 | for i in range(len(pts)-1): 74 | p0, p1 = pts[i : i+2] 75 | front[lid,i] = (p0 + p1) / 2 76 | p2 = p0 + (p1 - p0) @ R 77 | assert p2[1] < max(p0[1], p1[1]) 78 | p3 = p2 + p1 - p0 79 | pp = np.c_[p0,p1,p2,p3].T 80 | rects[lid,i] = np.r_[pp.min(0), pp.max(0)] 81 | 82 | boxes = [] 83 | timestamps = [] 84 | track_ids = [] 85 | speeds = [] 86 | centers = [] 87 | for track_id, car in enumerate(self.annots['cars']): 88 | if not car['valid']: continue 89 | frames = car['intersections'] 90 | lanes = list(car['laneIndex']) 91 | nd = len(frames) 92 | # assert nd == nLineIds, bb() 93 | # car_id = car['carId'] # sometimes not unique 94 | track_ids.append( np.full(nd, track_id, np.int32) ) 95 | speeds.append( np.full(nd, car['speed'], np.float32) ) # in km/h 96 | 97 | for cp in car['intersections']: 98 | lid = cp['measurementLineId'] 99 | frame = int(cp['videoTime'] * self.fps) 100 | timestamps.append( frame ) 101 | boxes.append( rects[lid][lanes] )#.mean(axis=0).reshape(1,4) ) 102 | centers.append( front[lid][lanes] ) 103 | 104 | order = np.int32(timestamps).argsort() 105 | res = dict(timestamps=[timestamps], track_ids=track_ids, boxes=boxes, centers=centers, speeds=speeds) 106 | return {key:np.concatenate(vals)[order] for key,vals in res.items()} 107 | 108 | 109 | 110 | if __name__ == '__main__': 111 | from .video_dataset import collection 112 | 113 | db = collection(BrnoCompSpeed, root='/local/cctv/BrnoCompSpeed/', frame_step=2, nframes=100) 114 | print(db) 115 | 116 | for video in db: 117 | print(video) 118 | -------------------------------------------------------------------------------- /tools/tracker/tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021-present NAVER Corp. 
2 | # CC BY-NC-SA 3.0
3 | # Available only for non-commercial use
4 | # Parts of the code in this file are from https://github.com/scanner-research/scanner (under Apache-2.0 license)
5 | 
6 | from collections import defaultdict
7 | from tqdm import tqdm
8 | import numpy as np
9 | 
10 | 
11 | class Tracker:
12 |     def __init__(self, kf_params, **kwargs):
13 |         a, b1, b2, c, d, e, f = kf_params
14 |         kf_params = dict(
15 |             R_diag=[a, a, b1, b2],
16 |             P_diag=[c, c, c, c, d, d, d],
17 |             Q_diag=[e, e, e, e, f, f, f * f])
18 | 
19 |         raise NotImplementedError("TODO: insert your tracker init code here")
20 |         self._my_object_tracker = None
21 | 
22 |     def push(self, boxes, scores):
23 |         # non maxima suppression, remove overlapping boxes with lower scores
24 |         idx_maxima = get_maxima_idx(boxes, scores)
25 |         boxes = boxes[idx_maxima]
26 |         scores = scores[idx_maxima]
27 | 
28 |         # tracker input: array Nx5 for N detections
29 |         # where each row corresponds to a bounding box: [left, top, right, bottom, score]
30 | 
31 |         detection_tracker_in = np.c_[boxes, scores]
32 |         result = self._my_object_tracker.update( detection_tracker_in )
33 | 
34 |         # Tracker returns a Mx6 array of M ongoing tracks
35 |         # each row = [left, top, right, bottom, unique_track_id, input_box_idx]
36 | 
37 |         boxes = result[:, 0:4]
38 |         uids = result[:, 4:5].astype(int)
39 |         idxs = result[:, 5:6].astype(int)
40 |         if len(result): idxs = idx_maxima[idxs] # mapping back to original box indexes before nms
41 |         return uids, boxes, idxs
42 | 
43 | 
44 | class TrackerSet:
45 |     def __init__(self, categories, **kwargs):
46 |         self._trackers = {category: Tracker(**kwargs) for category in categories}
47 | 
48 |     def push(self, boxes, scores, labels):
49 |         tracked_boxes = []
50 |         tracked_uids = []
51 |         tracked_idxs = []
52 |         for cls, tracker in self._trackers.items():
53 |             sel = (labels == cls).nonzero()[0]
54 |             track_uids, track_boxes, track_idxs = tracker.push(boxes[sel], scores[sel])
55 | 
56 |             tracked_idxs.append(sel[track_idxs])
57 |             tracked_boxes.append(track_boxes)
58 |             tracked_uids.append(track_uids)
59 | 
60 |         return np.vstack(tracked_boxes), np.vstack(tracked_uids), np.vstack(tracked_idxs)
61 | 
62 | 
63 | def track_vehicles( dets, categories=None, update_box=True, dbg=(), **kwargs):
64 |     from tools.tracks import enumerate_frames
65 |     if not categories:
66 |         categories = set(np.unique(dets['labels']).tolist())
67 |         print(f'nb categories = {len(categories)}')
68 | 
69 |     tracker = TrackerSet(categories, **kwargs)
70 |     tracks = defaultdict(list)
71 |     dont_copy = {'img_hashes'}
72 |     if update_box: dont_copy.add('boxes')
73 | 
74 |     for timestamp, sl in tqdm(enumerate_frames(dets), total=np.unique(dets['timestamps']).size):
75 |         boxes = dets['boxes'] [sl]
76 |         scores = dets['scores'][sl]
77 |         labels = dets['labels'][sl]
78 |         new_boxes, uids, idxs = tracker.push(boxes, scores, labels)
79 |         idxs = idxs.ravel()
80 | 
81 |         tracks['track_ids'].append( uids.ravel() )
82 |         if update_box:
83 |             tracks['boxes'].append( new_boxes.astype(np.float32) )
84 |         for key in dets:
85 |             if key in dont_copy: continue
86 |             tracks[key].append( dets[key][sl][idxs] )
87 | 
88 |     # repack list of chunks
89 |     return {key:np.concatenate(vals, axis=0) for key, vals in tracks.items()}
90 | 
91 | 
92 | def get_maxima_idx(boxes, scores, thr_iou=0.5, thr_score=0.3):
93 |     """ Remove overlapping bounding boxes that have inferior scores.
94 |         thr_iou: threshold minimum intersection over union
95 |         thr_score: minimum score for a bounding box
96 |         Returns a list of indices of the selected boxes.
97 |     """
98 |     # if there are no boxes, return an empty list
99 |     if len(boxes) == 0:
100 |         return []
101 | 
102 |     # if the bounding boxes are integers, convert them to floats --
103 |     # this is important since we'll be doing a bunch of divisions
104 |     if boxes.dtype.kind == "i":
105 |         boxes = boxes.astype("float")
106 | 
107 |     # initialize the list of picked indexes
108 |     pick_idx = []
109 | 
110 |     # grab the coordinates of the bounding boxes
111 |     x1 = boxes[:, 0]
112 |     y1 = boxes[:, 1]
113 |     x2 = boxes[:, 2]
114 |     y2 = boxes[:, 3]
115 | 
116 |     # compute the area of the bounding boxes and sort the bounding
117 |     # boxes by increasing detection score
118 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
119 |     idxs = np.argsort(scores)
120 | 
121 |     # keep looping while some indexes still remain in the indexes
122 |     # list
123 |     while len(idxs) > 0:
124 |         # grab the last index in the indexes list (highest score) and add the
125 |         # index value to the list of picked indexes
126 |         last = len(idxs) - 1
127 |         idx = idxs[last]
128 |         if scores[idx] < thr_score: break
129 |         pick_idx.append(idx)
130 | 
131 |         # find the largest (x, y) coordinates for the start of
132 |         # the bounding box and the smallest (x, y) coordinates
133 |         # for the end of the bounding box
134 |         xx1 = np.maximum(x1[idx], x1[idxs[:last]])
135 |         yy1 = np.maximum(y1[idx], y1[idxs[:last]])
136 |         xx2 = np.minimum(x2[idx], x2[idxs[:last]])
137 |         yy2 = np.minimum(y2[idx], y2[idxs[:last]])
138 | 
139 |         # compute the width and height of the bounding box
140 |         w = np.maximum(0, xx2 - xx1 + 1)
141 |         h = np.maximum(0, yy2 - yy1 + 1)
142 | 
143 |         # compute the ratio of overlap = intersection / original area
144 |         overlap = (w * h) / areas[idxs[:last]]
145 | 
146 |         # delete the picked index and all indexes that overlap it too much
147 |         idxs = np.delete(idxs, np.concatenate(([last],
148 |             np.where(overlap > thr_iou)[0])))
149 | 
150 |     return np.int32(pick_idx)
--------------------------------------------------------------------------------
/tools/speed.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0 3 | # Available only for non-commercial use 4 | 5 | from pdb import set_trace as bb 6 | import numpy as np 7 | import torch 8 | 9 | from tools.common import todevice, mkdir_for 10 | from tools.tracks import enumerate_tracks, sort_tracks, filter_tracks, print_stats 11 | from tools.geometry import normed, recover_homography_from_derivatives, jacobianh, applyh 12 | numpy = lambda x: todevice(x, 'numpy') 13 | 14 | 15 | def compute( dataset, model ): 16 | for video in dataset: 17 | print(f'>> Processing video {video}') 18 | estimate_homography( video, model ) 19 | 20 | 21 | def estimate_homography( video, net, motion_delta=0.04, **track_filters ): 22 | try: 23 | return video.homography 24 | 25 | except IOError as error: 26 | tracks = filter_tracks(video, video.tracks, sample_tracks=100, sample_boxes=10, **track_filters) 27 | 28 | tracks['motion'] = tracks['centers'].copy() 29 | for tid, track in enumerate_tracks(tracks, dic=True): 30 | track['motion'][:] = compute_motion(track['timestamps'], track['centers'], delta=motion_delta*video.fps) 31 | 32 | H_from_px = homography_from_transformer( video, tracks, net ) 33 | 34 | np.savez( mkdir_for(error.filename), H_from_px=H_from_px) 35 | return H_from_px 36 | 37 | 38 | def homography_from_transformer( video, tracks, net, 39 | sampling_mode = 'random_tracks', sampling_size = 10, sampling_iters = 0, 40 | ransac_iters = 1024 ): 41 | 42 | sampled_tracks = sample_tracks( tracks, sampling_mode, sampling_size, sampling_iters) 43 | 44 | S = max(video.imsize) 45 | all_preds = [] 46 | for sel in sampled_tracks: 47 | # extract a random subset of boxes 48 | boxes = {k:arr[sel] for k,arr in tracks.items()} 49 | assert len(boxes['timestamps']) 50 | 51 | # build embeddings 52 | embs = build_embeddings(boxes, S) 53 | 54 | # extract the homography 55 | embs = torch.from_numpy(embs) 56 | with torch.no_grad(): 57 | preds = net(embs[None]) 58 | all_preds.append( preds[0] ) 59 | 60 | # ransac on all predictions 61 | H_from_px = ransac_homography_from_jacobians( torch.cat(all_preds), niter=ransac_iters) 62 | 63 | # scale back to image size 64 | H_from_px = H_from_px @ np.diag((1/S, 1/S, 1)) 65 | return H_from_px 66 | 67 | 68 | def sample_tracks( tracks, sampling_mode='random_tracks', sampling_size=10, sampling_iters=1 ): 69 | sampling_iters = sampling_iters or 99999 70 | 71 | if sampling_mode == 'random_boxes': 72 | nboxes = len(tracks['boxes']) 73 | sampling_iters = min(nboxes // sampling_size, sampling_iters) 74 | sel = np.random.choice(nboxes, size=(sampling_iters, sampling_size), replace=False) 75 | 76 | elif sampling_mode == 'random_tracks': 77 | track_ids = np.unique(tracks['track_ids']) 78 | ntracks = track_ids.size 79 | sampling_iters = min(ntracks, sampling_iters) 80 | sel_tracks = np.random.choice(track_ids, size=sampling_iters, replace=False) 81 | sel_tracks = [np.nonzero( tracks['track_ids'] == tid )[0] for tid in sel_tracks] 82 | sel = [np.random.choice(track, size=min(len(track), sampling_size), replace=False) 83 | for track in sel_tracks] 84 | else: 85 | raise ValueError(f'bad sampling mode: {sampling_mode}') 86 | return sel 87 | 88 | 89 | def build_embeddings( boxes, scale, norm_motion=True ): 90 | centers, ellipsis = get_ellipsis(boxes['boxes'] / scale, boxes['masks']) 91 | motion = normed(boxes['motion'], axis=1) 92 | if norm_motion: 93 | motion *= np.sign(motion[:,1:2]) # make sure it's going down 94 | embs = np.c_[centers, 1001 * ellipsis, motion] 95 | return embs 96 | 97 | 98 | def get_ellipsis(scaled_boxes, masks, mode=0.5): 99 | 
lt = scaled_boxes[:,0:2] # top-left corner
100 |     wh = scaled_boxes[:,2:4] - lt # width, height of boxes
101 | 
102 |     # X,Y coordinate grid
103 |     H, W = masks.shape[-2:]
104 |     xy = np.mgrid[0:H, 0:W].reshape(2, -1)[::-1].T
105 |     # normalize xy in [0,1]
106 |     if mode == 0:
107 |         xy = xy / (W-1, H-1)
108 |     if mode == 0.5:
109 |         xy = (xy + 0.5) / (W, H)
110 |     xy = xy[None] * wh[:,None] + lt[:,None]
111 |     # corresponding weights
112 |     w = masks.reshape(len(masks), -1)
113 |     w = w / w.sum(axis=1, keepdims=True)
114 |     assert np.isfinite(w).all(), bb()
115 | 
116 |     # weighted centers
117 |     centers = (w[:, :, None] * xy).sum(axis=1, keepdims=True)
118 |     xy -= centers
119 | 
120 |     # weighted covariance
121 |     cov = w[:,None] * xy.transpose(0,2,1) @ xy
122 |     return centers[:,0,:], cov.reshape(-1, 4)
123 | 
124 | 
125 | def sqr_norm(x, **kw):
126 |     return np.square(x).sum(axis=-1)
127 | 
128 | 
129 | def jacobian_from_preds(pred):
130 |     # estimate jacobian from predicted output
131 |     S, D = pred.shape
132 |     pred = pred.view(S, 4, 2)
133 |     centers = pred.mean(dim = -2)
134 | 
135 |     pred_jcam = torch.stack(((pred[...,2,:] - pred[...,1,:])/4, (pred[...,3,:] - pred[...,0,:])/2), dim=-2)
136 |     pred_jw = torch.inverse( pred_jcam )
137 |     return centers, pred_jw
138 | 
139 | 
140 | def ransac_homography_from_jacobians(preds, niter=0):
141 |     assert preds.ndim == 2
142 |     pos, jw = numpy(jacobian_from_preds(preds))
143 | 
144 |     jcam = np.linalg.inv(jw)[...,::-1,:] # back to (dx,dy)
145 |     jw = np.linalg.inv(jcam)
146 | 
147 |     jcam2 = jcam.copy()
148 |     jcam2[..., 0, 1] = 0 # should be horizontal if there is no roll
149 |     jw2 = np.linalg.inv(jcam2)
150 |     jw2[:, 0, 1] = 0 # make sure it's zero
151 |     norm_j = sqr_norm(jcam)
152 | 
153 |     # visit pairs (i,j) in random order, enumerated as triangular indices
154 |     N = len(jw)
155 |     order = np.random.permutation((N-1)*(N-2)//2)
156 | 
157 |     best = 0, None
158 |     for trial,o in enumerate(order[:niter or None]):
159 |         # recover the two pair indices from the triangular index
160 |         i = int(np.sqrt(8*o+1) + 1) // 2
161 |         j = o - i*(i-1)//2
162 | 
163 |         # compute a hypothesis
164 |         H_from_px = recover_homography_from_derivatives(pos[i], pos[j], jw2[i], jw2[j])
165 | 
166 |         # compute a robust fitting score
167 |         jw_ = jacobianh(H_from_px, pos).T[:,:2]
168 |         if np.isnan(jw_).any(): continue
169 | 
170 |         # dot-product normalized by largest norm
171 |         # norm_dot_prod = |a| * |b| * cos(a,b) / (|a|*|b|) * min(|a|,|b|)/max(|a|,|b|)
172 |         try: jcam_ = np.linalg.inv(jw_)
173 |         except np.linalg.LinAlgError: continue # one of the matrices is singular
174 |         score = (jcam * jcam_).sum(axis=2) / np.maximum(norm_j, sqr_norm(jcam_))
175 |         score = score.prod(axis=1)
176 | 
177 |         score = np.sum( score )
178 |         if score > best[0]: best = score, H_from_px
179 | 
180 |     assert best[1] is not None, bb()
181 |     return best[1]
182 | 
183 | 
184 | def compute_motion( timestamps, centers, delta=1, non_null=1.05 ):
185 |     N, D = centers.shape
186 |     assert len(timestamps) == N
187 | 
188 |     ts = timestamps
189 |     c = centers
190 |     # timestamps before / after
191 |     delta /= 2
192 |     tba = np.r_[ts - delta, ts + delta]
193 |     tb_ta = tba.reshape(2,-1)
194 | 
195 |     for trial in range(999):
196 |         # positions before / after
197 |         before, after = np.c_[[np.interp(tba, ts, c[:,i]) for i in range(D)]].T.reshape(2,N,D)
198 | 
199 |         # increase the time interval until motion is non null
200 |         nulls = (after == before).all(axis=1)
201 |         if not(non_null and nulls.any()): break
202 |         tb_ta[:,nulls] = ts[nulls] + non_null*(tb_ta[:,nulls] - ts[nulls])
203 |     else:
204 |         raise RuntimeError(f'could not get a non-null motion!\nts={ts}\ncenters={c}')
205 | 
206 |     # cropping timestamps to (ts[0], ts[-1])
207 |     tb, ta = np.interp(tba, ts, ts).reshape(2,N,1)
208 | 
209 |     motion = (after - before) / (ta - tb)
210 |     return motion
211 | 
212 | 
213 | def compute_speed( track, H_from_px, video_fps, delta=5, dbg=()):
214 |     centers_3d = applyh(H_from_px, track['centers'])
215 | 
216 |     ts = track['timestamps']
217 |     speeds = np.linalg.norm(compute_motion(ts, centers_3d, delta=delta*video_fps), axis=1)
218 |     speeds *= video_fps * 3.6 # meters/frame to m/s (via fps), then m/s to km/h (via 3.6)
219 | 
220 |     assert np.isfinite(speeds).all(), bb()
221 |     return speeds
222 | 
223 | 
224 | def evaluate( dataset,
225 |         get_tracks = lambda video: sort_tracks(video.groundtruth_tracks),
226 |         gt_matcher = lambda video, tracks: (tracks['speeds'], np.abs(tracks['speeds']-tracks['est_speeds'])) ):
227 | 
228 |     all_diff_abs = []
229 |     all_diff_rel = []
230 | 
231 |     for video in dataset:
232 |         print(f'>> Evaluating video {video}')
233 | 
234 |         # load homography
235 |         H_from_px = video.homography['H_from_px']
236 | 
237 |         # load tracks
238 |         tracks = get_tracks(video)
239 |         print_stats(tracks)
240 | 
241 |         tracks['est_speeds'] = np.zeros_like(tracks['timestamps'], dtype='float32')
242 |         for tid, track in enumerate_tracks(tracks, dic=True):
243 |             track['est_speeds'][:] = compute_speed(track, H_from_px, video.fps)
244 | 
245 |         gt_speeds, diff = gt_matcher( video, tracks )
246 |         print(f'  >> Median error = {np.median(diff):.1f} km/h')
247 |         all_diff_abs.append( diff )
248 |         all_diff_rel.append( diff / gt_speeds )
249 | 
250 |     all_diff_abs = np.concatenate(all_diff_abs)
251 |     all_diff_rel = 100 * np.concatenate(all_diff_rel)
252 | 
253 |     print("\nSummary:")
254 |     print(f" >> absolute error: mean = {np.mean(all_diff_abs):.2f} km/h, median = {np.median(all_diff_abs):.2f} km/h")
255 |     print(f" >> relative error: mean = {np.mean(all_diff_rel):.2f} % ,  median = {np.median(all_diff_rel):.2f} %")
--------------------------------------------------------------------------------
/tools/tracks.py:
--------------------------------------------------------------------------------
1 | # Copyright 2021-present NAVER Corp.
2 | # CC BY-NC-SA 3.0 3 | # Available only for non-commercial use 4 | 5 | from pdb import set_trace as bb 6 | from copy import deepcopy 7 | from tqdm import tqdm 8 | import numpy as np 9 | 10 | from tools.common import * 11 | 12 | 13 | def extract( dataset ): 14 | """ Extract all vehicle tracks from a given video dataset, and save it to 15 | `output_dir/tracks/video_id/tracks.npz` 16 | """ 17 | for video in dataset: 18 | print(f'>> Processing video {video} ...') 19 | track_cars(video) 20 | 21 | 22 | def detect_cars(video, gpu_idx=0, batch_size=4, threads=8): 23 | try: 24 | return video.detections 25 | except IOError as error: 26 | # detect cars using MaskRCNN 27 | import torch 28 | 29 | print(f'\n>> Starting detection...') 30 | device = select_device(gpu_idx) 31 | gpu = lambda x: todevice(x,device) 32 | numpy = lambda x: todevice(x,'numpy') 33 | 34 | data_loader = torch.utils.data.DataLoader(video, batch_size=batch_size, shuffle=False) 35 | 36 | net = mask_rcnn() # load network 37 | net.eval() 38 | net.to(device) 39 | 40 | with torch.no_grad(): 41 | dets = [] 42 | for batch in tqdm(data_loader): 43 | frames = net(gpu(batch)) 44 | for frame in frames: # convert masks to bytes 45 | frame['masks'] *= 255.99 46 | frame['masks'] = frame['masks'][:,0,:,:].byte() 47 | dets += numpy(frames) 48 | 49 | print('>> Concatenating and saving...') 50 | frame_step = getattr(video, 'frame_step', 1) 51 | for idx, frame in enumerate(dets): 52 | frame['timestamps'] = np.full_like(frame['labels'], idx) 53 | dets = {key:np.concatenate([frame[key] for frame in dets]) for key in dets[0]} 54 | 55 | np.savez_compressed( mkdir_for(error.filename), **dets) 56 | return dets 57 | 58 | 59 | def mask_rcnn(filter_classes=['car', 'motorcycle', 'truck', 'bus'], mask_subsample=2): 60 | import torch 61 | from torchvision.models.detection import maskrcnn_resnet50_fpn, transform as trf 62 | from types import MethodType 63 | 64 | detector = maskrcnn_resnet50_fpn(pretrained=True) 65 | 66 | COCO_CLASSES = [ 67 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 68 | 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 69 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 70 | 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 71 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 72 | 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 73 | 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 74 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 75 | 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 76 | 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 77 | 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 78 | 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 79 | ] 80 | def dont_touch_masks(self, result, image_shapes, original_image_sizes): 81 | assert not self.training 82 | for i, (pred, im_s, o_im_s) in enumerate(zip(result, image_shapes, original_image_sizes)): 83 | result[i]["boxes"] = trf.resize_boxes(pred["boxes"], im_s, o_im_s) 84 | if "masks" in pred: 85 | masks = pred["masks"] 86 | if mask_subsample != 1: 87 | masks = torch.nn.functional.avg_pool2d(masks, mask_subsample, stride=mask_subsample) if masks.numel() else masks[:,:,::mask_subsample,::mask_subsample] # empty 88 | 
result[i]["masks"] = masks 89 | return result 90 | 91 | # leave mask output as it is (do not post-process it to full image resize) 92 | detector.transform.postprocess = MethodType(dont_touch_masks, detector) 93 | 94 | if filter_classes: 95 | ok_classes = [COCO_CLASSES.index(c) for c in filter_classes] 96 | cls_map = torch.zeros(len(COCO_CLASSES), dtype=torch.int64) 97 | for i,c in enumerate(ok_classes, 1): cls_map[c] = i 98 | 99 | class FilteredDetector (type(detector)): 100 | CLASSES = {idx+1:name for idx,name in enumerate(filter_classes)} 101 | def __call__(self, *args, **kwargs): 102 | list_res = super().__call__(*args, **kwargs) 103 | for res in list_res: 104 | valid = [l in ok_classes for l in res['labels']] 105 | for key in res: 106 | res[key] = res[key][valid] 107 | if key == 'labels': 108 | res[key] = cls_map[res[key]].to(res[key].device) 109 | return list_res 110 | # modify class type dynamically 111 | detector.__class__ = FilteredDetector 112 | return detector 113 | 114 | 115 | def track_cars( video ): 116 | try: 117 | return video.tracks 118 | except IOError as error: 119 | from .tracker import tracker 120 | 121 | coef = np.sqrt(np.prod(video.imsize) / (480.*320.)) 122 | assert coef > 0, 'image size is null' 123 | kf_params = [(coef * 5) ** 2, # measure noise: variance of x,y 124 | (coef * 5) ** 4, # measure noise: variance of area 125 | 0.5 ** 2, # measure noise: variance of aspect ratio (ar) 126 | coef**2 * 100, # init variance of x,y,area,ar 127 | 1e6, # init variance of x,y,area speeds 128 | coef ** 2 * 100, # running variance of x,y,area,ar 129 | coef**2 * 0.2] # running variance of speeds 130 | 131 | detections = detect_cars(video) 132 | tracks = tracker.track_vehicles(detections, kf_params=kf_params, max_age=5, min_hits=1, update_box=False) 133 | 134 | np.savez_compressed( mkdir_for(error.filename), **tracks) 135 | return tracks 136 | 137 | 138 | def enumerate_things( tracks, what, ids=None, dic=False, tqdm=False ): 139 | """ tracks: dict of arrays. Each array has a shape = (N, ...) 140 | where N is the total number of bounding boxes. 
141 | 142 | tracks = {'timestamps': shape= (N,), 143 | 'scores': shape = (N,), 144 | 'labels': shape = (N,) 145 | 'boxes': shape = (N, 4), 146 | 'track_ids': shape = (N,), 147 | etc.} 148 | """ 149 | # check if tracks are already sorted 150 | idxs = tracks[what] 151 | if not idxs.size: return 152 | well_sorted = len(idxs) == 1 or (idxs[1:] - idxs[:-1]).min() >= 0 153 | assert well_sorted, f"You need to sort_tracks(tracks, '{what}') beforehand" 154 | 155 | track_sizes = np.r_[0, np.bincount(idxs).cumsum()] 156 | if ids is None: ids = np.unique(idxs) 157 | if tqdm: ids = globals()['tqdm'](ids) 158 | for idx in ids: 159 | sl = slice(track_sizes[idx], track_sizes[idx+1]) 160 | assert idx == idxs[sl.start] == idxs[sl.stop-1] 161 | yield (int(idx), {key:val[sl] for key,val in tracks.items()} if dic else sl) 162 | 163 | def enumerate_frames( tracks, *args, **kw ): 164 | return enumerate_things(tracks, 'timestamps', *args, **kw) 165 | 166 | def enumerate_tracks( tracks, *args, **kw ): 167 | return enumerate_things(tracks, 'track_ids', *args, **kw) 168 | 169 | 170 | def sort_tracks( tracks, key='track_ids' ): 171 | idxs = tracks[key] 172 | if key != 'timestamps': 173 | # make sure that timestamps is the secondary order 174 | idxs = np.c_[tracks['timestamps'], idxs].astype(np.int32).view(np.int64).ravel() 175 | order = idxs.argsort() 176 | 177 | for k,vals in tracks.items(): 178 | tracks[k] = vals[order] # modify input dictionary 179 | return tracks 180 | 181 | 182 | def print_stats( tracks ): 183 | ts = tracks['timestamps'] 184 | nfr = np.unique(ts).size 185 | if nfr == 0: print(">> empty tracks!"); return 186 | box_track_ids = tracks['track_ids'] 187 | track_ids = np.unique(box_track_ids) 188 | 189 | from scipy.ndimage import minimum, maximum, sum 190 | nboxes_per_track = sum(np.ones_like(ts), labels=box_track_ids, index=track_ids) 191 | track_len = ( maximum(ts, labels=box_track_ids, index=track_ids) 192 | - minimum(ts, labels=box_track_ids, index=track_ids) ) 193 | 194 | print(f">> found {track_ids.size} vehicle tracks from {nfr} frames and {len(ts)} boxes", 195 | f"(track length = {np.median(nboxes_per_track):.1f}, duration = {np.median(track_len)})") 196 | 197 | from collections import namedtuple 198 | TrackStats = namedtuple('TrackStats', 'nframes ntracks nboxes nboxes_per_track') 199 | return TrackStats(nfr, track_ids.size, len(ts), nboxes_per_track) 200 | 201 | 202 | def filter_tracks( video, tracks, rm_truck = 0, rm_boundary = 0.02, rm_masked = True, 203 | rm_static = 0.5, sample_tracks = 0, sample_boxes = 0 ): 204 | tracks = deepcopy(tracks) # makesure we don't modify the original tracks 205 | 206 | if rm_truck>=0: tracks = remove_non_cars( tracks, min_num=rm_truck ) 207 | if rm_boundary: tracks = remove_boundary_boxes( tracks, video.imsize, rm_boundary ) 208 | if rm_masked: tracks = remove_masked_boxes( tracks, video.video_mask ) 209 | if rm_static: tracks = remove_static_tracks( tracks, video.fps, min_len=5, iou_thr=rm_static ) 210 | if sample_tracks: tracks = subsample_tracks( tracks, sample_tracks ) 211 | 212 | tracks['boxes'] = clip_boxes( tracks['boxes'], video.imsize, min_car_size=5) 213 | tracks['centers'] = box_center( tracks['boxes'] ) 214 | 215 | if sample_boxes: tracks = subsample_boxes( tracks, sample_boxes, prop_size=True ) 216 | return tracks 217 | 218 | 219 | def box_center( box ): 220 | return box.reshape(-1,2,2).mean(axis=1).squeeze() 221 | 222 | def box_bottom_center( box ): 223 | return np.c_[box_center(box)[...,0], box[...,1::2].max(1)] 224 | 225 | def box_wh( box ): 
226 |     return box[...,2:4] - box[...,0:2]
227 | 
228 | def box_area( box ):
229 |     wh = box_wh( box )
230 |     return np.prod(wh, axis=-1)
231 | 
232 | def ltrb_to_xywh( boxes, half=False ):
233 |     xy = box_center( boxes )
234 |     if half:
235 |         return np.c_[xy, boxes[:,2:4]-xy]
236 |     else:
237 |         return np.c_[xy, boxes[:,2:4]-boxes[:,0:2]]
238 | 
239 | def valid_boxes(boxes, imsize, bnd=0.02):
240 |     im_w, im_h = imsize
241 |     l,t,r,b = boxes.T
242 |     return np.c_[l > im_w*bnd, t > im_h*bnd, r < im_w*(1-bnd), b < im_h*(1-bnd)]
243 | 
244 | def clip_boxes(boxes, imsize, min_car_size=5):
245 |     # we make sure that boxes are big enough (both width and height)
246 |     # valid = Nx4 array, indicates which box coordinates are valid/invalid
247 |     xywh = ltrb_to_xywh(boxes, half=True)
248 |     too_small = (min_car_size/2 - xywh[:,2:4]).clip(min=0)
249 | 
250 |     if too_small.any():
251 |         valid = valid_boxes( boxes, imsize).view(np.int8)
252 |         xywh = xywh + np.c_[(valid[:,0:2] - valid[:,2:4]) * too_small, too_small]
253 |     else:
254 |         return boxes # unchanged input
255 | 
256 |     x,y,w,h = xywh.T
257 |     return np.c_[x-w,y-h,x+w,y+h]
258 | 
259 | 
260 | def remove_static_tracks( tracks, fps, min_len=5, iou_thr=0.5, time_gap=0.5):
261 |     """ Remove trivially wrong tracks (based on simple tests).
262 | 
263 |     min_len: (int) minimum length of a track
264 |     iou_thr: (float) maximum IoU that a track can have within itself.
265 |     """
266 |     from .tracker.bbox import inter_over_union
267 | 
268 |     # ts = tracks['timestamps']
269 |     track_ids = tracks['track_ids']
270 |     keep = np.zeros_like(track_ids)
271 | 
272 |     trks = [(track_ids == tid).nonzero()[0] for tid in np.unique(track_ids)][::-1]
273 |     tid2 = 0
274 |     while trks:
275 |         sel = trks.pop()
276 | 
277 |         # not an empty track
278 |         if len(sel) < min_len: continue
279 | 
280 |         # start and end are not overlapping
281 |         boxes = tracks['boxes'][sel]
282 |         start, end = boxes[[0,-1]]
283 |         if inter_over_union(start, end) > 0: continue
284 | 
285 |         # sanity interval for a 30 km/h car that is 4m long:
286 |         # 30 km/h = 30/3.6 = 8.3 m/s,
287 |         # so 2 boxes must not overlap after 4/8.3 = 0.48s ~= 0.5s
288 |         min_gap = int(np.ceil(fps * time_gap))
289 | 
290 |         # let's pretend that all timestamps are contiguous...
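        # (comparing each box with the box min_gap detections earlier in the same
        #  track: a moving car has ~zero IoU with its own position half a second later,
        #  whereas a static or spurious track keeps overlapping with itself;
        #  the overlapping segments are cut out below)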
291 | overlaps = inter_over_union(boxes[min_gap:].T, boxes[:-min_gap].T) 292 | 293 | # cut into multiple sub-tracks each time it overlaps 294 | bad = (overlaps > iou_thr).nonzero()[0] 295 | if bad.size == 0: 296 | tid2 += 1 297 | keep[sel] = tid2 298 | continue 299 | 300 | accu = np.zeros(len(boxes)+1, np.int32) 301 | accu[bad] += 1 302 | accu[bad+min_gap+1] -= 1 303 | good = (accu.cumsum()[:-1] == 0).view(np.int8) # all null segments are bad boxes 304 | good = np.r_[0, good, 0] # surrounded by bad segments 305 | start_end = good[1:] - good[:-1] # mark the beginning/end of all good segments 306 | for start, end in start_end.nonzero()[0].reshape(-1,2): 307 | # print(start, end) 308 | trks.append( sel[start:end] ) 309 | 310 | # keep only valid instances 311 | valid = keep > 0 312 | return {key: val[valid] for key,val in tracks.items()} 313 | 314 | 315 | def remap_labels( labels ): 316 | assert labels.min() >= 0 317 | remap = np.zeros(labels.max()+1, dtype=np.int32) 318 | tids = np.unique(labels) 319 | remap[tids] = np.arange(tids.size) 320 | return remap[labels] 321 | 322 | 323 | def overlapping_tracks( tracks, targets, iou_thr=0.9, min_gap=5, min_num=1 ): 324 | """ This generator yields, for each target track, the mask of all similar tracks 325 | 326 | tracks: dictionary of tracks 327 | target: 1d array of track ids 328 | yields: array of boolean (one per box) 329 | """ 330 | from tools.tracker.bbox import inter_over_union 331 | assert min_num >= 1 and iou_thr > 0 332 | 333 | ts = tracks['timestamps'] 334 | boxes = tracks['boxes'] 335 | track_ids = tracks['track_ids'] 336 | last_track = track_ids.max() 337 | 338 | # look for vehicle tracks that strongly overlap 339 | for tid in np.unique(targets): 340 | # timestamps of this track 341 | track = (track_ids == tid).nonzero()[0] 342 | trk_ts = ts[track] # track's timestamps 343 | ious = np.zeros((len(trk_ts), last_track+1), dtype=np.float32) 344 | 345 | for i, (t,sel) in enumerate(enumerate_frames(tracks, ids=trk_ts)): 346 | assert isinstance(sel, slice), "tracks must be ordered by timestamps" 347 | # compare this bounding box to all others 348 | box = boxes[track[i]] # this vehicle's box 349 | ious[i, track_ids[sel]] = inter_over_union( box, boxes[sel].T ) 350 | 351 | # check which tracks are matching 352 | # ious = nframes(trk_ts) x ntracks 353 | # pl.plot(ious) # show all track temporal overlaps 354 | ious = (ious > iou_thr) 355 | t_start = trk_ts[ious.argmax(axis=0)] 356 | t_end = trk_ts[len(trk_ts)-1-ious[::-1].argmax(axis=0)] 357 | same_track = (ious.sum(axis=0) >= min_num) & (t_end - t_start >= min_gap) 358 | yield same_track 359 | 360 | 361 | def remove_non_cars(tracks, car_label=1, min_num=0, iou_thr=0.9, min_gap=5 ): 362 | """ Remove tracks that are not cars, based on the labels. 
363 |     """
364 |     sort_tracks(tracks, 'timestamps')
365 |     try:
366 |         labels = tracks['labels']
367 |     except KeyError:
368 |         return tracks # no labels, nothing to do
369 |     valid = np.isin(labels, car_label)
370 | 
371 |     if min_num > 0:
372 |         track_ids = tracks['track_ids']
373 |         track_ids[:] = remap_labels(track_ids) # renumber tracks for efficiency
374 | 
375 |         non_car_track_ids = np.unique(track_ids[~valid])
376 |         for same_track in overlapping_tracks( tracks, non_car_track_ids, iou_thr, min_gap, min_num ):
377 |             # also remove car tracks that strongly overlap a non-car track
378 |             valid[same_track[track_ids]] = False
379 | 
380 |     return {key: val[valid] for key,val in tracks.items()}
381 | 
382 | 
383 | def remove_boundary_boxes( tracks, imsize, frame_bnd=0.02):
384 |     """ Remove boxes that come too close to the image border (see valid_boxes). """
385 |     boxes = tracks['boxes']
386 |     valid = valid_boxes(boxes, imsize, bnd=frame_bnd).all(axis=1)
387 |     return {key: val[valid] for key,val in tracks.items()}
388 | 
389 | 
390 | def remove_masked_boxes( tracks, mask ):
391 |     x, y = np.int32(0.5 + box_center(tracks['boxes'])).T
392 |     valid = mask[y, x]
393 |     return {key: val[valid] for key,val in tracks.items()}
394 | 
395 | 
396 | def subsample_tracks( tracks, max_num, criterion='length'):
397 |     tracks = sort_tracks(tracks)
398 |     ts = tracks['timestamps']
399 |     scores = {}
400 |     for tid, track in enumerate_tracks(tracks):
401 |         if criterion == 'length':
402 |             scores[tid] = len(ts[track])
403 |         else:
404 |             raise ValueError(f'bad criterion {criterion}')
405 | 
406 |     sorted_scores = sorted(scores.items(), key=lambda p:p[1])
407 |     whitelist = {tid for tid,_ in sorted_scores[-max_num:]}
408 | 
409 |     keep = []
410 |     for tid, track in enumerate_tracks(tracks):
411 |         if tid in whitelist:
412 |             if isinstance(track, slice): track = np.mgrid[track]
413 |             keep.append( track )
414 |     keep = np.concatenate(keep)
415 |     keep.sort() # keep original order
416 |     return {key:val[keep] for key,val in tracks.items()}
417 | 
418 | 
419 | def subsample_boxes( tracks, max_len, prop_size=True ):
420 |     if prop_size:
421 |         box_sizes = np.sqrt(box_area(tracks['boxes']))
422 |     x = (np.arange(max_len) + 0.5) / max_len
423 | 
424 |     keep = []
425 |     for tid, track in enumerate_tracks(tracks):
426 |         if isinstance(track, slice):
427 |             track = np.mgrid[track]
428 |         if len(track) <= max_len:
429 |             keep.append( track )
430 |             continue
431 | 
432 |         if prop_size:
433 |             sizes = box_sizes[track]
434 |             cum = np.r_[0, sizes.cumsum()]
435 |         else:
436 |             cum = np.arange(len(track)+1, dtype=np.float32)
437 | 
438 |         idxs = np.interp(x, cum / cum[-1], np.arange(len(track)+1))
439 |         keep.append(track[np.int32(idxs)])
440 | 
441 |     keep = np.concatenate(keep)
442 |     keep.sort() # keep original order
443 |     return {key:val[keep] for key,val in tracks.items()}
444 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons
2 | 
3 | Attribution-NonCommercial-ShareAlike 3.0 Unported
4 | 
5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM ITS USE.
6 | License
7 | THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE").
THE WORK IS PROTECTED BY COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. 8 | 9 | BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND CONDITIONS. 10 | 11 | 1. Definitions 12 | 13 | "Adaptation" means a work based upon the Work, or upon the Work and other pre-existing works, such as a translation, adaptation, derivative work, arrangement of music or other alterations of a literary or artistic work, or phonogram or performance and includes cinematographic adaptations or any other form in which the Work may be recast, transformed, or adapted including in any form recognizably derived from the original, except that a work that constitutes a Collection will not be considered an Adaptation for the purpose of this License. For the avoidance of doubt, where the Work is a musical work, performance or phonogram, the synchronization of the Work in timed-relation with a moving image ("synching") will be considered an Adaptation for the purpose of this License. 14 | "Collection" means a collection of literary or artistic works, such as encyclopedias and anthologies, or performances, phonograms or broadcasts, or other works or subject matter other than works listed in Section 1(g) below, which, by reason of the selection and arrangement of their contents, constitute intellectual creations, in which the Work is included in its entirety in unmodified form along with one or more other contributions, each constituting separate and independent works in themselves, which together are assembled into a collective whole. A work that constitutes a Collection will not be considered an Adaptation (as defined above) for the purposes of this License. 15 | "Distribute" means to make available to the public the original and copies of the Work or Adaptation, as appropriate, through sale or other transfer of ownership. 16 | "License Elements" means the following high-level license attributes as selected by Licensor and indicated in the title of this License: Attribution, Noncommercial, ShareAlike. 17 | "Licensor" means the individual, individuals, entity or entities that offer(s) the Work under the terms of this License. 18 | "Original Author" means, in the case of a literary or artistic work, the individual, individuals, entity or entities who created the Work or if no individual or entity can be identified, the publisher; and in addition (i) in the case of a performance the actors, singers, musicians, dancers, and other persons who act, sing, deliver, declaim, play in, interpret or otherwise perform literary or artistic works or expressions of folklore; (ii) in the case of a phonogram the producer being the person or legal entity who first fixes the sounds of a performance or other sounds; and, (iii) in the case of broadcasts, the organization that transmits the broadcast. 
19 | "Work" means the literary and/or artistic work offered under the terms of this License including without limitation any production in the literary, scientific and artistic domain, whatever may be the mode or form of its expression including digital form, such as a book, pamphlet and other writing; a lecture, address, sermon or other work of the same nature; a dramatic or dramatico-musical work; a choreographic work or entertainment in dumb show; a musical composition with or without words; a cinematographic work to which are assimilated works expressed by a process analogous to cinematography; a work of drawing, painting, architecture, sculpture, engraving or lithography; a photographic work to which are assimilated works expressed by a process analogous to photography; a work of applied art; an illustration, map, plan, sketch or three-dimensional work relative to geography, topography, architecture or science; a performance; a broadcast; a phonogram; a compilation of data to the extent it is protected as a copyrightable work; or a work performed by a variety or circus performer to the extent it is not otherwise considered a literary or artistic work. 20 | "You" means an individual or entity exercising rights under this License who has not previously violated the terms of this License with respect to the Work, or who has received express permission from the Licensor to exercise rights under this License despite a previous violation. 21 | "Publicly Perform" means to perform public recitations of the Work and to communicate to the public those public recitations, by any means or process, including by wire or wireless means or public digital performances; to make available to the public Works in such a way that members of the public may access these Works from a place and at a place individually chosen by them; to perform the Work to the public by any means or process and the communication to the public of the performances of the Work, including by public digital performance; to broadcast and rebroadcast the Work by any means including signs, sounds or images. 22 | "Reproduce" means to make copies of the Work by any means including without limitation by sound or visual recordings and the right of fixation and reproducing fixations of the Work, including storage of a protected performance or phonogram in digital form or other electronic medium. 23 | 24 | 2. Fair Dealing Rights. Nothing in this License is intended to reduce, limit, or restrict any uses free from copyright or rights arising from limitations or exceptions that are provided for in connection with the copyright protection under copyright law or other applicable laws. 25 | 26 | 3. License Grant. Subject to the terms and conditions of this License, Licensor hereby grants You a worldwide, royalty-free, non-exclusive, perpetual (for the duration of the applicable copyright) license to exercise the rights in the Work as stated below: 27 | 28 | to Reproduce the Work, to incorporate the Work into one or more Collections, and to Reproduce the Work as incorporated in the Collections; 29 | to create and Reproduce Adaptations provided that any such Adaptation, including any translation in any medium, takes reasonable steps to clearly label, demarcate or otherwise identify that changes were made to the original Work. 
For example, a translation could be marked "The original work was translated from English to Spanish," or a modification could indicate "The original work has been modified."; 30 | to Distribute and Publicly Perform the Work including as incorporated in Collections; and, 31 | to Distribute and Publicly Perform Adaptations. 32 | The above rights may be exercised in all media and formats whether now known or hereafter devised. The above rights include the right to make such modifications as are technically necessary to exercise the rights in other media and formats. Subject to Section 8(f), all rights not expressly granted by Licensor are hereby reserved, including but not limited to the rights described in Section 4(e). 33 | 34 | 4. Restrictions. The license granted in Section 3 above is expressly made subject to and limited by the following restrictions: 35 | 36 | You may Distribute or Publicly Perform the Work only under the terms of this License. You must include a copy of, or the Uniform Resource Identifier (URI) for, this License with every copy of the Work You Distribute or Publicly Perform. You may not offer or impose any terms on the Work that restrict the terms of this License or the ability of the recipient of the Work to exercise the rights granted to that recipient under the terms of the License. You may not sublicense the Work. You must keep intact all notices that refer to this License and to the disclaimer of warranties with every copy of the Work You Distribute or Publicly Perform. When You Distribute or Publicly Perform the Work, You may not impose any effective technological measures on the Work that restrict the ability of a recipient of the Work from You to exercise the rights granted to that recipient under the terms of the License. This Section 4(a) applies to the Work as incorporated in a Collection, but this does not require the Collection apart from the Work itself to be made subject to the terms of this License. If You create a Collection, upon notice from any Licensor You must, to the extent practicable, remove from the Collection any credit as required by Section 4(d), as requested. If You create an Adaptation, upon notice from any Licensor You must, to the extent practicable, remove from the Adaptation any credit as required by Section 4(d), as requested. 37 | You may Distribute or Publicly Perform an Adaptation only under: (i) the terms of this License; (ii) a later version of this License with the same License Elements as this License; (iii) a Creative Commons jurisdiction license (either this or a later license version) that contains the same License Elements as this License (e.g., Attribution-NonCommercial-ShareAlike 3.0 US) ("Applicable License"). You must include a copy of, or the URI, for Applicable License with every copy of each Adaptation You Distribute or Publicly Perform. You may not offer or impose any terms on the Adaptation that restrict the terms of the Applicable License or the ability of the recipient of the Adaptation to exercise the rights granted to that recipient under the terms of the Applicable License. You must keep intact all notices that refer to the Applicable License and to the disclaimer of warranties with every copy of the Work as included in the Adaptation You Distribute or Publicly Perform. 
When You Distribute or Publicly Perform the Adaptation, You may not impose any effective technological measures on the Adaptation that restrict the ability of a recipient of the Adaptation from You to exercise the rights granted to that recipient under the terms of the Applicable License. This Section 4(b) applies to the Adaptation as incorporated in a Collection, but this does not require the Collection apart from the Adaptation itself to be made subject to the terms of the Applicable License. 38 | You may not exercise any of the rights granted to You in Section 3 above in any manner that is primarily intended for or directed toward commercial advantage or private monetary compensation. The exchange of the Work for other copyrighted works by means of digital file-sharing or otherwise shall not be considered to be intended for or directed toward commercial advantage or private monetary compensation, provided there is no payment of any monetary compensation in con-nection with the exchange of copyrighted works. 39 | If You Distribute, or Publicly Perform the Work or any Adaptations or Collections, You must, unless a request has been made pursuant to Section 4(a), keep intact all copyright notices for the Work and provide, reasonable to the medium or means You are utilizing: (i) the name of the Original Author (or pseudonym, if applicable) if supplied, and/or if the Original Author and/or Licensor designate another party or parties (e.g., a sponsor institute, publishing entity, journal) for attribution ("Attribution Parties") in Licensor's copyright notice, terms of service or by other reasonable means, the name of such party or parties; (ii) the title of the Work if supplied; (iii) to the extent reasonably practicable, the URI, if any, that Licensor specifies to be associated with the Work, unless such URI does not refer to the copyright notice or licensing information for the Work; and, (iv) consistent with Section 3(b), in the case of an Adaptation, a credit identifying the use of the Work in the Adaptation (e.g., "French translation of the Work by Original Author," or "Screenplay based on original Work by Original Author"). The credit required by this Section 4(d) may be implemented in any reasonable manner; provided, however, that in the case of a Adaptation or Collection, at a minimum such credit will appear, if a credit for all contributing authors of the Adaptation or Collection appears, then as part of these credits and in a manner at least as prominent as the credits for the other contributing authors. For the avoidance of doubt, You may only use the credit required by this Section for the purpose of attribution in the manner set out above and, by exercising Your rights under this License, You may not implicitly or explicitly assert or imply any connection with, sponsorship or endorsement by the Original Author, Licensor and/or Attribution Parties, as appropriate, of You or Your use of the Work, without the separate, express prior written permission of the Original Author, Licensor and/or Attribution Parties. 40 | For the avoidance of doubt: 41 | 42 | Non-waivable Compulsory License Schemes. In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme cannot be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License; 43 | Waivable Compulsory License Schemes. 
In those jurisdictions in which the right to collect royalties through any statutory or compulsory licensing scheme can be waived, the Licensor reserves the exclusive right to collect such royalties for any exercise by You of the rights granted under this License if Your exercise of such rights is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c) and otherwise waives the right to collect royalties through any statutory or compulsory licensing scheme; and, 44 | Voluntary License Schemes. The Licensor reserves the right to collect royalties, whether individually or, in the event that the Licensor is a member of a collecting society that administers voluntary licensing schemes, via that society, from any exercise by You of the rights granted under this License that is for a purpose or use which is otherwise than noncommercial as permitted under Section 4(c). 45 | Except as otherwise agreed in writing by the Licensor or as may be otherwise permitted by applicable law, if You Reproduce, Distribute or Publicly Perform the Work either by itself or as part of any Adaptations or Collections, You must not distort, mutilate, modify or take other derogatory action in relation to the Work which would be prejudicial to the Original Author's honor or reputation. Licensor agrees that in those jurisdictions (e.g. Japan), in which any exercise of the right granted in Section 3(b) of this License (the right to make Adaptations) would be deemed to be a distortion, mutilation, modification or other derogatory action prejudicial to the Original Author's honor and reputation, the Licensor will waive or not assert, as appropriate, this Section, to the fullest extent permitted by the applicable national law, to enable You to reasonably exercise Your right under Section 3(b) of this License (right to make Adaptations) but not otherwise. 46 | 5. Representations, Warranties and Disclaimer 47 | 48 | UNLESS OTHERWISE MUTUALLY AGREED TO BY THE PARTIES IN WRITING AND TO THE FULLEST EXTENT PERMITTED BY APPLICABLE LAW, LICENSOR OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF IMPLIED WARRANTIES, SO THIS EXCLUSION MAY NOT APPLY TO YOU. 49 | 50 | 6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 51 | 52 | 7. Termination 53 | 54 | This License and the rights granted hereunder will terminate automatically upon any breach by You of the terms of this License. Individuals or entities who have received Adaptations or Collections from You under this License, however, will not have their licenses terminated provided such individuals or entities remain in full compliance with those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any termination of this License. 55 | Subject to the above terms and conditions, the license granted here is perpetual (for the duration of the applicable copyright in the Work). 
Notwithstanding the above, Licensor reserves the right to release the Work under different license terms or to stop distributing the Work at any time; provided, however that any such election will not serve to withdraw this License (or any other license that has been, or is required to be, granted under the terms of this License), and this License will continue in full force and effect unless terminated as stated above. 56 | 8. Miscellaneous 57 | 58 | Each time You Distribute or Publicly Perform the Work or a Collection, the Licensor offers to the recipient a license to the Work on the same terms and conditions as the license granted to You under this License. 59 | Each time You Distribute or Publicly Perform an Adaptation, Licensor offers to the recipient a license to the original Work on the same terms and conditions as the license granted to You under this License. 60 | If any provision of this License is invalid or unenforceable under applicable law, it shall not affect the validity or enforceability of the remainder of the terms of this License, and without further action by the parties to this agreement, such provision shall be reformed to the minimum extent necessary to make such provision valid and enforceable. 61 | No term or provision of this License shall be deemed waived and no breach consented to unless such waiver or consent shall be in writing and signed by the party to be charged with such waiver or consent. 62 | This License constitutes the entire agreement between the parties with respect to the Work licensed here. There are no understandings, agreements or representations with respect to the Work not specified here. Licensor shall not be bound by any additional provisions that may appear in any communication from You. This License may not be modified without the mutual written agreement of the Licensor and You. 63 | The rights granted under, and the subject matter referenced, in this License were drafted utilizing the terminology of the Berne Convention for the Protection of Literary and Artistic Works (as amended on September 28, 1979), the Rome Convention of 1961, the WIPO Copyright Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 and the Universal Copyright Convention (as revised on July 24, 1971). These rights and subject matter take effect in the relevant jurisdiction in which the License terms are sought to be enforced according to the corresponding provisions of the implementation of those treaty provisions in the applicable national law. If the standard suite of rights granted under applicable copyright law includes additional rights not granted under this License, such additional rights are deemed to be included in the License; this License is not intended to restrict the license of any rights under applicable law. 64 | Creative Commons Notice 65 | Creative Commons is not a party to this License, and makes no warranty whatsoever in connection with the Work. Creative Commons will not be liable to You or any party on any legal theory for any damages whatsoever, including without limitation any general, special, incidental or consequential damages arising in connection to this license. Notwithstanding the foregoing two (2) sentences, if Creative Commons has expressly identified itself as the Licensor hereunder, it shall have all rights and obligations of Licensor. 
66 | 67 | Except for the limited purpose of indicating to the public that the Work is licensed under the CCPL, Creative Commons does not authorize the use by either party of the trademark "Creative Commons" or any related trademark or logo of Creative Commons without the prior written consent of Creative Commons. Any permitted use will be in compliance with Creative Commons' then-current trademark usage guidelines, as may be published on its website or otherwise made available upon request from time to time. For the avoidance of doubt, this trademark restriction does not form part of this License. 68 | 69 | Creative Commons may be contacted at https://creativecommons.org/. 70 | --------------------------------------------------------------------------------
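
A minimal usage sketch of the track utilities in tools/tracks.py, assuming the convention used throughout that file: a tracks dictionary of parallel arrays 'boxes' (Nx4, left-top-right-bottom floats), 'timestamps' and 'track_ids' (both of length N). The synthetic track and the fps value below are illustrative assumptions, not repository data:

import numpy as np
from tools.tracks import remove_static_tracks, subsample_boxes

# synthetic example: one box sliding right by 5 pixels per frame for 30 frames
t = np.arange(30, dtype=np.float32)
tracks = dict(
    boxes = np.stack([10+5*t, 50+0*t, 60+5*t, 90+0*t], axis=1),  # (l,t,r,b)
    timestamps = np.int32(t),            # frame index of each box
    track_ids = np.zeros(30, np.int32),  # all boxes belong to track 0
)

# drop tracks that are too short or that keep overlapping themselves
tracks = remove_static_tracks(tracks, fps=25)  # fps=25 is an assumed frame rate

# keep at most 16 boxes per track, spaced proportionally to box size
tracks = subsample_boxes(tracks, max_len=16)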