├── Dockerfile ├── configs ├── config_quick.json └── config_refined.json ├── LICENSE ├── download.py ├── external ├── nms.py └── sort.py ├── train.py ├── utils.py ├── mask.py ├── .gitignore ├── render.py ├── test.py ├── detrk.py └── README.md /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime 2 | 3 | RUN apt-get update && apt-get install -y build-essential git ffmpeg libsm6 libxext6 fonts-freefont-ttf 4 | RUN pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html 5 | RUN pip install jupyter opencv-python scikit-image filterpy 6 | RUN cd /workspace && git clone https://github.com/facebookresearch/detectron2.git 7 | -------------------------------------------------------------------------------- /configs/config_quick.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": "COCO-Detection/retinanet_R_101_FPN_3x.yaml", 3 | "dataloader_num_workers": 4, 4 | "batch_size_per_image": 8, 5 | "anchor_generator_sizes": [[8, 16, 32, 64, 128]], 6 | "ims_per_batch": 1, 7 | "base_lr": 1e-4, 8 | "max_iter": 5000, 9 | "weights": "output/model_final.pth", 10 | "num_classes": 1, 11 | "score_threshold": 0.45, 12 | "nms_threshold": 0.25, 13 | "detections_per_image": 2000 14 | } 15 | -------------------------------------------------------------------------------- /configs/config_refined.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": "COCO-Detection/retinanet_R_101_FPN_3x.yaml", 3 | "dataloader_num_workers": 4, 4 | "batch_size_per_image": 128, 5 | "anchor_generator_sizes": [[8, 16, 32, 64, 128, 256]], 6 | "ims_per_batch": 2, 7 | "base_lr": 1e-4, 8 | "max_iter": 100000, 9 | "weights": "output/model_final.pth", 10 | "num_classes": 1, 11 | "score_threshold": 0.45, 12 | "nms_threshold": 0.25, 13 | "detections_per_image": 2000 14 | } 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 b3d-project 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | THIS SOFTWARE AND/OR DATA WAS DEPOSITED IN THE BAIR OPEN RESEARCH COMMONS 24 | REPOSITORY ON OCTOBER 6th, 2022. 
25 | -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gdown 3 | import subprocess 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Dataset download script') 8 | parser.add_argument( 9 | '--skip_videos', action='store_true', 10 | help='Skip downloading videos') 11 | parser.add_argument( 12 | '--skip_images', action='store_true', 13 | help='Skip downloading annotated images') 14 | parser.add_argument( 15 | '--pull_model', action='store_true', 16 | help='Download the model trained with config_refined.json') 17 | return parser.parse_args() 18 | 19 | 20 | def main(args): 21 | if not args.skip_videos: 22 | print('Downloading videos...') 23 | cmd = 'mkdir -p videos' 24 | subprocess.run(cmd.split(' ')) 25 | gdown.download_folder(id='1UcVuWcqHdxq4D5O8M02o4zZKSvDRtEd6') 26 | if not args.skip_images: 27 | print('Downloading annotated images...') 28 | gdown.download(id='1v2Go30iTtbNDnOcmoSPueF4Mp93P5Lbg') 29 | cmd = 'unzip vision.zip' 30 | subprocess.run(cmd.split(' ')) 31 | cmd = 'rm vision.zip' 32 | subprocess.run(cmd.split(' ')) 33 | if args.pull_model: 34 | print('Downloading model...') 35 | gdown.download(id='17ZiwW_11q5oLldTCXuXjCpd8FQ7MjKaD') 36 | cmd = 'mkdir -p output' 37 | subprocess.run(cmd.split(' ')) 38 | cmd = 'mv model_final.pth output/' 39 | subprocess.run(cmd.split(' ')) 40 | 41 | 42 | if __name__ == '__main__': 43 | main(parse_args()) 44 | -------------------------------------------------------------------------------- /external/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def nms(bounding_boxes, confidence_scores, threshold): 5 | # https://github.com/amusi/Non-Maximum-Suppression/blob/master/nms.py 6 | # If no bounding boxes, return empty list 7 | if len(bounding_boxes) == 0: 8 | return [], [] 9 | 10 | # Bounding boxes 11 | boxes = np.array(bounding_boxes) 12 | 13 | # coordinates of bounding boxes 14 | start_x = boxes[:, 0] 15 | start_y = boxes[:, 1] 16 | end_x = boxes[:, 2] 17 | end_y = boxes[:, 3] 18 | 19 | # Confidence scores of bounding boxes 20 | score = np.array(confidence_scores) 21 | 22 | # Picked bounding boxes 23 | picked_boxes = [] 24 | picked_scores = [] 25 | 26 | # Compute areas of bounding boxes 27 | areas = (end_x - start_x + 1) * (end_y - start_y + 1) 28 | 29 | # Sort by confidence score of bounding boxes 30 | order = np.argsort(score) 31 | 32 | # Iterate bounding boxes 33 | while order.size > 0: 34 | # The index of largest confidence score 35 | index = order[-1] 36 | 37 | # Pick the bounding box with largest confidence score 38 | picked_boxes.append(bounding_boxes[index]) 39 | picked_scores.append(confidence_scores[index]) 40 | 41 | # Compute ordinates of intersection-over-union(IOU) 42 | x1 = np.maximum(start_x[index], start_x[order[:-1]]) 43 | x2 = np.minimum(end_x[index], end_x[order[:-1]]) 44 | y1 = np.maximum(start_y[index], start_y[order[:-1]]) 45 | y2 = np.minimum(end_y[index], end_y[order[:-1]]) 46 | 47 | # Compute areas of intersection-over-union 48 | w = np.maximum(0.0, x2 - x1 + 1) 49 | h = np.maximum(0.0, y2 - y1 + 1) 50 | intersection = w * h 51 | 52 | # Compute the ratio between intersection and union 53 | ratio = intersection / (areas[index] + areas[order[:-1]] - intersection) 54 | 55 | left = np.where(ratio < threshold) 56 | order = order[left] 57 | 58 | return picked_boxes, 
picked_scores -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from detectron2 import model_zoo 3 | from detectron2.config import get_cfg 4 | from detectron2.data import MetadataCatalog 5 | from detectron2.data.datasets import register_coco_instances 6 | from detectron2.engine import DefaultTrainer 7 | from detectron2.utils.logger import setup_logger 8 | import json 9 | import os 10 | import torch 11 | setup_logger() 12 | print( 13 | 'Torch version:', torch.__version__, 14 | 'CUDA availability:', torch.cuda.is_available()) 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='Example train script') 19 | parser.add_argument('-c', '--config', required=True, 20 | help='Detection model configuration') 21 | return parser.parse_args() 22 | 23 | 24 | def main(args): 25 | dataset_name = 'b3d_train' 26 | annotation_path = 'vision/annotations/train.json' 27 | image_path = 'vision/images/train' 28 | register_coco_instances(dataset_name, {}, annotation_path, image_path) 29 | MetadataCatalog.get(dataset_name).thing_classes = ['vehicle'] 30 | 31 | with open(args.config) as fp: 32 | config = json.load(fp) 33 | cfg = get_cfg() 34 | cfg.merge_from_file(model_zoo.get_config_file(config['config'])) 35 | cfg.DATASETS.TRAIN = ('b3d_train',) 36 | cfg.DATASETS.TEST = () 37 | cfg.DATALOADER.NUM_WORKERS = config['dataloader_num_workers'] 38 | cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config['config']) 39 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes'] 40 | cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = config['batch_size_per_image'] 41 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes'] 42 | cfg.SOLVER.IMS_PER_BATCH = config['ims_per_batch'] 43 | cfg.SOLVER.BASE_LR = config['base_lr'] 44 | cfg.SOLVER.MAX_ITER = config['max_iter'] 45 | 46 | os.makedirs(cfg.OUTPUT_DIR, exist_ok=True) 47 | trainer = DefaultTrainer(cfg) 48 | trainer.resume_or_load(resume=False) 49 | trainer.train() 50 | 51 | 52 | if __name__ == '__main__': 53 | main(parse_args()) 54 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from matplotlib.path import Path 2 | import numpy as np 3 | 4 | 5 | def mask_frame(frame, mask): 6 | domain = mask.find('.//polygon[@label="domain"]').attrib['points'] 7 | domain = domain.replace(';', ',') 8 | domain = np.array([ 9 | float(pt) for pt in domain.split(',')]).reshape((-1, 2)) 10 | tl = (int(np.min(domain[:, 1])), int(np.min(domain[:, 0]))) 11 | br = (int(np.max(domain[:, 1])), int(np.max(domain[:, 0]))) 12 | domain_poly = Path(domain) 13 | width, height = int(frame.shape[1]), int(frame.shape[0]) 14 | x, y = np.meshgrid(np.arange(width), np.arange(height)) 15 | x, y = x.flatten(), y.flatten() 16 | pixel_points = np.vstack((x, y)).T 17 | bitmap = domain_poly.contains_points(pixel_points) 18 | bitmap = bitmap.reshape((height, width)) 19 | frame[bitmap == 0] = 0 20 | frame_masked = frame[tl[0]:br[0], tl[1]:br[1], :] 21 | return frame_masked 22 | 23 | 24 | def parse_outputs(outputs, offset): 25 | instances = outputs['instances'].to('cpu') 26 | bboxes = [] 27 | scores = [] 28 | classes = [] 29 | for bbox, score, pred_class in zip( 30 | instances.pred_boxes, instances.scores, instances.pred_classes): 31 | bbox[0] += offset[0] 32 | bbox[1] += offset[1] 33 | bbox[2] += offset[0] 34 | 
bbox[3] += offset[1] 35 | bboxes.append(bbox.numpy()) 36 | scores.append(score.numpy()) 37 | classes.append(pred_class.numpy()) 38 | return bboxes, scores, classes 39 | 40 | 41 | def regionize_image(image): 42 | height, width, _ = image.shape 43 | split_width = width 44 | while(split_width / height > 4): 45 | split_width = int(split_width / 2) 46 | batch = [] 47 | covered_width = 0 48 | while(covered_width < width): 49 | stop_width = min(covered_width + split_width, width) 50 | if (stop_width - covered_width < 0.75 * split_width): 51 | break 52 | batch.append( 53 | [image[:, covered_width:stop_width, :], (covered_width, 0)]) 54 | covered_width = min(covered_width + int(split_width / 2), width) 55 | return batch 56 | -------------------------------------------------------------------------------- /mask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import matplotlib.patches as patches 4 | from matplotlib.path import Path 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import os 8 | from xml.etree import ElementTree 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser(description='Example masking script') 13 | parser.add_argument('-i', '--image', required=True, 14 | help='Sample image') 15 | parser.add_argument('-m', '--mask', required=True, 16 | help='Specification of the mask') 17 | return parser.parse_args() 18 | 19 | 20 | def visualize_masking(image, domain_poly): 21 | image = image[:, :, ::-1] 22 | fig = plt.figure(dpi=300, frameon=False) 23 | ax = fig.add_subplot(1, 1, 1) 24 | ax.set_axis_off() 25 | ax.imshow(image) 26 | patch = patches.PathPatch( 27 | domain_poly, facecolor='r', alpha=0.5, edgecolor='none') 28 | ax.add_patch(patch) 29 | plt.savefig('output/mask_overlay.png', bbox_inches='tight', pad_inches=0) 30 | 31 | 32 | def main(args): 33 | tree = ElementTree.parse(args.mask) 34 | root = tree.getroot() 35 | domain = root.find('.//polygon[@label="domain"]').attrib['points'] 36 | domain = domain.replace(';', ',') 37 | domain = np.array([ 38 | float(pt) for pt in domain.split(',')]).reshape((-1, 2)) 39 | tl = (int(np.min(domain[:, 1])), int(np.min(domain[:, 0]))) 40 | br = (int(np.max(domain[:, 1])), int(np.max(domain[:, 0]))) 41 | domain_poly = Path(domain) 42 | 43 | image = cv2.imread(args.image) 44 | 45 | visualize_masking(image, domain_poly) 46 | 47 | width, height = int(image.shape[1]), int(image.shape[0]) 48 | x, y = np.meshgrid(np.arange(width), np.arange(height)) 49 | x, y = x.flatten(), y.flatten() 50 | pixel_points = np.vstack((x, y)).T 51 | bitmap = domain_poly.contains_points(pixel_points) 52 | bitmap = bitmap.reshape((height, width)) 53 | image[bitmap == 0] = 0 54 | image_masked = image[tl[0]:br[0], tl[1]:br[1], :] 55 | os.makedirs('output', exist_ok=True) 56 | cv2.imwrite('output/masked_image.png', image_masked) 57 | 58 | 59 | if __name__ == '__main__': 60 | main(parse_args()) 61 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | 
*.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Project specific folders 132 | .idea/ 133 | .vscode/ 134 | videos/ 135 | vision.zip 136 | vision/ 137 | output/ 138 | shared/ 139 | .DS_Store 140 | -------------------------------------------------------------------------------- /render.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | import json 4 | from utils import mask_frame 5 | from xml.etree import ElementTree 6 | import os 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Example rendering script') 11 | parser.add_argument('-v', '--video', required=True, 12 | help='Input video') 13 | parser.add_argument('-d', '--data', required=True, 14 | help='Data for rendering detection and tracking results') 15 | parser.add_argument('-m', '--mask', required=True, 16 | help='Mask for the video') 17 | return parser.parse_args() 18 | 19 | 20 | def main(args): 21 | tree = ElementTree.parse(args.mask) 22 | mask = tree.getroot() 23 | cap = cv2.VideoCapture(os.path.expanduser(args.video)) 24 | with open(args.data) as fp: 25 | rendering = json.load(fp) 26 | frame_index = 0 27 | frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 28 | out = None 29 | while cap.isOpened(): 30 | print('Parsing frame {:d} / {:d}...'.format(frame_index, frame_count)) 31 | success, frame = cap.read() 32 | if not success: 33 | break 34 | masked_frame = mask_frame(frame, mask) 35 | tracked_objects = rendering['{:d}'.format(frame_index)] 36 | for tracked_object in
tracked_objects: 37 | object_index = int(tracked_object[0]) 38 | tl = (int(tracked_object[1]), int(tracked_object[2])) 39 | br = (int(tracked_object[3]), int(tracked_object[4])) 40 | cv2.rectangle(masked_frame, tl, br, (255, 0, 0), 2) 41 | cv2.putText( 42 | masked_frame, '{:d}'.format(object_index), (br[0]+10, br[1]), 43 | cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2) 44 | 45 | display_width = int(masked_frame.shape[1] * 0.5) 46 | display_height = int(masked_frame.shape[0] * 0.5) 47 | resized_frame = cv2.resize( 48 | masked_frame, (display_width, display_height)) 49 | if out is None: 50 | scenario = args.video.replace('videos/', '').replace('.mp4', '') 51 | out = cv2.VideoWriter( 52 | 'output/{}.mp4'.format(scenario), 53 | cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30, 54 | (display_width,display_height)) 55 | out.write(resized_frame) 56 | 57 | # cv2.imshow('Frame', resized_frame) 58 | frame_index = frame_index + 1 59 | if cv2.waitKey(1) & 0xFF == ord('q'): 60 | break 61 | cap.release() 62 | out.release() 63 | cv2.destroyAllWindows() 64 | 65 | 66 | if __name__ == '__main__': 67 | main(parse_args()) 68 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | from detectron2 import model_zoo 4 | from detectron2.config import get_cfg 5 | from detectron2.data.datasets import register_coco_instances 6 | from detectron2.data import MetadataCatalog 7 | from detectron2.engine import DefaultPredictor 8 | from external.nms import nms 9 | import json 10 | import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | import numpy as np 13 | import os 14 | from utils import parse_outputs, regionize_image 15 | 16 | plt.rcParams['font.family'] = 'sans-serif' 17 | plt.rcParams['font.sans-serif'] = \ 18 | ['FreeSans'] + plt.rcParams['font.sans-serif'] 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser(description='Example test script') 23 | parser.add_argument('-i', '--image', required=True, 24 | help='Sample image') 25 | parser.add_argument('-c', '--config', required=True, 26 | help='Detection model configuration') 27 | return parser.parse_args() 28 | 29 | 30 | def visualize_outputs(image, bboxes, scores, save_path): 31 | fig = plt.figure(dpi=400, frameon=False) 32 | ax = fig.add_subplot(1, 1, 1) 33 | ax.set_axis_off() 34 | ax.imshow(image) 35 | cmap = plt.cm.get_cmap('terrain', len(bboxes)) 36 | for index, (bbox, score) in enumerate(zip(bboxes, scores)): 37 | origin = (bbox[0], bbox[1]) 38 | width = bbox[2] - bbox[0] 39 | length = bbox[3] - bbox[1] 40 | rect = patches.Rectangle( 41 | origin, width, length, 42 | linewidth=2, edgecolor=cmap(index), 43 | facecolor='w', alpha=0.5) 44 | ax.add_patch(rect) 45 | ax.text( 46 | bbox[0] + 2, bbox[3] - 5, 47 | '{:.2f}'.format(score), color='k', fontsize=3.0) 48 | plt.savefig(save_path, bbox_inches='tight', pad_inches=0) 49 | 50 | 51 | def main(args): 52 | dataset_name = 'b3d_test' 53 | annotations_path = 'vision/annotations/test.json' 54 | images_path = 'vision/images/test' 55 | register_coco_instances(dataset_name, {}, annotations_path, images_path) 56 | MetadataCatalog.get(dataset_name).thing_classes = ['vehicle'] 57 | 58 | with open(args.config) as fp: 59 | config = json.load(fp) 60 | cfg = get_cfg() 61 | cfg.merge_from_file(model_zoo.get_config_file(config['config'])) 62 | cfg.MODEL.WEIGHTS = config['weights'] 63 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes'] 
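# Note: the next four lines set both the ROI_HEADS and RETINANET variants of the
# score and NMS thresholds, so the same JSON config works whether the chosen model
# zoo file is an R-CNN or a RetinaNet; each architecture only reads its own keys.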
64 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = config['score_threshold'] 65 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = config['score_threshold'] 66 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = config['nms_threshold'] 67 | cfg.MODEL.RETINANET.NMS_THRESH_TEST = config['nms_threshold'] 68 | cfg.TEST.DETECTIONS_PER_IMAGE = config['detections_per_image'] 69 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes'] 70 | 71 | image_path = args.image 72 | image = cv2.imread(image_path) 73 | predictor = DefaultPredictor(cfg) 74 | image_regions = regionize_image(image) 75 | bboxes = [] 76 | scores = [] 77 | for _image, _offset in image_regions: 78 | _outputs = predictor(_image) 79 | _bboxes, _scores, _ = parse_outputs(_outputs, _offset) 80 | bboxes += _bboxes 81 | scores += _scores 82 | nms_threshold = config['nms_threshold'] 83 | nms_bboxes, nms_scores = nms(bboxes, scores, nms_threshold) 84 | 85 | save_path = os.path.join(cfg.OUTPUT_DIR, 'out.jpg') 86 | visualize_outputs(image, nms_bboxes, nms_scores, save_path) 87 | 88 | 89 | if __name__ == '__main__': 90 | main(parse_args()) 91 | -------------------------------------------------------------------------------- /detrk.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import cv2 3 | from detectron2 import model_zoo 4 | from detectron2.config import get_cfg 5 | from detectron2.data.datasets import register_coco_instances 6 | from detectron2.engine import DefaultPredictor 7 | from external.nms import nms 8 | from external.sort import Sort 9 | import json 10 | import numpy as np 11 | import os 12 | from utils import mask_frame, parse_outputs, regionize_image 13 | from xml.etree import ElementTree 14 | 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser( 18 | description='Example detection and tracking script') 19 | parser.add_argument('-v', '--video', required=True, 20 | help='Input video') 21 | parser.add_argument('-c', '--config', required=True, 22 | help='Detection model configuration') 23 | parser.add_argument('-m', '--mask', required=True, 24 | help='Mask for the video') 25 | return parser.parse_args() 26 | 27 | 28 | def main(args): 29 | with open(args.config) as fp: 30 | config = json.load(fp) 31 | cfg = get_cfg() 32 | cfg.merge_from_file(model_zoo.get_config_file(config['config'])) 33 | cfg.MODEL.WEIGHTS = config['weights'] 34 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes'] 35 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = config['score_threshold'] 36 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = config['score_threshold'] 37 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = config['nms_threshold'] 38 | cfg.MODEL.RETINANET.NMS_THRESH_TEST = config['nms_threshold'] 39 | cfg.TEST.DETECTIONS_PER_IMAGE = config['detections_per_image'] 40 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes'] 41 | predictor = DefaultPredictor(cfg) 42 | tree = ElementTree.parse(args.mask) 43 | mask = tree.getroot() 44 | 45 | tracker = Sort(max_age=5) 46 | cap = cv2.VideoCapture(os.path.expanduser(args.video)) 47 | trajectories = {} 48 | rendering = {} 49 | frame_index = 0 50 | frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 51 | while cap.isOpened(): 52 | print('Parsing frame {:d} / {:d}...'.format(frame_index, frame_count)) 53 | success, frame = cap.read() 54 | if not success: 55 | break 56 | frame_masked = mask_frame(frame, mask) 57 | 58 | image_regions = regionize_image(frame_masked) 59 | bboxes = [] 60 | scores = [] 61 | for _image, _offset in image_regions: 62 | _outputs = 
predictor(_image) 63 | _bboxes, _scores, _ = parse_outputs(_outputs, _offset) 64 | bboxes += _bboxes 65 | scores += _scores 66 | nms_threshold = config['nms_threshold'] 67 | nms_bboxes, nms_scores = nms(bboxes, scores, nms_threshold) 68 | detections = np.zeros((len(nms_bboxes), 5)) 69 | detections[:, 0:4] = nms_bboxes 70 | detections[:, 4] = nms_scores 71 | 72 | tracked_objects = tracker.update(detections) 73 | rendering[frame_index] = [] 74 | for tracked_object in tracked_objects: 75 | tl = (int(tracked_object[0]), int(tracked_object[1])) 76 | br = (int(tracked_object[2]), int(tracked_object[3])) 77 | object_index = int(tracked_object[4]) 78 | if object_index not in trajectories: 79 | trajectories[object_index] = [] 80 | trajectories[object_index].append([ 81 | frame_index, tl[0], tl[1], br[0], br[1]]) 82 | rendering[frame_index].append([ 83 | object_index, tl[0], tl[1], br[0], br[1]]) 84 | 85 | frame_index = frame_index + 1 86 | if cv2.waitKey(1) & 0xFF == ord('q'): 87 | break 88 | cap.release() 89 | cv2.destroyAllWindows() 90 | 91 | scenario = args.video.replace('videos/', '').replace('.mp4', '') 92 | with open('output/{}_t.json'.format(scenario), 'w') as fp: 93 | json.dump(trajectories, fp) 94 | with open('output/{}_r.json'.format(scenario), 'w') as fp: 95 | json.dump(rendering, fp) 96 | 97 | 98 | if __name__ == '__main__': 99 | main(parse_args()) 100 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Berkeley DeepDrive Drone Dataset 2 | 3 | ## Introduction 4 | 5 | The Berkeley DeepDrive Drone (B3D) Dataset allows researchers to study implicit driving etiquette in *understructured* road environments. 6 | The dataset consists of: 7 | 1. A set of 20 aerial videos recording understructured driving, 8 | 2. A collection of 16002 images and annotations to train vehicle detection models, and 9 | 3. A few example scripts illustrating typical usage. 10 | 11 | To download the videos and annotated images, run 12 | ``` 13 | pip install gdown 14 | python download.py 15 | ``` 16 | 17 | After downloading, the *full* structure of the dataset repository should be as follows: 18 | ``` 19 | . 20 | ├── configs 21 | │ ├── config_quick.json 22 | │ └── config_refined.json 23 | ├── Dockerfile 24 | ├── download.py 25 | ├── LICENSE 26 | ├── README.md 27 | ├── test.py 28 | ├── train.py 29 | ├── videos 30 | │ └── <20 mp4 files> 31 | └── vision 32 | ├── annotations 33 | │ ├── test.json 34 | │ ├── train.json 35 | │ └── val.json 36 | └── images 37 | ├── test 38 | │ └── <1636 jpg files> 39 | ├── train 40 | │ └── <12700 jpg files> 41 | └── val 42 | └── <1666 jpg files> 43 | ``` 44 | 45 | ## Getting Started 46 | We recommend running the scripts in a Docker container. 47 | Please follow the instructions [here](https://docs.docker.com/engine/install/) to install Docker and 48 | instructions [here](https://github.com/NVIDIA/nvidia-container-toolkit) to install NVIDIA Container Toolkit. 49 | 50 | After installing Docker and NVIDIA Container Toolkit, build the required Docker image 51 | ``` 52 | docker build -t detectron2:latest . 53 | ``` 54 | 55 | ## Usage 56 | To inspect and edit the annotations, please use the open source image annotation tool CVAT. 57 | Note that the training dataset might need to be split into several smaller datasets for it to be properly parsed by CVAT. 58 | 59 | To train a vehicle detection model using the annotated images, one could use the Detectron2 library.
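For a quick sanity check before training, one can confirm that the COCO annotations load correctly. The snippet below is only an illustrative sketch (it is not one of the repository scripts); it assumes Detectron2 is installed and reuses the same dataset registration call as `train.py`:
```
from detectron2.data import DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Register the training split exactly as train.py does.
register_coco_instances(
    'b3d_train', {}, 'vision/annotations/train.json', 'vision/images/train')

# Load the parsed records and report basic counts.
records = DatasetCatalog.get('b3d_train')
num_boxes = sum(len(record['annotations']) for record in records)
print('{} images, {} vehicle boxes'.format(len(records), num_boxes))
```
If the reported image count differs from the 12700 training images listed above, the download or unzip step likely did not complete.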
60 | The example `train.py` script is provided to show how to use Detectron2 to train a vehicle detection model. 61 | 62 | To run the trainer script, start a Docker container and run 63 | ``` 64 | docker run --shm-size 16G -p 8899:8888 --rm --gpus all -it -v [path/to/b3d]:/data -w /data detectron2 bash 65 | # Use config_refined.json for better accuracy 66 | python train.py -c configs/config_quick.json 67 | ``` 68 | The trained model will be saved to `output/model_final.pth`. 69 | 70 | Alternatively, one can skip the training by downloading a pre-trained model as follows 71 | ``` 72 | python download.py --skip_videos --skip_images --pull_model 73 | ``` 74 | The trained model will be downloaded to `output/model_final.pth`. 75 | Note that this model is trained with `config_refined.json`. 76 | 77 | A test script `test.py` is provided to run the trained model on a sample image. 78 | 79 | For instance, to use the test script on the image `vision/images/test/01_034_01.jpg`, run 80 | ``` 81 | # Use config_refined.json if the model_final.pth was generated with it 82 | python test.py -i vision/images/test/01_034_01.jpg -c configs/config_quick.json 83 | ``` 84 | The result will be exported to `output/out.jpg`. 85 | 86 | Lastly, we provide a masking script `mask.py` that crops an image according to a pre-defined polygonal mask. 87 | The mask is expected to be made in CVAT by creating a polygon with the category name `domain`. 88 | For example, please download the 89 | [example mask](https://drive.google.com/file/d/1JdOlkYjYV_lI79tDA79WhXseun61E6SM/view?usp=sharing) with the corresponding 90 | [example image](https://drive.google.com/file/d/1xOHCyKPunfHpzbr64n5oB8rNS6vz-fpM/view?usp=sharing). 91 | Move those files into the `output/` directory and run 92 | ``` 93 | python mask.py --image output/example_masking.png --mask output/example_masking.xml 94 | ``` 95 | The masked image will be saved to `output/masked_image.png` and a visualization of the mask to `output/mask_overlay.png`. 96 | To mask a video, simply apply the same masking to every frame of the video. 97 | 98 | ## Citation 99 | If you find this dataset useful, please consider citing the accompanying paper below: 100 | ``` 101 | @article{wu2022b3d, 102 | title={Decentralized Vehicle Coordination: The Berkeley DeepDrive Drone Dataset}, 103 | author={Fangyu Wu and Dequan Wang and Minjune Hwang and Chenhui Hao and Jiawei Lu and Jiamu Zhang and Christopher Chou and Trevor Darrell and Alexandre Bayen}, 104 | journal={arXiv}, 105 | year={2022} 106 | } 107 | ``` 108 | -------------------------------------------------------------------------------- /external/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | SORT: A Simple, Online and Realtime Tracker 3 | Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 | GNU General Public License for more details. 14 | 15 | You should have received a copy of the GNU General Public License 16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """ 18 | from __future__ import print_function 19 | 20 | import os 21 | import numpy as np 22 | import matplotlib 23 | matplotlib.use('TkAgg') 24 | import matplotlib.pyplot as plt 25 | import matplotlib.patches as patches 26 | from skimage import io 27 | 28 | import glob 29 | import time 30 | import argparse 31 | from filterpy.kalman import KalmanFilter 32 | 33 | np.random.seed(0) 34 | 35 | 36 | def linear_assignment(cost_matrix): 37 | try: 38 | import lap 39 | _, x, y = lap.lapjv(cost_matrix, extend_cost=True) 40 | return np.array([[y[i],i] for i in x if i >= 0]) # 41 | except ImportError: 42 | from scipy.optimize import linear_sum_assignment 43 | x, y = linear_sum_assignment(cost_matrix) 44 | return np.array(list(zip(x, y))) 45 | 46 | 47 | def iou_batch(bb_test, bb_gt): 48 | """ 49 | From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] 50 | """ 51 | bb_gt = np.expand_dims(bb_gt, 0) 52 | bb_test = np.expand_dims(bb_test, 1) 53 | 54 | xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0]) 55 | yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) 56 | xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) 57 | yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) 58 | w = np.maximum(0., xx2 - xx1) 59 | h = np.maximum(0., yy2 - yy1) 60 | wh = w * h 61 | o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) 62 | + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) 63 | return(o) 64 | 65 | 66 | def convert_bbox_to_z(bbox): 67 | """ 68 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form 69 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is 70 | the aspect ratio 71 | """ 72 | w = bbox[2] - bbox[0] 73 | h = bbox[3] - bbox[1] 74 | x = bbox[0] + w/2. 75 | y = bbox[1] + h/2. 76 | s = w * h #scale is just area 77 | r = w / float(h) 78 | return np.array([x, y, s, r]).reshape((4, 1)) 79 | 80 | 81 | def convert_x_to_bbox(x,score=None): 82 | """ 83 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form 84 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right 85 | """ 86 | w = np.sqrt(x[2] * x[3]) 87 | h = x[2] / w 88 | if(score==None): 89 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) 90 | else: 91 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) 92 | 93 | 94 | class KalmanBoxTracker(object): 95 | """ 96 | This class represents the internal state of individual tracked objects observed as bbox. 97 | """ 98 | count = 0 99 | def __init__(self,bbox): 100 | """ 101 | Initialises a tracker using initial bounding box. 102 | """ 103 | #define constant velocity model 104 | self.kf = KalmanFilter(dim_x=7, dim_z=4) 105 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) 106 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) 107 | 108 | self.kf.R[2:,2:] *= 10. 109 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities 110 | self.kf.P *= 10. 111 | self.kf.Q[-1,-1] *= 0.01 112 | self.kf.Q[4:,4:] *= 0.01 113 | 114 | self.kf.x[:4] = convert_bbox_to_z(bbox) 115 | self.time_since_update = 0 116 | self.id = KalmanBoxTracker.count 117 | KalmanBoxTracker.count += 1 118 | self.history = [] 119 | self.hits = 0 120 | self.hit_streak = 0 121 | self.age = 0 122 | 123 | def update(self,bbox): 124 | """ 125 | Updates the state vector with observed bbox. 
126 | """ 127 | self.time_since_update = 0 128 | self.history = [] 129 | self.hits += 1 130 | self.hit_streak += 1 131 | self.kf.update(convert_bbox_to_z(bbox)) 132 | 133 | def predict(self): 134 | """ 135 | Advances the state vector and returns the predicted bounding box estimate. 136 | """ 137 | if((self.kf.x[6]+self.kf.x[2])<=0): 138 | self.kf.x[6] *= 0.0 139 | self.kf.predict() 140 | self.age += 1 141 | if(self.time_since_update>0): 142 | self.hit_streak = 0 143 | self.time_since_update += 1 144 | self.history.append(convert_x_to_bbox(self.kf.x)) 145 | return self.history[-1] 146 | 147 | def get_state(self): 148 | """ 149 | Returns the current bounding box estimate. 150 | """ 151 | return convert_x_to_bbox(self.kf.x) 152 | 153 | 154 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3): 155 | """ 156 | Assigns detections to tracked object (both represented as bounding boxes) 157 | 158 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers 159 | """ 160 | if(len(trackers)==0): 161 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) 162 | 163 | iou_matrix = iou_batch(detections, trackers) 164 | 165 | if min(iou_matrix.shape) > 0: 166 | a = (iou_matrix > iou_threshold).astype(np.int32) 167 | if a.sum(1).max() == 1 and a.sum(0).max() == 1: 168 | matched_indices = np.stack(np.where(a), axis=1) 169 | else: 170 | matched_indices = linear_assignment(-iou_matrix) 171 | else: 172 | matched_indices = np.empty(shape=(0,2)) 173 | 174 | unmatched_detections = [] 175 | for d, det in enumerate(detections): 176 | if(d not in matched_indices[:,0]): 177 | unmatched_detections.append(d) 178 | unmatched_trackers = [] 179 | for t, trk in enumerate(trackers): 180 | if(t not in matched_indices[:,1]): 181 | unmatched_trackers.append(t) 182 | 183 | #filter out matched with low IOU 184 | matches = [] 185 | for m in matched_indices: 186 | if(iou_matrix[m[0], m[1]]= self.min_hits or self.frame_count <= self.min_hits): 246 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive 247 | i -= 1 248 | # remove dead tracklet 249 | if(trk.time_since_update > self.max_age): 250 | self.trackers.pop(i) 251 | if(len(ret)>0): 252 | return np.concatenate(ret) 253 | return np.empty((0,5)) 254 | 255 | def parse_args(): 256 | """Parse input arguments.""" 257 | parser = argparse.ArgumentParser(description='SORT demo') 258 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') 259 | parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data') 260 | parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train') 261 | parser.add_argument("--max_age", 262 | help="Maximum number of frames to keep alive a track without associated detections.", 263 | type=int, default=1) 264 | parser.add_argument("--min_hits", 265 | help="Minimum number of associated detections before track is initialised.", 266 | type=int, default=3) 267 | parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3) 268 | args = parser.parse_args() 269 | return args 270 | 271 | if __name__ == '__main__': 272 | # all train 273 | args = parse_args() 274 | display = args.display 275 | phase = args.phase 276 | total_time = 0.0 277 | total_frames = 0 278 | colours = np.random.rand(32, 3) #used only for display 279 | if(display): 280 | if not os.path.exists('mot_benchmark'): 281 | 
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') 282 | exit() 283 | plt.ion() 284 | fig = plt.figure() 285 | ax1 = fig.add_subplot(111, aspect='equal') 286 | 287 | if not os.path.exists('output'): 288 | os.makedirs('output') 289 | pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt') 290 | for seq_dets_fn in glob.glob(pattern): 291 | mot_tracker = Sort(max_age=args.max_age, 292 | min_hits=args.min_hits, 293 | iou_threshold=args.iou_threshold) #create instance of the SORT tracker 294 | seq_dets = np.loadtxt(seq_dets_fn, delimiter=',') 295 | seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0] 296 | 297 | with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file: 298 | print("Processing %s."%(seq)) 299 | for frame in range(int(seq_dets[:,0].max())): 300 | frame += 1 #detection and frame numbers begin at 1 301 | dets = seq_dets[seq_dets[:, 0]==frame, 2:7] 302 | dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] 303 | total_frames += 1 304 | 305 | if(display): 306 | fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame)) 307 | im =io.imread(fn) 308 | ax1.imshow(im) 309 | plt.title(seq + ' Tracked Targets') 310 | 311 | start_time = time.time() 312 | trackers = mot_tracker.update(dets) 313 | cycle_time = time.time() - start_time 314 | total_time += cycle_time 315 | 316 | for d in trackers: 317 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) 318 | if(display): 319 | d = d.astype(np.int32) 320 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) 321 | 322 | if(display): 323 | fig.canvas.flush_events() 324 | plt.draw() 325 | ax1.cla() 326 | 327 | print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time)) 328 | 329 | if(display): 330 | print("Note: to get real runtime results run without the option: --display") 331 | --------------------------------------------------------------------------------