├── Dockerfile
├── configs
│   ├── config_quick.json
│   └── config_refined.json
├── LICENSE
├── download.py
├── external
│   ├── nms.py
│   └── sort.py
├── train.py
├── utils.py
├── mask.py
├── .gitignore
├── render.py
├── test.py
├── detrk.py
└── README.md
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
2 |
3 | RUN apt-get update && apt-get install -y build-essential git ffmpeg libsm6 libxext6 fonts-freefont-ttf
4 | RUN pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu113/torch1.10/index.html
5 | RUN pip install jupyter opencv-python scikit-image filterpy
6 | RUN cd /workspace && git clone https://github.com/facebookresearch/detectron2.git
7 |
--------------------------------------------------------------------------------
/configs/config_quick.json:
--------------------------------------------------------------------------------
1 | {
2 | "config": "COCO-Detection/retinanet_R_101_FPN_3x.yaml",
3 | "dataloader_num_workers": 4,
4 | "batch_size_per_image": 8,
5 | "anchor_generator_sizes": [[8, 16, 32, 64, 128]],
6 | "ims_per_batch": 1,
7 | "base_lr": 1e-4,
8 | "max_iter": 5000,
9 | "weights": "output/model_final.pth",
10 | "num_classes": 1,
11 | "score_threshold": 0.45,
12 | "nms_threshold": 0.25,
13 | "detections_per_image": 2000
14 | }
15 |
--------------------------------------------------------------------------------
/configs/config_refined.json:
--------------------------------------------------------------------------------
1 | {
2 | "config": "COCO-Detection/retinanet_R_101_FPN_3x.yaml",
3 | "dataloader_num_workers": 4,
4 | "batch_size_per_image": 128,
5 | "anchor_generator_sizes": [[8, 16, 32, 64, 128, 256]],
6 | "ims_per_batch": 2,
7 | "base_lr": 1e-4,
8 | "max_iter": 100000,
9 | "weights": "output/model_final.pth",
10 | "num_classes": 1,
11 | "score_threshold": 0.45,
12 | "nms_threshold": 0.25,
13 | "detections_per_image": 2000
14 | }
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 b3d-project
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | THIS SOFTWARE AND/OR DATA WAS DEPOSITED IN THE BAIR OPEN RESEARCH COMMONS
24 | REPOSITORY ON OCTOBER 6th, 2022.
25 |
--------------------------------------------------------------------------------
/download.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import gdown
3 | import subprocess
4 |
5 |
6 | def parse_args():
7 | parser = argparse.ArgumentParser(description='Dataset download script')
8 | parser.add_argument(
9 | '--skip_videos', action='store_true',
10 | help='Skip downloading videos')
11 | parser.add_argument(
12 | '--skip_images', action='store_true',
13 | help='Skip downloading annotated images')
14 | parser.add_argument(
15 | '--pull_model', action='store_true',
16 | help='Download the model trained with config_refined.json')
17 | return parser.parse_args()
18 |
19 |
20 | def main(args):
21 | if not args.skip_videos:
22 | print('Downloading videos...')
23 | cmd = 'mkdir -p videos'
24 | subprocess.run(cmd.split(' '))
25 | gdown.download_folder(id='1UcVuWcqHdxq4D5O8M02o4zZKSvDRtEd6')
26 | if not args.skip_images:
27 | print('Downloading annotated images...')
28 | gdown.download(id='1v2Go30iTtbNDnOcmoSPueF4Mp93P5Lbg')
29 | cmd = 'unzip vision.zip'
30 | subprocess.run(cmd.split(' '))
31 | cmd = 'rm vision.zip'
32 | subprocess.run(cmd.split(' '))
33 | if args.pull_model:
34 | print('Downloading model...')
35 | gdown.download(id='17ZiwW_11q5oLldTCXuXjCpd8FQ7MjKaD')
36 | cmd = 'mkdir -p output'
37 | subprocess.run(cmd.split(' '))
38 | cmd = 'mv model_final.pth output/'
39 | subprocess.run(cmd.split(' '))
40 |
41 |
42 | if __name__ == '__main__':
43 | main(parse_args())
44 |
--------------------------------------------------------------------------------
/external/nms.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def nms(bounding_boxes, confidence_scores, threshold):
5 | # https://github.com/amusi/Non-Maximum-Suppression/blob/master/nms.py
6 | # If no bounding boxes, return empty list
7 | if len(bounding_boxes) == 0:
8 | return [], []
9 |
10 | # Bounding boxes
11 | boxes = np.array(bounding_boxes)
12 |
13 | # coordinates of bounding boxes
14 | start_x = boxes[:, 0]
15 | start_y = boxes[:, 1]
16 | end_x = boxes[:, 2]
17 | end_y = boxes[:, 3]
18 |
19 | # Confidence scores of bounding boxes
20 | score = np.array(confidence_scores)
21 |
22 | # Picked bounding boxes
23 | picked_boxes = []
24 | picked_scores = []
25 |
26 | # Compute areas of bounding boxes
27 | areas = (end_x - start_x + 1) * (end_y - start_y + 1)
28 |
29 | # Sort by confidence score of bounding boxes
30 | order = np.argsort(score)
31 |
32 | # Iterate bounding boxes
33 | while order.size > 0:
34 | # The index of largest confidence score
35 | index = order[-1]
36 |
37 | # Pick the bounding box with largest confidence score
38 | picked_boxes.append(bounding_boxes[index])
39 | picked_scores.append(confidence_scores[index])
40 |
41 |         # Compute coordinates of the intersection for intersection-over-union (IoU)
42 | x1 = np.maximum(start_x[index], start_x[order[:-1]])
43 | x2 = np.minimum(end_x[index], end_x[order[:-1]])
44 | y1 = np.maximum(start_y[index], start_y[order[:-1]])
45 | y2 = np.minimum(end_y[index], end_y[order[:-1]])
46 |
47 | # Compute areas of intersection-over-union
48 | w = np.maximum(0.0, x2 - x1 + 1)
49 | h = np.maximum(0.0, y2 - y1 + 1)
50 | intersection = w * h
51 |
52 | # Compute the ratio between intersection and union
53 | ratio = intersection / (areas[index] + areas[order[:-1]] - intersection)
54 |
55 | left = np.where(ratio < threshold)
56 | order = order[left]
57 |
58 | return picked_boxes, picked_scores
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from detectron2 import model_zoo
3 | from detectron2.config import get_cfg
4 | from detectron2.data import MetadataCatalog
5 | from detectron2.data.datasets import register_coco_instances
6 | from detectron2.engine import DefaultTrainer
7 | from detectron2.utils.logger import setup_logger
8 | import json
9 | import os
10 | import torch
11 | setup_logger()
12 | print(
13 | 'Torch version:', torch.__version__,
14 | 'CUDA availability:', torch.cuda.is_available())
15 |
16 |
17 | def parse_args():
18 | parser = argparse.ArgumentParser(description='Example train script')
19 | parser.add_argument('-c', '--config', required=True,
20 | help='Detection model configuration')
21 | return parser.parse_args()
22 |
23 |
24 | def main(args):
25 | dataset_name = 'b3d_train'
26 | annotation_path = 'vision/annotations/train.json'
27 | image_path = 'vision/images/train'
28 | register_coco_instances(dataset_name, {}, annotation_path, image_path)
29 | MetadataCatalog.get(dataset_name).thing_classes = ['vehicle']
30 |
31 | with open(args.config) as fp:
32 | config = json.load(fp)
33 | cfg = get_cfg()
34 | cfg.merge_from_file(model_zoo.get_config_file(config['config']))
35 | cfg.DATASETS.TRAIN = ('b3d_train',)
36 | cfg.DATASETS.TEST = ()
37 | cfg.DATALOADER.NUM_WORKERS = config['dataloader_num_workers']
38 | cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config['config'])
39 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes']
40 | cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = config['batch_size_per_image']
41 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes']
42 | cfg.SOLVER.IMS_PER_BATCH = config['ims_per_batch']
43 | cfg.SOLVER.BASE_LR = config['base_lr']
44 | cfg.SOLVER.MAX_ITER = config['max_iter']
45 |
46 | os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
47 | trainer = DefaultTrainer(cfg)
48 | trainer.resume_or_load(resume=False)
49 | trainer.train()
50 |
51 |
52 | if __name__ == '__main__':
53 | main(parse_args())
54 |
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from matplotlib.path import Path
2 | import numpy as np
3 |
4 |
5 | def mask_frame(frame, mask):
6 | domain = mask.find('.//polygon[@label="domain"]').attrib['points']
7 | domain = domain.replace(';', ',')
8 | domain = np.array([
9 | float(pt) for pt in domain.split(',')]).reshape((-1, 2))
10 | tl = (int(np.min(domain[:, 1])), int(np.min(domain[:, 0])))
11 | br = (int(np.max(domain[:, 1])), int(np.max(domain[:, 0])))
12 | domain_poly = Path(domain)
13 | width, height = int(frame.shape[1]), int(frame.shape[0])
14 | x, y = np.meshgrid(np.arange(width), np.arange(height))
15 | x, y = x.flatten(), y.flatten()
16 | pixel_points = np.vstack((x, y)).T
17 | bitmap = domain_poly.contains_points(pixel_points)
18 | bitmap = bitmap.reshape((height, width))
19 | frame[bitmap == 0] = 0
20 | frame_masked = frame[tl[0]:br[0], tl[1]:br[1], :]
21 | return frame_masked
22 |
23 |
24 | def parse_outputs(outputs, offset):
25 | instances = outputs['instances'].to('cpu')
26 | bboxes = []
27 | scores = []
28 | classes = []
29 | for bbox, score, pred_class in zip(
30 | instances.pred_boxes, instances.scores, instances.pred_classes):
31 | bbox[0] += offset[0]
32 | bbox[1] += offset[1]
33 | bbox[2] += offset[0]
34 | bbox[3] += offset[1]
35 | bboxes.append(bbox.numpy())
36 | scores.append(score.numpy())
37 | classes.append(pred_class.numpy())
38 | return bboxes, scores, classes
39 |
40 |
41 | def regionize_image(image):
42 | height, width, _ = image.shape
43 | split_width = width
44 | while(split_width / height > 4):
45 | split_width = int(split_width / 2)
46 | batch = []
47 | covered_width = 0
48 | while(covered_width < width):
49 | stop_width = min(covered_width + split_width, width)
50 | if (stop_width - covered_width < 0.75 * split_width):
51 | break
52 | batch.append(
53 | [image[:, covered_width:stop_width, :], (covered_width, 0)])
54 | covered_width = min(covered_width + int(split_width / 2), width)
55 | return batch
56 |
--------------------------------------------------------------------------------
/mask.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import matplotlib.patches as patches
4 | from matplotlib.path import Path
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import os
8 | from xml.etree import ElementTree
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(description='Example masking script')
13 | parser.add_argument('-i', '--image', required=True,
14 | help='Sample image')
15 | parser.add_argument('-m', '--mask', required=True,
16 | help='Specification of the mask')
17 | return parser.parse_args()
18 |
19 |
20 | def visualize_masking(image, domain_poly):
21 | image = image[:, :, ::-1]
22 | fig = plt.figure(dpi=300, frameon=False)
23 | ax = fig.add_subplot(1, 1, 1)
24 | ax.set_axis_off()
25 | ax.imshow(image)
26 | patch = patches.PathPatch(
27 | domain_poly, facecolor='r', alpha=0.5, edgecolor='none')
28 | ax.add_patch(patch)
29 | plt.savefig('output/mask_overlay.png', bbox_inches='tight', pad_inches=0)
30 |
31 |
32 | def main(args):
33 | tree = ElementTree.parse(args.mask)
34 | root = tree.getroot()
35 | domain = root.find('.//polygon[@label="domain"]').attrib['points']
36 | domain = domain.replace(';', ',')
37 | domain = np.array([
38 | float(pt) for pt in domain.split(',')]).reshape((-1, 2))
39 | tl = (int(np.min(domain[:, 1])), int(np.min(domain[:, 0])))
40 | br = (int(np.max(domain[:, 1])), int(np.max(domain[:, 0])))
41 | domain_poly = Path(domain)
42 |
43 | image = cv2.imread(args.image)
44 |     os.makedirs('output', exist_ok=True)
45 |     visualize_masking(image, domain_poly)
46 |
47 | width, height = int(image.shape[1]), int(image.shape[0])
48 | x, y = np.meshgrid(np.arange(width), np.arange(height))
49 | x, y = x.flatten(), y.flatten()
50 | pixel_points = np.vstack((x, y)).T
51 | bitmap = domain_poly.contains_points(pixel_points)
52 | bitmap = bitmap.reshape((height, width))
53 | image[bitmap == 0] = 0
54 | image_masked = image[tl[0]:br[0], tl[1]:br[1], :]
55 | os.makedirs('output', exist_ok=True)
56 | cv2.imwrite('output/masked_image.png', image_masked)
57 |
58 |
59 | if __name__ == '__main__':
60 | main(parse_args())
61 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
131 | # Project specific folders
132 | .idea/
133 | .vscode/
134 | videos/
135 | vision.zip
136 | vision/
137 | output/
138 | shared/
139 | .DS_Store
140 |
--------------------------------------------------------------------------------
/render.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | import json
4 | from utils import mask_frame
5 | from xml.etree import ElementTree
6 | import os
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(description='Example rendering script')
11 | parser.add_argument('-v', '--video', required=True,
12 | help='Input video')
13 | parser.add_argument('-d', '--data', required=True,
14 |                         help='Detection and tracking results to render')
15 | parser.add_argument('-m', '--mask', required=True,
16 | help='Mask for the video')
17 | return parser.parse_args()
18 |
19 |
20 | def main(args):
21 | tree = ElementTree.parse(args.mask)
22 | mask = tree.getroot()
23 | cap = cv2.VideoCapture(os.path.expanduser(args.video))
24 |     with open(args.data) as fp:
25 | rendering = json.load(fp)
26 | frame_index = 0
27 | frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
28 | out = None
29 | while cap.isOpened():
30 | print('Parsing frame {:d} / {:d}...'.format(frame_index, frame_count))
31 | success, frame = cap.read()
32 | if not success:
33 | break
34 | masked_frame = mask_frame(frame, mask)
35 | tracked_objects = rendering['{:d}'.format(frame_index)]
36 | for tracked_object in tracked_objects:
37 | object_index = int(tracked_object[0])
38 | tl = (int(tracked_object[1]), int(tracked_object[2]))
39 | br = (int(tracked_object[3]), int(tracked_object[4]))
40 | cv2.rectangle(masked_frame, tl, br, (255, 0, 0), 2)
41 | cv2.putText(
42 | masked_frame, '{:d}'.format(object_index), (br[0]+10, br[1]),
43 | cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
44 |
45 | display_width = int(masked_frame.shape[1] * 0.5)
46 | display_height = int(masked_frame.shape[0] * 0.5)
47 | resized_frame = cv2.resize(
48 | masked_frame, (display_width, display_height))
49 | if out is None:
50 | scenario = args.video.replace('videos/', '').replace('.mp4', '')
51 | out = cv2.VideoWriter(
52 | 'output/{}.mp4'.format(scenario),
53 | cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), 30,
54 | (display_width,display_height))
55 | out.write(resized_frame)
56 |
57 | # cv2.imshow('Frame', resized_frame)
58 | frame_index = frame_index + 1
59 | if cv2.waitKey(1) & 0xFF == ord('q'):
60 | break
61 | cap.release()
62 | out.release()
63 | cv2.destroyAllWindows()
64 |
65 |
66 | if __name__ == '__main__':
67 | main(parse_args())
68 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | from detectron2 import model_zoo
4 | from detectron2.config import get_cfg
5 | from detectron2.data.datasets import register_coco_instances
6 | from detectron2.data import MetadataCatalog
7 | from detectron2.engine import DefaultPredictor
8 | from external.nms import nms
9 | import json
10 | import matplotlib.pyplot as plt
11 | import matplotlib.patches as patches
12 | import numpy as np
13 | import os
14 | from utils import parse_outputs, regionize_image
15 |
16 | plt.rcParams['font.family'] = 'sans-serif'
17 | plt.rcParams['font.sans-serif'] = \
18 | ['FreeSans'] + plt.rcParams['font.sans-serif']
19 |
20 |
21 | def parse_args():
22 | parser = argparse.ArgumentParser(description='Example test script')
23 | parser.add_argument('-i', '--image', required=True,
24 | help='Sample image')
25 | parser.add_argument('-c', '--config', required=True,
26 | help='Detection model configuration')
27 | return parser.parse_args()
28 |
29 |
30 | def visualize_outputs(image, bboxes, scores, save_path):
31 | fig = plt.figure(dpi=400, frameon=False)
32 | ax = fig.add_subplot(1, 1, 1)
33 | ax.set_axis_off()
34 | ax.imshow(image)
35 | cmap = plt.cm.get_cmap('terrain', len(bboxes))
36 | for index, (bbox, score) in enumerate(zip(bboxes, scores)):
37 | origin = (bbox[0], bbox[1])
38 | width = bbox[2] - bbox[0]
39 | length = bbox[3] - bbox[1]
40 | rect = patches.Rectangle(
41 | origin, width, length,
42 | linewidth=2, edgecolor=cmap(index),
43 | facecolor='w', alpha=0.5)
44 | ax.add_patch(rect)
45 | ax.text(
46 | bbox[0] + 2, bbox[3] - 5,
47 | '{:.2f}'.format(score), color='k', fontsize=3.0)
48 | plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
49 |
50 |
51 | def main(args):
52 | dataset_name = 'b3d_test'
53 | annotations_path = 'vision/annotations/test.json'
54 | images_path = 'vision/images/test'
55 | register_coco_instances(dataset_name, {}, annotations_path, images_path)
56 | MetadataCatalog.get(dataset_name).thing_classes = ['vehicle']
57 |
58 | with open(args.config) as fp:
59 | config = json.load(fp)
60 | cfg = get_cfg()
61 | cfg.merge_from_file(model_zoo.get_config_file(config['config']))
62 | cfg.MODEL.WEIGHTS = config['weights']
63 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes']
64 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = config['score_threshold']
65 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = config['score_threshold']
66 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = config['nms_threshold']
67 | cfg.MODEL.RETINANET.NMS_THRESH_TEST = config['nms_threshold']
68 | cfg.TEST.DETECTIONS_PER_IMAGE = config['detections_per_image']
69 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes']
70 |
71 | image_path = args.image
72 | image = cv2.imread(image_path)
73 | predictor = DefaultPredictor(cfg)
74 | image_regions = regionize_image(image)
75 | bboxes = []
76 | scores = []
77 | for _image, _offset in image_regions:
78 | _outputs = predictor(_image)
79 | _bboxes, _scores, _ = parse_outputs(_outputs, _offset)
80 | bboxes += _bboxes
81 | scores += _scores
82 | nms_threshold = config['nms_threshold']
83 | nms_bboxes, nms_scores = nms(bboxes, scores, nms_threshold)
84 |
85 | save_path = os.path.join(cfg.OUTPUT_DIR, 'out.jpg')
86 | visualize_outputs(image, nms_bboxes, nms_scores, save_path)
87 |
88 |
89 | if __name__ == '__main__':
90 | main(parse_args())
91 |
--------------------------------------------------------------------------------
/detrk.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import cv2
3 | from detectron2 import model_zoo
4 | from detectron2.config import get_cfg
5 | from detectron2.data.datasets import register_coco_instances
6 | from detectron2.engine import DefaultPredictor
7 | from external.nms import nms
8 | from external.sort import Sort
9 | import json
10 | import numpy as np
11 | import os
12 | from utils import mask_frame, parse_outputs, regionize_image
13 | from xml.etree import ElementTree
14 |
15 |
16 | def parse_args():
17 | parser = argparse.ArgumentParser(
18 | description='Example detection and tracking script')
19 | parser.add_argument('-v', '--video', required=True,
20 | help='Input video')
21 | parser.add_argument('-c', '--config', required=True,
22 | help='Detection model configuration')
23 | parser.add_argument('-m', '--mask', required=True,
24 | help='Mask for the video')
25 | return parser.parse_args()
26 |
27 |
28 | def main(args):
29 | with open(args.config) as fp:
30 | config = json.load(fp)
31 | cfg = get_cfg()
32 | cfg.merge_from_file(model_zoo.get_config_file(config['config']))
33 | cfg.MODEL.WEIGHTS = config['weights']
34 | cfg.MODEL.ROI_HEADS.NUM_CLASSES = config['num_classes']
35 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = config['score_threshold']
36 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = config['score_threshold']
37 | cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = config['nms_threshold']
38 | cfg.MODEL.RETINANET.NMS_THRESH_TEST = config['nms_threshold']
39 | cfg.TEST.DETECTIONS_PER_IMAGE = config['detections_per_image']
40 | cfg.MODEL.ANCHOR_GENERATOR.SIZES = config['anchor_generator_sizes']
41 | predictor = DefaultPredictor(cfg)
42 | tree = ElementTree.parse(args.mask)
43 | mask = tree.getroot()
44 |
45 | tracker = Sort(max_age=5)
46 | cap = cv2.VideoCapture(os.path.expanduser(args.video))
47 | trajectories = {}
48 | rendering = {}
49 | frame_index = 0
50 | frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
51 | while cap.isOpened():
52 | print('Parsing frame {:d} / {:d}...'.format(frame_index, frame_count))
53 | success, frame = cap.read()
54 | if not success:
55 | break
56 | frame_masked = mask_frame(frame, mask)
57 |
58 | image_regions = regionize_image(frame_masked)
59 | bboxes = []
60 | scores = []
61 | for _image, _offset in image_regions:
62 | _outputs = predictor(_image)
63 | _bboxes, _scores, _ = parse_outputs(_outputs, _offset)
64 | bboxes += _bboxes
65 | scores += _scores
66 | nms_threshold = config['nms_threshold']
67 | nms_bboxes, nms_scores = nms(bboxes, scores, nms_threshold)
68 | detections = np.zeros((len(nms_bboxes), 5))
69 | detections[:, 0:4] = nms_bboxes
70 | detections[:, 4] = nms_scores
71 |
72 | tracked_objects = tracker.update(detections)
73 | rendering[frame_index] = []
74 | for tracked_object in tracked_objects:
75 | tl = (int(tracked_object[0]), int(tracked_object[1]))
76 | br = (int(tracked_object[2]), int(tracked_object[3]))
77 | object_index = int(tracked_object[4])
78 | if object_index not in trajectories:
79 | trajectories[object_index] = []
80 | trajectories[object_index].append([
81 | frame_index, tl[0], tl[1], br[0], br[1]])
82 | rendering[frame_index].append([
83 | object_index, tl[0], tl[1], br[0], br[1]])
84 |
85 | frame_index = frame_index + 1
86 | if cv2.waitKey(1) & 0xFF == ord('q'):
87 | break
88 | cap.release()
89 | cv2.destroyAllWindows()
90 |
91 | scenario = args.video.replace('videos/', '').replace('.mp4', '')
92 | with open('output/{}_t.json'.format(scenario), 'w') as fp:
93 | json.dump(trajectories, fp)
94 | with open('output/{}_r.json'.format(scenario), 'w') as fp:
95 | json.dump(rendering, fp)
96 |
97 |
98 | if __name__ == '__main__':
99 | main(parse_args())
100 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Berkeley DeepDrive Drone Dataset
2 |
3 | ## Introduction
4 |
5 | The Berkeley DeepDrive Drone (B3D) Dataset allows researchers to study implicit driving etiquette in *understructured* road environments.
6 | The dataset consists of:
7 | 1. A set of 20 aerial videos recording understructured driving,
8 | 2. A collection of 16002 images and annotations to train vehicle detection models, and
9 | 3. A few example scripts for illustrating typical usages.
10 |
11 | To download the videos and annotated images, run
12 | ```
13 | pip install gdown
14 | python download.py
15 | ```
16 |
17 | After downloading, the structure of the dataset repository should be as follows:
18 | ```
19 | .
20 | ├── configs
21 | │ ├── config_quick.json
22 | │ └── config_refined.json
23 | ├── Dockerfile
24 | ├── download.py
25 | ├── LICENSE
26 | ├── README.md
27 | ├── test.py
28 | ├── train.py
29 | ├── videos
30 | │ └── <20 mp4 files>
31 | └── vision
32 | ├── annotations
33 | │ ├── test.json
34 | │ ├── train.json
35 | │ └── val.json
36 | └── images
37 | ├── test
38 | │ └── <1636 jpg files>
39 | ├── train
40 | │ └── <12700 jpg files>
41 | └── val
42 | └── <1666 jpg files>
43 | ```
44 |
45 | ## Getting Started
46 | We recommend running the scripts in a Docker container.
47 | Please follow the instructions [here](https://docs.docker.com/engine/install/) to install Docker and
48 | the instructions [here](https://github.com/NVIDIA/nvidia-container-toolkit) to install the NVIDIA Container Toolkit.
49 |
50 | After installing Docker and NVIDIA Container Toolkit, build the required Docker image
51 | ```
52 | docker build -t detectron2:latest .
53 | ```
54 |
55 | ## Usage
56 | To inspect and edit the annotations, please use the open-source image annotation tool CVAT.
57 | Note that the training annotation file may need to be split into several smaller subsets before CVAT can parse it properly.
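One way to do the split is directly on the COCO-format annotation file. Below is a minimal sketch, assuming the standard COCO fields `images`, `annotations`, and `categories`; the chunk size and output file names are arbitrary:
```
import json

def split_coco(annotation_path, chunk_size=2000):
    # Split a COCO-format annotation file into chunks of at most
    # `chunk_size` images, keeping only the annotations that refer
    # to the images in each chunk.
    with open(annotation_path) as fp:
        coco = json.load(fp)
    for index, start in enumerate(range(0, len(coco['images']), chunk_size)):
        chunk_images = coco['images'][start:start + chunk_size]
        chunk_ids = {image['id'] for image in chunk_images}
        chunk = {
            'images': chunk_images,
            'annotations': [
                a for a in coco['annotations'] if a['image_id'] in chunk_ids],
            'categories': coco['categories'],
        }
        with open('train_{:02d}.json'.format(index), 'w') as fp:
            json.dump(chunk, fp)

split_coco('vision/annotations/train.json')
```
Each resulting file can then be imported into CVAT together with the corresponding images.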
58 |
59 | To train a vehicle detection model on the annotated images, one can use the Detectron2 library.
60 | The example script `train.py` shows how to do this.
61 |
62 | To run the training script, start a Docker container and run
63 | ```
64 | docker run --shm-size 16G -p 8899:8888 --rm --gpus all -it -v [path/to/b3d]:/data -w /data detectron2 bash
65 | # Use config_refined.json for better accuracy
66 | python train.py -c configs/config_quick.json
67 | ```
68 | The trained model will be saved to `output/model_final.pth`.
69 |
70 | Alternatively, one can skip the training by downloading a pre-trained model as follows
71 | ```
72 | python download.py --skip_videos --skip_images --pull_model
73 | ```
74 | The trained model will be downloaded to `output/model_final.pth`.
75 | Note that this model is trained with `config_refined.json`.
76 |
77 | A test script `test.py` is provided to run the trained model on a sample image.
78 |
79 | For instance, to use the test script on the image `vision/images/test/01_034_01.jpg`, run
80 | ```
81 | # Use config_refined.json if model_final.pth was trained with it
82 | python test.py -i vision/images/test/01_034_01.jpg -c configs/config_quick.json
83 | ```
84 | The result will be exported to `output/out.jpg`.
85 |
86 | Lastly, we provide a masking script `mask.py` that crops an image according to a pre-defined polygonal mask.
87 | The mask is expected to be created in CVAT by drawing a polygon with the label `domain`.
88 | For example, please download the
89 | [example mask](https://drive.google.com/file/d/1JdOlkYjYV_lI79tDA79WhXseun61E6SM/view?usp=sharing) with the corresponding
90 | [example image](https://drive.google.com/file/d/1xOHCyKPunfHpzbr64n5oB8rNS6vz-fpM/view?usp=sharing).
91 | Move those files into the `output/` directory and run
92 | ```
93 | python mask.py --image output/example_masking.png --mask output/example_masking.xml
94 | ```
95 | The masked image will be saved to `output/masked_image.png` and a visualization of the mask to `output/mask_overlay.png`.
96 | To mask a video, simply apply the same masking to every frame of the video.
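A minimal sketch using `mask_frame` from `utils.py` follows; the video path, output name, and frame rate below are placeholders:
```
import cv2
from utils import mask_frame
from xml.etree import ElementTree

# Parse the CVAT mask once and reuse it for every frame.
mask = ElementTree.parse('output/example_masking.xml').getroot()
cap = cv2.VideoCapture('videos/example.mp4')  # placeholder input video
writer = None
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    masked = mask_frame(frame, mask)
    if writer is None:
        # mask_frame crops to the polygon's bounding box, so size the
        # writer from the first masked frame.
        height, width = masked.shape[:2]
        writer = cv2.VideoWriter(
            'output/masked_video.mp4',
            cv2.VideoWriter_fourcc(*'mp4v'), 30, (width, height))
    writer.write(masked)
cap.release()
if writer is not None:
    writer.release()
```
The frame rate is hard-coded to 30 here, as in the rendering script; adjust it to match the source video if needed.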
97 |
98 | ## Citation
99 | If you find this dataset useful, please consider citing the accompanying paper:
100 | ```
101 | @article{wu2022b3d,
102 | title={Decentralized Vehicle Coordination: The Berkeley DeepDrive Drone Dataset},
103 | author={Fangyu Wu and Dequan Wang and Minjune Hwang and Chenhui Hao and Jiawei Lu and Jiamu Zhang and Christopher Chou and Trevor Darrell and Alexandre Bayen},
104 | journal={arXiv},
105 | year={2022}
106 | }
107 | ```
108 |
--------------------------------------------------------------------------------
/external/sort.py:
--------------------------------------------------------------------------------
1 | """
2 | SORT: A Simple, Online and Realtime Tracker
3 | Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai
4 |
5 | This program is free software: you can redistribute it and/or modify
6 | it under the terms of the GNU General Public License as published by
7 | the Free Software Foundation, either version 3 of the License, or
8 | (at your option) any later version.
9 |
10 | This program is distributed in the hope that it will be useful,
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see <https://www.gnu.org/licenses/>.
17 | """
18 | from __future__ import print_function
19 |
20 | import os
21 | import numpy as np
22 | import matplotlib
23 | matplotlib.use('TkAgg')
24 | import matplotlib.pyplot as plt
25 | import matplotlib.patches as patches
26 | from skimage import io
27 |
28 | import glob
29 | import time
30 | import argparse
31 | from filterpy.kalman import KalmanFilter
32 |
33 | np.random.seed(0)
34 |
35 |
36 | def linear_assignment(cost_matrix):
37 | try:
38 | import lap
39 | _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
40 | return np.array([[y[i],i] for i in x if i >= 0]) #
41 | except ImportError:
42 | from scipy.optimize import linear_sum_assignment
43 | x, y = linear_sum_assignment(cost_matrix)
44 | return np.array(list(zip(x, y)))
45 |
46 |
47 | def iou_batch(bb_test, bb_gt):
48 | """
49 | From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
50 | """
51 | bb_gt = np.expand_dims(bb_gt, 0)
52 | bb_test = np.expand_dims(bb_test, 1)
53 |
54 | xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
55 | yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
56 | xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
57 | yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
58 | w = np.maximum(0., xx2 - xx1)
59 | h = np.maximum(0., yy2 - yy1)
60 | wh = w * h
61 | o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
62 | + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)
63 | return(o)
64 |
65 |
66 | def convert_bbox_to_z(bbox):
67 | """
68 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
69 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
70 | the aspect ratio
71 | """
72 | w = bbox[2] - bbox[0]
73 | h = bbox[3] - bbox[1]
74 | x = bbox[0] + w/2.
75 | y = bbox[1] + h/2.
76 | s = w * h #scale is just area
77 | r = w / float(h)
78 | return np.array([x, y, s, r]).reshape((4, 1))
79 |
80 |
81 | def convert_x_to_bbox(x,score=None):
82 | """
83 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
84 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
85 | """
86 | w = np.sqrt(x[2] * x[3])
87 | h = x[2] / w
88 | if(score==None):
89 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
90 | else:
91 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
92 |
93 |
94 | class KalmanBoxTracker(object):
95 | """
96 | This class represents the internal state of individual tracked objects observed as bbox.
97 | """
98 | count = 0
99 | def __init__(self,bbox):
100 | """
101 | Initialises a tracker using initial bounding box.
102 | """
103 | #define constant velocity model
104 | self.kf = KalmanFilter(dim_x=7, dim_z=4)
105 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
106 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
107 |
108 | self.kf.R[2:,2:] *= 10.
109 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
110 | self.kf.P *= 10.
111 | self.kf.Q[-1,-1] *= 0.01
112 | self.kf.Q[4:,4:] *= 0.01
113 |
114 | self.kf.x[:4] = convert_bbox_to_z(bbox)
115 | self.time_since_update = 0
116 | self.id = KalmanBoxTracker.count
117 | KalmanBoxTracker.count += 1
118 | self.history = []
119 | self.hits = 0
120 | self.hit_streak = 0
121 | self.age = 0
122 |
123 | def update(self,bbox):
124 | """
125 | Updates the state vector with observed bbox.
126 | """
127 | self.time_since_update = 0
128 | self.history = []
129 | self.hits += 1
130 | self.hit_streak += 1
131 | self.kf.update(convert_bbox_to_z(bbox))
132 |
133 | def predict(self):
134 | """
135 | Advances the state vector and returns the predicted bounding box estimate.
136 | """
137 | if((self.kf.x[6]+self.kf.x[2])<=0):
138 | self.kf.x[6] *= 0.0
139 | self.kf.predict()
140 | self.age += 1
141 | if(self.time_since_update>0):
142 | self.hit_streak = 0
143 | self.time_since_update += 1
144 | self.history.append(convert_x_to_bbox(self.kf.x))
145 | return self.history[-1]
146 |
147 | def get_state(self):
148 | """
149 | Returns the current bounding box estimate.
150 | """
151 | return convert_x_to_bbox(self.kf.x)
152 |
153 |
154 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
155 | """
156 | Assigns detections to tracked object (both represented as bounding boxes)
157 |
158 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers
159 | """
160 | if(len(trackers)==0):
161 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
162 |
163 | iou_matrix = iou_batch(detections, trackers)
164 |
165 | if min(iou_matrix.shape) > 0:
166 | a = (iou_matrix > iou_threshold).astype(np.int32)
167 | if a.sum(1).max() == 1 and a.sum(0).max() == 1:
168 | matched_indices = np.stack(np.where(a), axis=1)
169 | else:
170 | matched_indices = linear_assignment(-iou_matrix)
171 | else:
172 | matched_indices = np.empty(shape=(0,2))
173 |
174 | unmatched_detections = []
175 | for d, det in enumerate(detections):
176 | if(d not in matched_indices[:,0]):
177 | unmatched_detections.append(d)
178 | unmatched_trackers = []
179 | for t, trk in enumerate(trackers):
180 | if(t not in matched_indices[:,1]):
181 | unmatched_trackers.append(t)
182 |
183 | #filter out matched with low IOU
184 | matches = []
185 | for m in matched_indices:
186 |     if(iou_matrix[m[0], m[1]]<iou_threshold):
187 |       unmatched_detections.append(m[0])
188 |       unmatched_trackers.append(m[1])
189 |     else:
190 |       matches.append(m.reshape(1,2))
191 |   if(len(matches)==0):
192 |     matches = np.empty((0,2),dtype=int)
193 |   else:
194 |     matches = np.concatenate(matches,axis=0)
195 |
196 |   return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
197 |
198 |
199 | class Sort(object):
200 |   def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
201 |     """
202 |     Sets key parameters for SORT
203 |     """
204 |     self.max_age = max_age
205 |     self.min_hits = min_hits
206 |     self.iou_threshold = iou_threshold
207 |     self.trackers = []
208 |     self.frame_count = 0
209 |
210 |   def update(self, dets=np.empty((0, 5))):
211 |     """
212 |     Params:
213 |       dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],...]
214 |     Requires: this method must be called once for each frame even with empty detections.
215 |     Returns a similar array, where the last column is the object ID.
216 |
217 |     NOTE: The number of objects returned may differ from the number of detections provided.
218 |     """
219 |     self.frame_count += 1
220 |     # get predicted locations from existing trackers.
221 |     trks = np.zeros((len(self.trackers), 5))
222 |     to_del = []
223 |     ret = []
224 |     for t, trk in enumerate(trks):
225 |       pos = self.trackers[t].predict()[0]
226 |       trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
227 |       if np.any(np.isnan(pos)):
228 |         to_del.append(t)
229 |     trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
230 |     for t in reversed(to_del):
231 |       self.trackers.pop(t)
232 |     matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold)
233 |
234 |     # update matched trackers with assigned detections
235 |     for m in matched:
236 |       self.trackers[m[1]].update(dets[m[0], :])
237 |
238 |     # create and initialise new trackers for unmatched detections
239 |     for i in unmatched_dets:
240 |       trk = KalmanBoxTracker(dets[i,:])
241 |       self.trackers.append(trk)
242 |     i = len(self.trackers)
243 |     for trk in reversed(self.trackers):
244 |       d = trk.get_state()[0]
245 |       if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
246 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
247 | i -= 1
248 | # remove dead tracklet
249 | if(trk.time_since_update > self.max_age):
250 | self.trackers.pop(i)
251 | if(len(ret)>0):
252 | return np.concatenate(ret)
253 | return np.empty((0,5))
254 |
255 | def parse_args():
256 | """Parse input arguments."""
257 | parser = argparse.ArgumentParser(description='SORT demo')
258 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
259 | parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
260 | parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
261 | parser.add_argument("--max_age",
262 | help="Maximum number of frames to keep alive a track without associated detections.",
263 | type=int, default=1)
264 | parser.add_argument("--min_hits",
265 | help="Minimum number of associated detections before track is initialised.",
266 | type=int, default=3)
267 | parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3)
268 | args = parser.parse_args()
269 | return args
270 |
271 | if __name__ == '__main__':
272 | # all train
273 | args = parse_args()
274 | display = args.display
275 | phase = args.phase
276 | total_time = 0.0
277 | total_frames = 0
278 | colours = np.random.rand(32, 3) #used only for display
279 | if(display):
280 | if not os.path.exists('mot_benchmark'):
281 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
282 | exit()
283 | plt.ion()
284 | fig = plt.figure()
285 | ax1 = fig.add_subplot(111, aspect='equal')
286 |
287 | if not os.path.exists('output'):
288 | os.makedirs('output')
289 | pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt')
290 | for seq_dets_fn in glob.glob(pattern):
291 | mot_tracker = Sort(max_age=args.max_age,
292 | min_hits=args.min_hits,
293 | iou_threshold=args.iou_threshold) #create instance of the SORT tracker
294 | seq_dets = np.loadtxt(seq_dets_fn, delimiter=',')
295 | seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0]
296 |
297 | with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file:
298 | print("Processing %s."%(seq))
299 | for frame in range(int(seq_dets[:,0].max())):
300 | frame += 1 #detection and frame numbers begin at 1
301 | dets = seq_dets[seq_dets[:, 0]==frame, 2:7]
302 | dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
303 | total_frames += 1
304 |
305 | if(display):
306 | fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame))
307 | im =io.imread(fn)
308 | ax1.imshow(im)
309 | plt.title(seq + ' Tracked Targets')
310 |
311 | start_time = time.time()
312 | trackers = mot_tracker.update(dets)
313 | cycle_time = time.time() - start_time
314 | total_time += cycle_time
315 |
316 | for d in trackers:
317 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
318 | if(display):
319 | d = d.astype(np.int32)
320 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
321 |
322 | if(display):
323 | fig.canvas.flush_events()
324 | plt.draw()
325 | ax1.cla()
326 |
327 | print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
328 |
329 | if(display):
330 | print("Note: to get real runtime results run without the option: --display")
331 |
--------------------------------------------------------------------------------