├── utils
│   ├── __init__.py
│   ├── decoding.py
│   ├── lr_scheduler.py
│   └── box_utils.py
├── inference
│   ├── __init__.py
│   ├── trt_inference
│   │   ├── __init__.py
│   │   ├── cls.py
│   │   ├── cerberus_trt.py
│   │   └── trt_infer.py
│   ├── profiler.py
│   ├── postproc.py
│   ├── run_tensorrt.py
│   └── run.py
├── conf
│   ├── __init__.py
│   ├── experiments
│   │   ├── resnet101_bifpn.json
│   │   ├── resnet34_bifpn.json
│   │   ├── resnet50_bifpn.json
│   │   ├── mobilenetv2_bifpn.json
│   │   ├── efficientnetb2_bifpn.json
│   │   └── resnet34_simple.json
│   └── conf.py
├── models
│   ├── __init__.py
│   ├── losses
│   │   ├── __init__.py
│   │   ├── multitask_loss.py
│   │   ├── task_losses.py
│   │   └── heatmap_loss.py
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── misc.py
│   │   ├── mobilenetv2.py
│   │   ├── resnet.py
│   │   └── efficientnet.py
│   ├── heads.py
│   ├── layers.py
│   ├── cerberus.py
│   └── necks.py
├── docs
│   ├── paper.pdf
│   ├── architecutre_github.png
│   └── deps.sh
├── dataset
│   ├── utils
│   │   ├── __init__.py
│   │   ├── cls.py
│   │   ├── transforms.py
│   │   ├── heatmaps.py
│   │   ├── process_detection.py
│   │   └── process_lanes.py
│   ├── __init__.py
│   ├── collate.py
│   └── multitask_dataset.py
├── requirements.txt
├── .gitignore
├── data
│   ├── heatmap_utils.py
│   ├── lane_heatmaps.py
│   ├── tools.py
│   └── bdd100k_lane_keypoints.py
├── main.py
├── trainer.py
├── evaluate.py
└── Readme.md

/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/inference/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/inference/trt_inference/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/conf/__init__.py:
--------------------------------------------------------------------------------
1 | from conf.conf import Conf
2 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | from models.cerberus import CerberusModel
2 | 
--------------------------------------------------------------------------------
/docs/paper.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cscribano/CERBERUS/HEAD/docs/paper.pdf
--------------------------------------------------------------------------------
/models/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from models.losses.multitask_loss import MultiTaskLoss
2 | 
--------------------------------------------------------------------------------
/docs/architecutre_github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cscribano/CERBERUS/HEAD/docs/architecutre_github.png
--------------------------------------------------------------------------------
/dataset/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .process_lanes import LaneProcessor
2 | from .process_detection import DetProcessor
3 | 
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from models.backbones.resnet import *
2 | from models.backbones.efficientnet import *
3 | 
-------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from dataset.multitask_dataset import MultitaskDataset 2 | from dataset.collate import ignore_collate 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-dateutil>=2.7.3 2 | pytz==2021.1 3 | click==8.1.2 4 | pytorch_lightning==1.6.2 5 | opencv-contrib-python==4.1.2.30 6 | numpy==1.21.0 7 | albumentations==1.1.0 8 | onnx==1.11.0 9 | onnxsim==0.4.8.0 10 | scipy==1.7.2 11 | pycuda==2021.1 12 | cupy-cuda111==10.5.0 13 | numba==0.55.1 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Pycharm stuffs 2 | .idea/ 3 | *__pycache__/ 4 | 5 | ### Pytorch model weights 6 | *.pth 7 | *.pt 8 | *.onnx 9 | ./weights 10 | 11 | ### logs 12 | /log/* 13 | 14 | ### onnx 15 | /weights/* 16 | 17 | ## python 18 | .ipynb_checkpoints 19 | 20 | ### images 21 | *.jpg 22 | *.jpeg 23 | *.png 24 | *.tif 25 | *.tiff 26 | *.avi 27 | *.mp4 28 | 29 | # misc 30 | /experimental/* 31 | /scripts/* 32 | -------------------------------------------------------------------------------- /docs/deps.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get install -y libhdf5-serial-dev hdf5-tools libcanberra-gtk-module 2 | sudo -H pip3 install Cython 3 | sudo pip3 -H install numpy==1.19 4 | 5 | # SciPy and Sklearn 6 | sudo apt-get install -y libatlas-base-dev gfortran 7 | sudo apt-get install -y libpcap-dev libpq-dev 8 | sudo -H pip3 install scikit-learn 9 | 10 | # Numba 11 | sudo apt-get install -y llvm-8 llvm-8-dev 12 | sudo -H LLVM_CONFIG=/usr/bin/llvm-config-8 pip3 install numba==0.48 13 | 14 | # CuPy 15 | echo "Installing CuPy, this may take a while..." 16 | sudo -H CUPY_NVCC_GENERATE_CODE="current" CUPY_NUM_BUILD_JOBS=$(nproc) pip3 install cupy==9.2 17 | 18 | # end 19 | echo " ====================================" 20 | echo " | REBOOT IS REQUIRED |" 21 | echo " ====================================" 22 | -------------------------------------------------------------------------------- /inference/profiler.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import Counter 3 | 4 | 5 | class Profiler: 6 | __call_count = Counter() 7 | __time_elapsed = Counter() 8 | warmup = 0 9 | 10 | def __init__(self, name, aggregate=False): 11 | self.name = name 12 | if not aggregate and Profiler.warmup == 0: 13 | Profiler.__call_count[self.name] += 1 14 | 15 | def __enter__(self): 16 | self.start = time.perf_counter() 17 | return self 18 | 19 | def __exit__(self, type, value, traceback): 20 | self.end = time.perf_counter() 21 | self.duration = self.end - self.start 22 | if Profiler.warmup == 0: 23 | Profiler.__time_elapsed[self.name] += self.duration 24 | else: 25 | Profiler.warmup -= 1 26 | 27 | @classmethod 28 | def set_warmup(cls, warmup): 29 | cls.warmup = warmup 30 | 31 | @classmethod 32 | def reset(cls): 33 | cls.__call_count.clear() 34 | cls.__time_elapsed.clear() 35 | 36 | @classmethod 37 | def get_avg_millis(cls, name): 38 | call_count = cls.__call_count[name] 39 | if call_count == 0: 40 | return 0. 
41 | return cls.__time_elapsed[name] * 1000 / call_count 42 | -------------------------------------------------------------------------------- /dataset/utils/cls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | LANE_CLS = { 5 | 'single yellow': 0, 6 | 'single white': 1, 7 | 'crosswalk': 2, 8 | 'double white': 3, 9 | 'double other': 4, 10 | 'road curb': 5, 11 | 'single other': 6, 12 | 'double yellow': 7 13 | } 14 | 15 | DET_CLS = { 16 | 'pedestrian' : 0, 17 | 'rider' : 1, 18 | 'car' : 2, 19 | 'truck' : 3, 20 | 'bus' : 4, 21 | 'train' : 5, 22 | 'motorcycle' : 6, 23 | 'bicycle' : 7, 24 | 'traffic light' : 8, 25 | 'traffic sign' : 9, 26 | 'other vehicle': 10, 27 | 'other person': 11, 28 | 'trailer': 12 29 | } 30 | 31 | OCL_VEHICLES = [2, 3, 4, 6] #<- not used! 32 | 33 | """ 34 | - weather: "rainy|snowy|clear|overcast|undefined|partly cloudy|foggy" 35 | - scene: "tunnel|residential|parking lot|undefined|city street|gas stations|highway|" 36 | - timeofday: "daytime|night|dawn/dusk|undefined" 37 | 38 | """ 39 | 40 | WTR_CLS = { 41 | "rainy": 0, 42 | "snowy": 1, 43 | "clear": 2, 44 | "overcast": 3, 45 | "partly cloudy": 4, 46 | "foggy": 5, 47 | "undefined": 6 48 | } 49 | 50 | SN_CLS = { 51 | "tunnel": 0, 52 | "residential": 1, 53 | "parking lot": 2, 54 | "city street": 3, 55 | "gas stations": 4, 56 | "highway": 5, 57 | "undefined": 6 58 | } 59 | 60 | TD_CLS = { 61 | "daytime": 0, 62 | "night": 1, 63 | "dawn/dusk": 2, 64 | "undefined": 3 65 | } 66 | -------------------------------------------------------------------------------- /inference/trt_inference/cls.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | LANE_CLS = { 5 | 'single yellow': 0, 6 | 'single white': 1, 7 | 'crosswalk': 2, 8 | 'double white': 3, 9 | 'double other': 4, 10 | 'road curb': 5, 11 | 'single other': 6, 12 | 'double yellow': 7 13 | } 14 | 15 | DET_CLS = { 16 | 'pedestrian' : 0, 17 | 'rider' : 1, 18 | 'car' : 2, 19 | 'truck' : 3, 20 | 'bus' : 4, 21 | 'train' : 5, 22 | 'motorcycle' : 6, 23 | 'bicycle' : 7, 24 | 'traffic light' : 8, 25 | 'traffic sign' : 9, 26 | 'other vehicle': 10, 27 | 'other person': 11, 28 | 'trailer': 12 29 | } 30 | 31 | DET_CLS_IND = list(DET_CLS.keys()) 32 | OCL_VEHICLES = [2, 3, 4, 6] 33 | 34 | """ 35 | - weather: "rainy|snowy|clear|overcast|undefined|partly cloudy|foggy" 36 | - scene: "tunnel|residential|parking lot|undefined|city street|gas stations|highway|" 37 | - timeofday: "daytime|night|dawn/dusk|undefined" 38 | 39 | """ 40 | 41 | WTR_CLS = { 42 | "rainy": 0, 43 | "snowy": 1, 44 | "clear": 2, 45 | "overcast": 3, 46 | "partly cloudy": 4, 47 | "foggy": 5, 48 | "undefined": 6 49 | } 50 | 51 | SN_CLS = { 52 | "tunnel": 0, 53 | "residential": 1, 54 | "parking lot": 2, 55 | "city street": 3, 56 | "gas stations": 4, 57 | "highway": 5, 58 | "undefined": 6 59 | } 60 | 61 | TD_CLS = { 62 | "daytime": 0, 63 | "night": 1, 64 | "dawn/dusk": 2, 65 | "undefined": 3 66 | } 67 | -------------------------------------------------------------------------------- /utils/decoding.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | from typing import List, Tuple 5 | import torch 6 | 7 | class PseudoNMS(torch.nn.Module): 8 | def __init__(self, nms_kernels): 9 | # type: (List[Tuple[int, int]]) -> None 10 | 11 | super().__init__() 12 | 
13 | pooling = [] 14 | for k in nms_kernels: 15 | padding = ((k[0] - 1) // 2, (k[1] - 1) // 2) 16 | pool = torch.nn.MaxPool2d(kernel_size=k, stride=1, padding=padding) 17 | pooling.append(pool) 18 | 19 | self.pooling = torch.nn.ModuleList(pooling) 20 | 21 | def forward(self, heatmap): 22 | 23 | masks = [] 24 | for pool in self.pooling: 25 | nms_mask = pool(heatmap) 26 | nms_mask = (nms_mask == heatmap) 27 | masks.append(nms_mask) 28 | 29 | for mask in masks: 30 | heatmap = heatmap * mask 31 | 32 | return heatmap 33 | 34 | 35 | def kp_from_heatmap(heatmap, th, nms_kernel=3, pseudo_nms=True): 36 | 37 | # 1. pseudo-nms via max pool 38 | if pseudo_nms: 39 | padding = (nms_kernel - 1) // 2 40 | mask = torch.nn.functional.max_pool2d(heatmap, kernel_size=nms_kernel, stride=1, padding=padding) == heatmap 41 | heatmap = heatmap * mask 42 | 43 | # Get best candidate at each heatmap location, since box regression is shared 44 | heatmap, labels = torch.max(heatmap, dim=1) 45 | 46 | # Flatten and get values 47 | indices = torch.nonzero(heatmap.gt(th), as_tuple=False).flip(1) 48 | scores = heatmap[0, indices[:, 1], indices[:, 0]] 49 | labels = labels[0, indices[:, 1], indices[:, 0]] 50 | 51 | return scores, indices, labels 52 | -------------------------------------------------------------------------------- /models/losses/multitask_loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from torch import nn 6 | from models.losses.task_losses import ObjectsLoss, LanesLoss, ClsLoss 7 | from models.losses.heatmap_loss import * 8 | 9 | from conf import Conf 10 | 11 | class MultiTaskLoss(nn.Module): 12 | 13 | def __init__(self, cnf): 14 | # type: (Conf) -> () 15 | 16 | super().__init__() 17 | self.cnf = cnf 18 | 19 | # Configuration 20 | self.lane_det = cnf.base.get("lane_det", True) 21 | self.obj_det = cnf.base.get("object_det", True) 22 | self.scene_cls = cnf.base.get("scene_cls", False) 23 | 24 | # Task specific losses 25 | self.lanes_loss = LanesLoss(cnf) 26 | self.objects_loss = ObjectsLoss(cnf) 27 | self.cls_loss = ClsLoss(cnf) 28 | 29 | def forward(self, preds, targets): 30 | # type: (dict[torch.tensor, ...], dict[torch.tensor, ...]) -> torch.tensor 31 | 32 | l_loss, d_loss, scn_loss = 0.0, 0.0, 0.0 33 | l_detail, d_detail, s_detail = {}, {}, {} 34 | 35 | #lane_pred, det_pred, scn_pred = preds 36 | 37 | # Lane estimation loss (only heatmaps) 38 | if self.lane_det: 39 | lane_true = targets["lane_det"] 40 | lane_pred = preds["lane_det"] 41 | l_loss, l_detail = self.lanes_loss(lane_pred, lane_true) 42 | 43 | # Object detection loss (only heatmaps) 44 | if self.obj_det: 45 | det_true = targets["obj_det"] 46 | det_pred = preds["obj_det"] 47 | d_loss, d_detail = self.objects_loss(det_pred, det_true) 48 | 49 | # Scene classification loss 50 | if self.scene_cls: 51 | scn_true = targets["scn_cls"] 52 | scn_pred = preds["scn_cls"] 53 | scn_loss, s_detail = self.cls_loss(scn_pred, scn_true) 54 | 55 | loss_detail = {k : v for d in (d_detail, l_detail, s_detail) for k,v in d.items()} 56 | 57 | return l_loss + d_loss + scn_loss, loss_detail 58 | -------------------------------------------------------------------------------- /data/heatmap_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | 6 | def kps_to_heatmaps(annotation, w, h, sigma): 7 | 8 | heatmaps_list = [] 9 | 10 | for 
cls_keypoints in annotation: 11 | 12 | # generate heatmap from list of (x, y, z) coordinates 13 | # retrieve one (W,H) heatmap for each keypoint 14 | if len(cls_keypoints) != 0: 15 | # Normalize coordinates 16 | # cls_keypoints = torch.tensor(cls_keypoints) / torch.tensor([IMG_HEIGHT, IMG_WIDTH]) 17 | 18 | # Generate heatmap 19 | kern = make_gkern_2d(h, w, sigma) 20 | heatmaps = torch.stack([kern(x) for x in cls_keypoints], dim=0) 21 | else: 22 | heatmaps = torch.zeros(1, h, w) 23 | 24 | # Combine individual heatmaps in a single tensor 25 | heatmap = torch.max(heatmaps, dim=0)[0] 26 | heatmaps_list.append(heatmap) 27 | 28 | # Combine keypoints heatmaps in a single tensor 29 | total_heatmap = torch.stack(heatmaps_list, 0) 30 | 31 | return total_heatmap 32 | 33 | def make_gkern_2d(h, w, s, device='cpu'): 34 | def gk(head): 35 | return gkern_2d(h, w, head, s, device=device) 36 | 37 | return gk 38 | 39 | def gkern_2d(h, w, center, s, device='cuda'): 40 | # type: (int, int, Tuple[int, int], float, str) -> torch.Tensor 41 | """ 42 | :param h: heatmap image height 43 | :param w: heatmap image width 44 | :param center: Gaussian center (x,y,z) 45 | :param s: Gaussian sigma 46 | :param device: 'cuda' or 'cpu' -> device used do compute heatmaps 47 | :return: Torch tensor with shape (h, w, d) with A Gaussian centered in `center` 48 | """ 49 | 50 | x = torch.arange(0, w, 1).type('torch.FloatTensor').to(device) 51 | y = torch.arange(0, h, 1).type('torch.FloatTensor').to(device) 52 | 53 | y = y.unsqueeze(1) 54 | 55 | x0 = center[0] * w 56 | y0 = center[1] * h 57 | 58 | return torch.exp(-1 * ((x - x0) ** 2 + (y - y0) ** 2) / s ** 2) 59 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | import logging 6 | from conf import Conf 7 | 8 | import click 9 | import torch.backends.cudnn as cudnn 10 | 11 | from trainer import trainer_run 12 | 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | @click.command() 17 | @click.option('--exp_name', type=str, default=None) 18 | @click.option('--conf_file_path', type=str, default=None) 19 | @click.option('--seed', type=int, default=None) 20 | def main(exp_name, conf_file_path, seed): 21 | # type: (str, str, int) -> None 22 | 23 | assert torch.backends.cudnn.enabled, "Running without cuDNN is discouraged" 24 | 25 | # if `exp_name` is None, 26 | # ask the user to enter it 27 | if exp_name is None: 28 | exp_name = input('>> experiment name: ') 29 | 30 | # if `exp_name` contains '!', 31 | # `log_each_step` becomes `False` 32 | log_each_step = True 33 | if '!' 
in exp_name: 34 | exp_name = exp_name.replace('!', '') 35 | log_each_step = False 36 | 37 | # if `exp_name` contains a '@' character, 38 | # the number following '@' is considered as 39 | # the desired random seed for the experiment 40 | split = exp_name.split('@') 41 | if len(split) == 2: 42 | seed = int(split[1]) 43 | exp_name = split[0] 44 | 45 | cnf = Conf(conf_file_path=conf_file_path, seed=seed, 46 | exp_name=exp_name, log_each_step=log_each_step) 47 | print(f'\n▶ Starting Experiment \'{exp_name}\' [seed: {cnf.seed}]') 48 | 49 | # Setup logging 50 | logging.basicConfig( 51 | format='[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s', 52 | level=logging.INFO, 53 | ) 54 | 55 | cnf_attrs = vars(cnf) 56 | for k in cnf_attrs: 57 | s = f'{k} : {cnf_attrs[k]}' 58 | logging.info(s) 59 | 60 | # Run training 61 | trainer_run(cnf) 62 | 63 | if __name__ == '__main__': 64 | main() 65 | -------------------------------------------------------------------------------- /data/lane_heatmaps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import cv2 5 | import click 6 | from pathlib import Path 7 | 8 | from math import ceil 9 | import numpy as np 10 | import torch 11 | 12 | from heatmap_utils import kps_to_heatmaps 13 | 14 | IMG_WIDTH = 1280 15 | IMG_HEIGHT = 720 16 | 17 | I_SCALE = 2 18 | O_SCALE = 4 19 | 20 | PPK = 25 # Number of pixels per keypoint 21 | 22 | CLS = { 23 | 'single yellow': 0, 24 | 'single white': 1, 25 | 'crosswalk': 2, 26 | 'double white': 3, 27 | 'double other': 4, 28 | 'road curb': 5, 29 | 'single other': 6, 30 | 'double yellow': 7 31 | } 32 | 33 | @click.command() 34 | @click.option('--img_root', '-i', type=click.Path(exists=True), default=None, required=False) 35 | def main(img_root): 36 | # type: (Path) -> None 37 | 38 | split = "val" 39 | 40 | # Load Images 41 | img_root = Path(img_root) / split 42 | images = {p.name: p for p in img_root.glob("*.jpg")} 43 | # Load annotation file 44 | annot_file = Path(f"{split}_{PPK}.pt") 45 | annotations = torch.load(annot_file) 46 | 47 | # List lane classes 48 | classes = set([l['category'] for a in annotations for l in a.get('labels', [])]) 49 | print(classes) 50 | 51 | w = ceil(IMG_WIDTH / O_SCALE) 52 | h = ceil(IMG_HEIGHT / O_SCALE) 53 | 54 | 55 | for lanes in annotations: 56 | lbc = [[] for _ in range(8)] 57 | 58 | labels = lanes.get("labels", []) 59 | for l in labels: 60 | cls_id = CLS[l['category']] 61 | lbc[cls_id] += l["keypoints"] 62 | 63 | # Load image 64 | image_file = img_root / images[lanes["name"]] 65 | frame = cv2.imread(str(image_file)) 66 | 67 | # Numpy 68 | lane_np = np.concatenate([np.array(l['keypoints']) for l in labels]).astype(np.int32) 69 | for c in lane_np: 70 | frame = cv2.circle(frame, (c[0], c[1]), 3, (0, 255, 0), thickness=3) 71 | 72 | # Generate heatmaps 73 | n = torch.tensor([IMG_WIDTH, IMG_HEIGHT]) 74 | lbc = [torch.tensor(l) / n if len(l) > 0 else torch.tensor(l) for l in lbc] 75 | heatmaps = kps_to_heatmaps(lbc, w, h, sigma=2) 76 | 77 | # Display 78 | hm_show, _ = torch.max(heatmaps, dim=0) 79 | hm_show = hm_show.numpy() * 255 80 | hm_show = hm_show.astype(np.uint8) 81 | hm_show = cv2.applyColorMap(hm_show, cv2.COLORMAP_JET) 82 | 83 | 84 | 85 | cv2.imshow("heatmap", hm_show) 86 | cv2.imshow("frame", frame) 87 | while cv2.waitKey(1) != ord('q'): 88 | pass 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | 
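
[Editor's note] A minimal, hypothetical usage sketch of kps_to_heatmaps from data/heatmap_utils.py, as it is invoked by data/lane_heatmaps.py above. It is not a repository file: the resolution values and keypoints are illustrative only, coordinates are assumed to be normalized to [0, 1] as in lane_heatmaps.py, and the import assumes the snippet is run from the data/ directory.

# Editor's illustrative sketch (not part of the repository); run from data/.
import torch
from heatmap_utils import kps_to_heatmaps

w, h = 320, 180                                      # output heatmap resolution (W, H)
kps = [torch.tensor([[0.25, 0.50], [0.30, 0.55]])]   # class 0: two normalized (x, y) keypoints
kps += [torch.tensor([]) for _ in range(7)]          # remaining 7 lane classes: no keypoints

heatmaps = kps_to_heatmaps(kps, w, h, sigma=2)       # one (h, w) channel per lane class
print(heatmaps.shape)                                # torch.Size([8, 180, 320])
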
-------------------------------------------------------------------------------- /conf/experiments/resnet101_bifpn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "resnet101", 27 | "args": { 28 | "pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "BiFPNNeck", 33 | "args": {} 34 | }, 35 | "head_channel": 64, 36 | "bn_momentum": 0.1 37 | }, 38 | 39 | "optimizer": { 40 | "name": "Adam", 41 | "args": { 42 | "lr": 2.5e-4 43 | } 44 | }, 45 | 46 | "lr_scheduler": { 47 | "name": "WarmupMultiStepLR", 48 | "args": { 49 | "milestones": [100000, 200000], 50 | "gamma": 0.5, 51 | "warmup_iters": 3500 52 | } 53 | }, 54 | 55 | "loss": { 56 | "heatmap_loss": { 57 | "name": "WMSELoss", 58 | "args": {} 59 | }, 60 | "scn_loss": { 61 | "name": "nn.CrossEntropyLoss", 62 | "args": {} 63 | } 64 | }, 65 | 66 | 67 | "dataset" : { 68 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 69 | 70 | "input_w": 640, 71 | "input_h": 320, 72 | "output_stride": 4, 73 | 74 | "lane_det": { 75 | "data_root": "/data/BDD100K", 76 | "ppm": 25, 77 | "sigma": { 78 | "name": "CornerNetRadius", 79 | "args": {} 80 | } 81 | }, 82 | "obj_det": { 83 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 84 | }, 85 | 86 | "train_dataset": { 87 | "name": "MultitaskDataset", 88 | "args": { 89 | "mode": "train" 90 | }, 91 | "loader_args": { 92 | "shuffle": true, 93 | "batch_size": 32, 94 | "num_workers": 6 95 | } 96 | }, 97 | 98 | "val_dataset": { 99 | "name": "MultitaskDataset", 100 | "args": { 101 | "mode": "val" 102 | }, 103 | "loader_args": { 104 | "shuffle": false, 105 | "batch_size": 16, 106 | "num_workers": 4 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /conf/experiments/resnet34_bifpn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "resnet34", 27 | "args": { 28 | "pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "BiFPNNeck", 33 | "args": {} 34 | }, 35 | "head_channel": 64, 36 | "bn_momentum": 0.1 37 | }, 38 | 39 | "optimizer": { 40 | "name": "Adam", 41 | "args": { 42 | "lr": 2.5e-4 43 | } 44 | }, 45 | 46 | "lr_scheduler": { 47 | "name": "WarmupMultiStepLR", 48 | "args": { 49 | "milestones": [100000, 200000], 50 | "gamma": 0.5, 51 | "warmup_iters": 3500 52 | } 53 | }, 54 | 55 | "loss": { 56 | "heatmap_loss": { 57 | "name": "WMSELoss", 58 | "args": {} 59 | }, 60 | "scn_loss": { 61 | "name": "nn.CrossEntropyLoss", 62 | "args": {} 63 | } 64 | }, 65 | 66 | 67 | "dataset" : { 68 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 69 | 70 | "input_w": 640, 
71 | "input_h": 320, 72 | "output_stride": 4, 73 | 74 | "lane_det": { 75 | "data_root": "/data/BDD100K", 76 | "ppm": 25, 77 | "sigma": { 78 | "name": "CornerNetRadius", 79 | "args": {} 80 | } 81 | }, 82 | "obj_det": { 83 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 84 | }, 85 | 86 | "train_dataset": { 87 | "name": "MultitaskDataset", 88 | "args": { 89 | "mode": "train" 90 | }, 91 | "loader_args": { 92 | "shuffle": true, 93 | "batch_size": 32, 94 | "num_workers": 6 95 | } 96 | }, 97 | 98 | "val_dataset": { 99 | "name": "MultitaskDataset", 100 | "args": { 101 | "mode": "val" 102 | }, 103 | "loader_args": { 104 | "shuffle": false, 105 | "batch_size": 16, 106 | "num_workers": 4 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /conf/experiments/resnet50_bifpn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "resnet50", 27 | "args": { 28 | "pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "BiFPNNeck", 33 | "args": {} 34 | }, 35 | "head_channel": 64, 36 | "bn_momentum": 0.1 37 | }, 38 | 39 | "optimizer": { 40 | "name": "Adam", 41 | "args": { 42 | "lr": 2.5e-4 43 | } 44 | }, 45 | 46 | "lr_scheduler": { 47 | "name": "WarmupMultiStepLR", 48 | "args": { 49 | "milestones": [100000, 200000], 50 | "gamma": 0.5, 51 | "warmup_iters": 3500 52 | } 53 | }, 54 | 55 | "loss": { 56 | "heatmap_loss": { 57 | "name": "WMSELoss", 58 | "args": {} 59 | }, 60 | "scn_loss": { 61 | "name": "nn.CrossEntropyLoss", 62 | "args": {} 63 | } 64 | }, 65 | 66 | 67 | "dataset" : { 68 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 69 | 70 | "input_w": 640, 71 | "input_h": 320, 72 | "output_stride": 4, 73 | 74 | "lane_det": { 75 | "data_root": "/data/BDD100K", 76 | "ppm": 25, 77 | "sigma": { 78 | "name": "CornerNetRadius", 79 | "args": {} 80 | } 81 | }, 82 | "obj_det": { 83 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 84 | }, 85 | 86 | "train_dataset": { 87 | "name": "MultitaskDataset", 88 | "args": { 89 | "mode": "train" 90 | }, 91 | "loader_args": { 92 | "shuffle": true, 93 | "batch_size": 32, 94 | "num_workers": 6 95 | } 96 | }, 97 | 98 | "val_dataset": { 99 | "name": "MultitaskDataset", 100 | "args": { 101 | "mode": "val" 102 | }, 103 | "loader_args": { 104 | "shuffle": false, 105 | "batch_size": 16, 106 | "num_workers": 4 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /conf/experiments/mobilenetv2_bifpn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "mobilenet_v2", 27 | "args": { 28 | 
"pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "BiFPNNeck", 33 | "args": {} 34 | }, 35 | "head_channel": 64, 36 | "bn_momentum": 0.1 37 | }, 38 | 39 | "optimizer": { 40 | "name": "Adam", 41 | "args": { 42 | "lr": 2.5e-4 43 | } 44 | }, 45 | 46 | "lr_scheduler": { 47 | "name": "WarmupMultiStepLR", 48 | "args": { 49 | "milestones": [100000, 200000], 50 | "gamma": 0.5, 51 | "warmup_iters": 3500 52 | } 53 | }, 54 | 55 | "loss": { 56 | "heatmap_loss": { 57 | "name": "WMSELoss", 58 | "args": {} 59 | }, 60 | "scn_loss": { 61 | "name": "nn.CrossEntropyLoss", 62 | "args": {} 63 | } 64 | }, 65 | 66 | 67 | "dataset" : { 68 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 69 | 70 | "input_w": 640, 71 | "input_h": 320, 72 | "output_stride": 4, 73 | 74 | "lane_det": { 75 | "data_root": "/data/BDD100K", 76 | "ppm": 25, 77 | "sigma": { 78 | "name": "CornerNetRadius", 79 | "args": {} 80 | } 81 | }, 82 | "obj_det": { 83 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 84 | }, 85 | 86 | "train_dataset": { 87 | "name": "MultitaskDataset", 88 | "args": { 89 | "mode": "train" 90 | }, 91 | "loader_args": { 92 | "shuffle": true, 93 | "batch_size": 32, 94 | "num_workers": 6 95 | } 96 | }, 97 | 98 | "val_dataset": { 99 | "name": "MultitaskDataset", 100 | "args": { 101 | "mode": "val" 102 | }, 103 | "loader_args": { 104 | "shuffle": false, 105 | "batch_size": 16, 106 | "num_workers": 4 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /conf/experiments/efficientnetb2_bifpn.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "efficientnet_b2", 27 | "args": { 28 | "pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "BiFPNNeck", 33 | "args": {} 34 | }, 35 | "head_channel": 64, 36 | "bn_momentum": 0.1 37 | }, 38 | 39 | "optimizer": { 40 | "name": "Adam", 41 | "args": { 42 | "lr": 2.5e-4 43 | } 44 | }, 45 | 46 | "lr_scheduler": { 47 | "name": "WarmupMultiStepLR", 48 | "args": { 49 | "milestones": [100000, 200000], 50 | "gamma": 0.5, 51 | "warmup_iters": 3500 52 | } 53 | }, 54 | 55 | "loss": { 56 | "heatmap_loss": { 57 | "name": "WMSELoss", 58 | "args": {} 59 | }, 60 | "scn_loss": { 61 | "name": "nn.CrossEntropyLoss", 62 | "args": {} 63 | } 64 | }, 65 | 66 | 67 | "dataset" : { 68 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 69 | 70 | "input_w": 640, 71 | "input_h": 320, 72 | "output_stride": 4, 73 | 74 | "lane_det": { 75 | "data_root": "/data/BDD100K", 76 | "ppm": 25, 77 | "sigma": { 78 | "name": "CornerNetRadius", 79 | "args": {} 80 | } 81 | }, 82 | "obj_det": { 83 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 84 | }, 85 | 86 | "train_dataset": { 87 | "name": "MultitaskDataset", 88 | "args": { 89 | "mode": "train" 90 | }, 91 | "loader_args": { 92 | "shuffle": true, 93 | "batch_size": 32, 94 | "num_workers": 6 95 | } 96 | }, 97 | 98 | "val_dataset": { 99 | "name": "MultitaskDataset", 100 | "args": { 101 | "mode": "val" 102 | }, 103 | "loader_args": { 104 | "shuffle": false, 105 | 
"batch_size": 16, 106 | "num_workers": 4 107 | } 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /conf/experiments/resnet34_simple.json: -------------------------------------------------------------------------------- 1 | { 2 | "experiment" : { 3 | "epochs": 75, 4 | "device": "cuda", 5 | "ck_epoch_step": 1, 6 | "val_epoch_step": 1, 7 | "logdir": "/data/logs" 8 | }, 9 | 10 | "base": { 11 | "lane_det": true, 12 | "object_det": true, 13 | "scene_cls": true, 14 | "lane_classes": 8, 15 | "det_classes": 10, 16 | 17 | "scn_classes": { 18 | "weather": 7, 19 | "scene": 7, 20 | "timeofday": 4 21 | } 22 | }, 23 | 24 | "model": { 25 | "backbone": { 26 | "name": "resnet34", 27 | "args": { 28 | "pretrained": true 29 | } 30 | }, 31 | "neck": { 32 | "name": "SimpleNeck", 33 | "args": { 34 | "upsample_channels": [256, 256, 256] 35 | } 36 | }, 37 | "head_channel": 64, 38 | "bn_momentum": 0.1 39 | }, 40 | 41 | "optimizer": { 42 | "name": "Adam", 43 | "args": { 44 | "lr": 2.5e-4 45 | } 46 | }, 47 | 48 | "lr_scheduler": { 49 | "name": "WarmupMultiStepLR", 50 | "args": { 51 | "milestones": [100000, 200000], 52 | "gamma": 0.5, 53 | "warmup_iters": 3500 54 | } 55 | }, 56 | 57 | "loss": { 58 | "heatmap_loss": { 59 | "name": "WMSELoss", 60 | "args": {} 61 | }, 62 | "scn_loss": { 63 | "name": "nn.CrossEntropyLoss", 64 | "args": {} 65 | } 66 | }, 67 | 68 | 69 | "dataset" : { 70 | "images_root": "/data/BDD100K/bdd100k_images/bdd100k_images/images/100k", 71 | 72 | "input_w": 640, 73 | "input_h": 320, 74 | "output_stride": 4, 75 | 76 | "lane_det": { 77 | "data_root": "/data/BDD100K", 78 | "ppm": 25, 79 | "sigma": { 80 | "name": "CornerNetRadius", 81 | "args": {} 82 | } 83 | }, 84 | "obj_det": { 85 | "data_root": "/data/BDD100K/bdd100k_det/labels/det_20" 86 | }, 87 | 88 | "train_dataset": { 89 | "name": "MultitaskDataset", 90 | "args": { 91 | "mode": "train" 92 | }, 93 | "loader_args": { 94 | "shuffle": true, 95 | "batch_size": 32, 96 | "num_workers": 6 97 | } 98 | }, 99 | 100 | "val_dataset": { 101 | "name": "MultitaskDataset", 102 | "args": { 103 | "mode": "val" 104 | }, 105 | "loader_args": { 106 | "shuffle": false, 107 | "batch_size": 16, 108 | "num_workers": 4 109 | } 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /dataset/utils/transforms.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | from abc import ABCMeta 4 | 5 | from albumentations import Compose, KeypointParams, BboxParams, \ 6 | RandomBrightnessContrast, GaussNoise, RGBShift, CLAHE,\ 7 | RandomGamma, HorizontalFlip, Resize, Normalize, CenterCrop, RandomCrop, ShiftScaleRotate 8 | from albumentations.pytorch.transforms import ToTensorV2 9 | 10 | class BaseTransform(object, metaclass=ABCMeta): 11 | def __init__(self, w, h, input_w, input_h): 12 | 13 | # Find resize dimension (before crop) 14 | ws = w // input_w 15 | hs = h // input_h 16 | s = min(ws, hs) 17 | self.rw, self.rh = int(w // s), int(h // s) 18 | 19 | self.tsfm = ... 
20 | 21 | def __call__(self, img, keypoints=None, kp_labels=None, kp_ids = None, bboxes=None, bb_labels=None, bb_occl=None): 22 | if keypoints is None: 23 | keypoints = [] 24 | kp_labels = [] 25 | kp_ids = [] 26 | if bboxes is None: 27 | bboxes = [] 28 | bb_labels = [] 29 | bb_occl = [] 30 | 31 | augmented = self.tsfm(image=img, keypoints=keypoints, kp_labels=kp_labels, 32 | kp_ids=kp_ids, bboxes=bboxes, bb_labels=bb_labels, bb_occl=bb_occl) 33 | img, kp, kp_l, kp_i, bb, bb_l, bb_o = augmented['image'], augmented['keypoints'], augmented['kp_labels'],\ 34 | augmented['kp_ids'], augmented['bboxes'], augmented['bb_labels'], augmented['bb_occl'] 35 | return img, kp, kp_l, kp_i, bb, bb_l, bb_o 36 | 37 | class RandomAspect(BaseTransform): 38 | def __init__(self, w, h, input_w, input_h): 39 | super().__init__(w, h, input_w, input_h) 40 | 41 | self.tsfm = Compose([ 42 | Resize(self.rh, self.rw), 43 | ShiftScaleRotate(), 44 | # CenterCrop(320, 640), 45 | RandomCrop(320, 640), 46 | HorizontalFlip(), 47 | RandomBrightnessContrast(0.4, 0.4), 48 | GaussNoise(), 49 | RGBShift(), 50 | CLAHE(), 51 | RandomGamma(), 52 | Normalize(), 53 | ToTensorV2() 54 | ], keypoint_params=KeypointParams(format='xy', label_fields=['kp_labels', 'kp_ids']), 55 | bbox_params=BboxParams(format='pascal_voc', label_fields=['bb_labels', 'bb_occl'])) 56 | 57 | 58 | class Preproc(BaseTransform): 59 | def __init__(self, w, h, input_w, input_h): 60 | super().__init__(w, h, input_w, input_h) 61 | 62 | self.tsfm = Compose([ 63 | Resize(self.rh, self.rw), 64 | CenterCrop(320, 640), 65 | Normalize(), 66 | ToTensorV2() 67 | ], keypoint_params=KeypointParams(format='xy', label_fields=['kp_labels', 'kp_ids']), 68 | bbox_params=BboxParams(format='pascal_voc', label_fields=['bb_labels', 'bb_occl'])) 69 | 70 | -------------------------------------------------------------------------------- /utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | import math 2 | from bisect import bisect_right 3 | from torch.optim.lr_scheduler import _LRScheduler 4 | 5 | __all__ = ['WarmupMultiStepLR', 'WarmupCosineLR'] 6 | 7 | class WarmupMultiStepLR(_LRScheduler): 8 | def __init__( 9 | self, 10 | optimizer, 11 | milestones, 12 | gamma=0.1, 13 | warmup_factor=0.001, 14 | warmup_iters=1000, 15 | warmup_method="linear", 16 | last_epoch=-1, 17 | ): 18 | if not list(milestones) == sorted(milestones): 19 | raise ValueError( 20 | "Milestones should be a list of" " increasing integers. 
Got {}", 21 | milestones, 22 | ) 23 | self.milestones = milestones 24 | self.gamma = gamma 25 | self.warmup_factor = warmup_factor 26 | self.warmup_iters = warmup_iters 27 | self.warmup_method = warmup_method 28 | super().__init__(optimizer, last_epoch) 29 | 30 | def get_lr(self): 31 | warmup_factor = _get_warmup_factor_at_iter( 32 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 33 | ) 34 | return [ 35 | base_lr * warmup_factor 36 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 37 | for base_lr in self.base_lrs 38 | ] 39 | 40 | def _compute_values(self): 41 | return self.get_lr() 42 | 43 | 44 | class WarmupCosineLR(_LRScheduler): 45 | def __init__( 46 | self, 47 | optimizer, 48 | max_iters, 49 | warmup_factor=0.001, 50 | warmup_iters=1000, 51 | warmup_method="linear", 52 | last_epoch=-1, 53 | ): 54 | self.max_iters = max_iters 55 | self.warmup_factor = warmup_factor 56 | self.warmup_iters = warmup_iters 57 | self.warmup_method = warmup_method 58 | super().__init__(optimizer, last_epoch) 59 | 60 | def get_lr(self): 61 | warmup_factor = _get_warmup_factor_at_iter( 62 | self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 63 | ) 64 | 65 | return [ 66 | base_lr * warmup_factor * 0.5 67 | * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) 68 | for base_lr in self.base_lrs 69 | ] 70 | 71 | def _compute_values(self): 72 | return self.get_lr() 73 | 74 | 75 | def _get_warmup_factor_at_iter(method, iter, warmup_iters, warmup_factor): 76 | if iter >= warmup_iters: 77 | return 1.0 78 | 79 | if method == "constant": 80 | return warmup_factor 81 | elif method == "linear": 82 | alpha = iter / warmup_iters 83 | return warmup_factor * (1 - alpha) + alpha 84 | elif method == "burnin": 85 | return (iter / warmup_iters) ** 4 86 | else: 87 | raise ValueError("Unknown warmup method: {}".format(method)) 88 | -------------------------------------------------------------------------------- /utils/box_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | from __future__ import division 5 | import scipy.optimize 6 | import numpy as np 7 | 8 | 9 | def bbox_iou(boxA, boxB): 10 | # https://www.pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/ 11 | # ^^ corrected. 12 | 13 | # Determine the (x, y)-coordinates of the intersection rectangle 14 | xA = max(boxA[0], boxB[0]) 15 | yA = max(boxA[1], boxB[1]) 16 | xB = min(boxA[2], boxB[2]) 17 | yB = min(boxA[3], boxB[3]) 18 | 19 | interW = xB - xA + 1 20 | interH = yB - yA + 1 21 | 22 | # Correction: reject non-overlapping boxes 23 | if interW <= 0 or interH <= 0: 24 | return -1.0 25 | 26 | interArea = interW * interH 27 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) 28 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) 29 | iou = interArea / float(boxAArea + boxBArea - interArea) 30 | return iou 31 | 32 | 33 | def match_bboxes(bbox_gt, bbox_pred, IOU_THRESH=0.5): 34 | ''' 35 | Given sets of true and predicted bounding-boxes, 36 | determine the best possible match. 37 | 38 | Parameters 39 | ---------- 40 | bbox_gt, bbox_pred : N1x4 and N2x4 np array of bboxes [x1,y1,x2,y2]. 41 | The number of bboxes, N1 and N2, need not be the same. 
42 | 43 | Returns 44 | ------- 45 | (idxs_true, idxs_pred, ious, labels) 46 | idxs_true, idxs_pred : indices into gt and pred for matches 47 | ious : corresponding IOU value of each match 48 | labels: vector of 0/1 values for the list of detections 49 | ''' 50 | n_true = bbox_gt.shape[0] 51 | n_pred = bbox_pred.shape[0] 52 | MAX_DIST = 1.0 53 | MIN_IOU = 0.0 54 | 55 | # NUM_GT x NUM_PRED 56 | iou_matrix = np.zeros((n_true, n_pred)) 57 | for i in range(n_true): 58 | for j in range(n_pred): 59 | iou_matrix[i, j] = bbox_iou(bbox_gt[i, :], bbox_pred[j, :]) 60 | 61 | if n_pred > n_true: 62 | # there are more predictions than ground-truth - add dummy rows 63 | diff = n_pred - n_true 64 | iou_matrix = np.concatenate((iou_matrix, 65 | np.full((diff, n_pred), MIN_IOU)), 66 | axis=0) 67 | 68 | if n_true > n_pred: 69 | # more ground-truth than predictions - add dummy columns 70 | diff = n_true - n_pred 71 | iou_matrix = np.concatenate((iou_matrix, 72 | np.full((n_true, diff), MIN_IOU)), 73 | axis=1) 74 | 75 | # call the Hungarian matching 76 | idxs_true, idxs_pred = scipy.optimize.linear_sum_assignment(1 - iou_matrix) 77 | 78 | if (not idxs_true.size) or (not idxs_pred.size): 79 | ious = np.array([]) 80 | else: 81 | ious = iou_matrix[idxs_true, idxs_pred] 82 | 83 | # remove dummy assignments 84 | sel_pred = idxs_pred < n_pred 85 | idx_pred_actual = idxs_pred[sel_pred] 86 | idx_gt_actual = idxs_true[sel_pred] 87 | ious_actual = iou_matrix[idx_gt_actual, idx_pred_actual] 88 | sel_valid = (ious_actual > IOU_THRESH) 89 | label = sel_valid.astype(int) 90 | 91 | return idx_gt_actual[sel_valid], idx_pred_actual[sel_valid], ious_actual[sel_valid], label 92 | 93 | -------------------------------------------------------------------------------- /models/heads.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from torch import nn 6 | from abc import ABCMeta, abstractmethod 7 | 8 | from utils.decoding import PseudoNMS 9 | from .layers import make_conv, ConvReluConv 10 | 11 | class BaseHead(nn.Module, metaclass=ABCMeta): 12 | 13 | def __init__(self): 14 | # type: () -> None 15 | super().__init__() 16 | 17 | def forward(self, x, decode): 18 | # type: (torch.Tensor, bool) -> dict[str, torch.Tensor, ...] 19 | ... 
20 | 21 | class ObjectHead(BaseHead): 22 | 23 | def __init__(self, num_classes=80, in_channels=256, conv_channels=64): 24 | 25 | super(ObjectHead, self).__init__() 26 | self.cls_head = ConvReluConv(in_channels, conv_channels, num_classes, bias_fill=True, bias_value=-4.6) 27 | self.ofs_out = ConvReluConv(in_channels, in_channels, 4) 28 | self.occl = ConvReluConv(in_channels, in_channels, 1) 29 | 30 | self.nms = PseudoNMS(nms_kernels=[(3, 3)]) 31 | 32 | def forward(self, x, nms=False): 33 | hm = self.cls_head(x).sigmoid() 34 | wh = self.ofs_out(x) 35 | oc = self.occl(x) 36 | 37 | if nms: 38 | hm = self.nms(hm) 39 | 40 | ret = { 41 | "heatmaps": hm, 42 | "offsets": wh, 43 | "occlusion": oc 44 | } 45 | 46 | return ret 47 | 48 | class LaneHead(BaseHead): 49 | 50 | def __init__(self, num_classes=80, in_channels=256, quant_offsets=False, conv_channels=64): 51 | super(LaneHead, self).__init__() 52 | self.cls_head = ConvReluConv(in_channels, conv_channels, num_classes, bias_fill=True, bias_value=-4.6) 53 | self.emb_out = ConvReluConv(in_channels, in_channels, 2, bias_fill=True, bias_value=0.1) 54 | 55 | # Dequantizzation offsets 56 | self.quant_offsets = quant_offsets 57 | if quant_offsets: 58 | self.quant_out = ConvReluConv(in_channels, in_channels, 2, bias_fill=True, bias_value=0.1) 59 | 60 | self.nms = PseudoNMS(nms_kernels=[(1, 3), (3, 1)]) 61 | 62 | def forward(self, x, nms=False): 63 | hm = self.cls_head(x).sigmoid() 64 | emb = self.emb_out(x) 65 | 66 | if nms: 67 | hm = self.nms(hm) 68 | 69 | ret = { 70 | "heatmaps": hm, 71 | "offsets": emb, 72 | } 73 | 74 | if self.quant_offsets: 75 | quant = self.quant_out(x) 76 | ret["quant"] = quant 77 | 78 | return ret 79 | 80 | class ScnHead(BaseHead): 81 | def __init__(self, classes, in_channels): 82 | super(ScnHead, self).__init__() 83 | 84 | self.cls_splits = classes 85 | 86 | self.c1 = make_conv(in_channels, 64) 87 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 88 | self.fc = nn.Linear(64, sum(classes)) 89 | 90 | def forward(self, x, argmax=False): 91 | x = self.c1(x) 92 | x = self.avgpool(x) 93 | x = torch.flatten(x, 1) 94 | x = self.fc(x) 95 | 96 | w_pred, s_pred, t_pred = torch.split(x, self.cls_splits, 1) 97 | 98 | if argmax: 99 | w_pred = w_pred.argmax(-1) 100 | s_pred = s_pred.argmax(-1) 101 | t_pred = t_pred.argmax(-1) 102 | 103 | ret = { 104 | "weather": w_pred, 105 | "scene": s_pred, 106 | "timeofday": t_pred 107 | } 108 | 109 | return ret 110 | -------------------------------------------------------------------------------- /inference/postproc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import numpy as np 5 | from sklearn.metrics import mean_squared_error 6 | from sklearn.cluster import AgglomerativeClustering 7 | from sklearn.linear_model import RANSACRegressor 8 | 9 | import warnings 10 | warnings.simplefilter('ignore', np.RankWarning) 11 | 12 | def get_clusters(X, y): 13 | s = np.argsort(y) 14 | return np.split(X[s], np.unique(y[s], return_index=True)[1][1:]) 15 | 16 | class PolynomialRegression(object): 17 | def __init__(self, degree=2, coeffs=None): 18 | self.degree = degree 19 | self.coeffs = coeffs 20 | 21 | def fit(self, X, y): 22 | self.coeffs = np.polyfit(X.ravel(), y, self.degree) 23 | 24 | def get_params(self, deep=False): 25 | return {'coeffs': self.coeffs} 26 | 27 | def set_params(self, coeffs=None, random_state=None): 28 | self.coeffs = coeffs 29 | 30 | def predict(self, X): 31 | poly_eqn = np.poly1d(self.coeffs) 32 | 
y_hat = poly_eqn(X.ravel()) 33 | return y_hat 34 | 35 | def score(self, X, y): 36 | return mean_squared_error(y, self.predict(X)) 37 | 38 | def cluster_lane_preds(lanes, lanes_cls, lanes_votes): 39 | lane_clusters = [[] for _ in range(8)] 40 | for lc in range(8): 41 | current_cls = lanes_cls.eq(lc).nonzero() 42 | lind = lanes[current_cls, :2].squeeze() 43 | votes = lanes_votes[:, current_cls].squeeze() 44 | 45 | if lind.shape[0] == 0 or len(lind.shape) != 2: 46 | continue 47 | 48 | votes = (votes.T + lind).cpu().numpy() 49 | clusters = AgglomerativeClustering(n_clusters=None, 50 | distance_threshold=8.0 * 4, linkage='ward').fit_predict(votes) 51 | 52 | clusters = get_clusters(lind.cpu().numpy(), clusters) 53 | lane_clusters[lc] += clusters 54 | 55 | return lane_clusters 56 | 57 | def fast_clustering(lanes, lanes_cls, lanes_votes): 58 | lane_clusters = [[] for _ in range(8)] 59 | for lc in range(8): 60 | current_cls = (lanes_cls == lc).nonzero() 61 | lind = lanes[current_cls, :2].squeeze() 62 | votes = lanes_votes[:, current_cls].squeeze() 63 | 64 | if lind.shape[0] == 0 or len(lind.shape) != 2: 65 | continue 66 | 67 | votes = (votes.T + lind) # .cpu().numpy() 68 | clusters = AgglomerativeClustering(n_clusters=None, 69 | distance_threshold=8.0 * 4, linkage='ward').fit_predict(votes) 70 | 71 | clusters = get_clusters(lind, clusters) 72 | lane_clusters[lc] += clusters 73 | return lane_clusters 74 | 75 | def fit_lanes(lane_clusters): 76 | 77 | lanes_fitted = {i : [] for i in range(len(lane_clusters))} 78 | 79 | for cla, cls_clusters in enumerate(lane_clusters): 80 | for cl in cls_clusters: 81 | 82 | if cl.shape[0] < 5: 83 | continue 84 | 85 | x = cl[:, 0] 86 | y = cl[:, 1] 87 | 88 | ransac = RANSACRegressor(PolynomialRegression(degree=3), 89 | residual_threshold=0.5 * np.std(x), 90 | random_state=0) 91 | 92 | # calculate polynomial 93 | try: 94 | ransac.fit(np.expand_dims(x, axis=1), y) 95 | except ValueError: 96 | continue 97 | 98 | # calculate new x's and y's 99 | x_new = np.linspace(min(x), max(x), len(x)) 100 | y_new = ransac.predict(np.expand_dims(x_new, axis=1)) 101 | 102 | newlane = np.stack([x_new, y_new], axis=-1) 103 | lanes_fitted[cla].append(newlane) 104 | 105 | return lanes_fitted 106 | -------------------------------------------------------------------------------- /dataset/utils/heatmaps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import numpy as np 5 | import torch 6 | 7 | class FixedRadius: 8 | def __init__(self, r: float = 1.): 9 | self.r = r 10 | 11 | def __call__(self, w, h): 12 | return self.r#, self.r 13 | 14 | class CornerNetRadius: 15 | def __init__(self, min_overlap: float = 0.7): 16 | self.min_overlap = min_overlap 17 | 18 | # Explanation: https://github.com/princeton-vl/CornerNet/issues/110 19 | # Source: https://github.com/princeton-vl/CornerNet/blob/master/sample/utils.py 20 | def __call__(self, width, height): 21 | a1 = 1 22 | b1 = (height + width) 23 | c1 = width * height * (1 - self.min_overlap) / (1 + self.min_overlap) 24 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 25 | r1 = (b1 + sq1) / 2 26 | 27 | a2 = 4 28 | b2 = 2 * (height + width) 29 | c2 = (1 - self.min_overlap) * width * height 30 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 31 | r2 = (b2 + sq2) / 2 32 | 33 | a3 = 4 * self.min_overlap 34 | b3 = -2 * self.min_overlap * (height + width) 35 | c3 = (self.min_overlap - 1) * width * height 36 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 37 | r3 = (b3 + sq3) / 2 38 | return 
max(min(r1, r2, r3) / 6, 2) 39 | 40 | def kps_to_heatmaps(annotation, w, h, sigma=None): 41 | 42 | heatmaps_list = [] 43 | 44 | for cls_keypoints in annotation: 45 | 46 | # generate heatmap from list of (x, y, z) coordinates 47 | # retrieve one (W,H) heatmap for each keypoint 48 | if len(cls_keypoints) != 0: 49 | # Normalize coordinates 50 | # cls_keypoints = torch.tensor(cls_keypoints) / torch.tensor([IMG_HEIGHT, IMG_WIDTH]) 51 | 52 | # Generate heatmap 53 | if sigma is None: 54 | assert cls_keypoints.shape[-1] == 3 55 | kern = make_gkern_2d(h, w, None) 56 | heatmaps = torch.stack([kern(x, s) for x, s in zip(cls_keypoints[..., :2], 57 | cls_keypoints[..., -1])], dim=0) 58 | else: 59 | assert cls_keypoints.shape[-1] == 2 60 | kern = make_gkern_2d(h, w, sigma) 61 | heatmaps = torch.stack([kern(x) for x in cls_keypoints], dim=0) 62 | else: 63 | heatmaps = torch.zeros(1, h, w) 64 | 65 | # Combine individual heatmaps in a single tensor 66 | heatmap = torch.max(heatmaps, dim=0)[0] 67 | heatmaps_list.append(heatmap) 68 | 69 | # Combine keypoints heatmaps in a single tensor 70 | total_heatmap = torch.stack(heatmaps_list, 0) 71 | 72 | return total_heatmap 73 | 74 | def make_gkern_2d(h, w, s=None, device='cpu'): 75 | if s is None: 76 | def gk(x, s): 77 | return gkern_2d(h, w, x, s, device=device) 78 | else: 79 | def gk(x): 80 | return gkern_2d(h, w, x, s, device=device) 81 | 82 | return gk 83 | 84 | def gkern_2d(h, w, center, s, device='cuda'): 85 | # type: (int, int, Tuple[int, int], float, str) -> torch.Tensor 86 | """ 87 | :param h: heatmap image height 88 | :param w: heatmap image width 89 | :param center: Gaussian center (x,y,z) 90 | :param s: Gaussian sigma 91 | :param device: 'cuda' or 'cpu' -> device used do compute heatmaps 92 | :return: Torch tensor with shape (h, w, d) with A Gaussian centered in `center` 93 | """ 94 | 95 | x = torch.arange(0, w, 1).type('torch.FloatTensor').to(device) 96 | y = torch.arange(0, h, 1).type('torch.FloatTensor').to(device) 97 | 98 | y = y.unsqueeze(1) 99 | 100 | x0 = center[0] # * w 101 | y0 = center[1] # * h 102 | 103 | g = torch.exp(-1 * ((x - x0) ** 2 + (y - y0) ** 2) / s ** 2) 104 | 105 | return g 106 | -------------------------------------------------------------------------------- /dataset/collate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import abc 5 | import torch 6 | import re 7 | import collections 8 | from torch._six import string_classes 9 | 10 | np_str_obj_array_pattern = re.compile(r'[SaUO]') 11 | 12 | default_collate_err_msg_format = ( 13 | "default_collate: batch must contain tensors, numpy arrays, numbers, " 14 | "dicts or lists; found {}") 15 | 16 | def ignore_collate(ignore_keys): 17 | 18 | def collate_fn(batch): 19 | return _default_collate(batch, ignore_keys) 20 | return collate_fn 21 | 22 | def _default_collate(batch, ignore_keys): 23 | elem = batch[0] 24 | elem_type = type(elem) 25 | if isinstance(elem, torch.Tensor): 26 | out = None 27 | if torch.utils.data.get_worker_info() is not None: 28 | # If we're in a background process, concatenate directly into a 29 | # shared memory tensor to avoid an extra copy 30 | numel = sum(x.numel() for x in batch) 31 | storage = elem.storage()._new_shared(numel) 32 | out = elem.new(storage).resize_(len(batch), *list(elem.size())) 33 | return torch.stack(batch, 0, out=out) 34 | elif elem_type.__module__ == 'numpy' and elem_type.__name__ != 'str_' \ 35 | and elem_type.__name__ != 'string_': 36 | if 
elem_type.__name__ == 'ndarray' or elem_type.__name__ == 'memmap': 37 | # array of string classes and object 38 | if np_str_obj_array_pattern.search(elem.dtype.str) is not None: 39 | raise TypeError(default_collate_err_msg_format.format(elem.dtype)) 40 | 41 | return _default_collate([torch.as_tensor(b) for b in batch], ignore_keys) 42 | elif elem.shape == (): # scalars 43 | return torch.as_tensor(batch) 44 | elif isinstance(elem, float): 45 | return torch.tensor(batch, dtype=torch.float64) 46 | elif isinstance(elem, int): 47 | return torch.tensor(batch) 48 | elif isinstance(elem, string_classes): 49 | return batch 50 | elif isinstance(elem, collections.abc.Mapping): 51 | try: 52 | return elem_type({key: _default_collate([d[key] for d in batch], ignore_keys) 53 | if key not in ignore_keys else [d[key] for d in batch] for key in elem}) 54 | except TypeError: 55 | # The mapping type may not support `__init__(iterable)`. 56 | return {key: _default_collate([d[key] for d in batch], ignore_keys) for key in elem} 57 | elif isinstance(elem, tuple) and hasattr(elem, '_fields'): # namedtuple 58 | return elem_type(*(_default_collate(samples, ignore_keys) for samples in zip(*batch))) 59 | elif isinstance(elem, collections.abc.Sequence): 60 | # check to make sure that the elements in batch have consistent size 61 | it = iter(batch) 62 | elem_size = len(next(it)) 63 | if not all(len(elem) == elem_size for elem in it): 64 | raise RuntimeError('each element in list of batch should be of equal size') 65 | transposed = list(zip(*batch)) # It may be accessed twice, so we use a list. 66 | 67 | if isinstance(elem, tuple): 68 | return [_default_collate(samples, ignore_keys) for samples in transposed] # Backwards compatibility. 69 | else: 70 | try: 71 | return elem_type([_default_collate(samples, ignore_keys) for samples in transposed]) 72 | except TypeError: 73 | # The sequence type may not support `__init__(iterable)` (e.g., `range`). 
74 | return [_default_collate(samples, ignore_keys) for samples in transposed] 75 | 76 | raise TypeError(default_collate_err_msg_format.format(elem_type)) 77 | -------------------------------------------------------------------------------- /dataset/utils/process_detection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from .heatmaps import kps_to_heatmaps 8 | from .heatmaps import CornerNetRadius, FixedRadius 9 | from .cls import DET_CLS, WTR_CLS, TD_CLS, SN_CLS 10 | 11 | 12 | class DetProcessor: 13 | 14 | def __init__(self, classes, output_s, target_w, target_h): 15 | # type: (int, int, int, int) -> None 16 | """ 17 | :param classes: Number of lane classes 18 | :param output_s: Output stride wrt input shape 19 | :param target_w: output width (output_s * input width) 20 | :param target_h: output height (output_s * input height) 21 | """ 22 | 23 | self.classes = classes 24 | self.output_s = output_s 25 | self.target_w = target_w 26 | self.target_h = target_h 27 | 28 | self.sigma = CornerNetRadius() 29 | 30 | def bounding_boxes(self, annot): 31 | # Preprocess label (boxes) 32 | labels = annot.get("labels", None) 33 | if labels is not None: 34 | boxes = torch.stack([torch.tensor([*l['box2d'].values()]) for l in labels]) # x1,y1,x2,y2 35 | classes = [DET_CLS[l['category']] for l in labels] 36 | occlusion = [l['attributes']['occluded'] for l in labels] 37 | 38 | # Remove 'other vehicle' 39 | io = [i for i, v in enumerate(classes) if v >= 10] 40 | boxes = [k for i, k in enumerate(boxes) if i not in io] 41 | classes = [c for i, c in enumerate(classes) if i not in io] 42 | occlusion = [o for i, o in enumerate(occlusion) if i not in io] 43 | 44 | else: 45 | boxes = [] 46 | classes = [] 47 | occlusion = [] 48 | 49 | return labels, boxes, classes, occlusion 50 | 51 | def scene_classification(self, annot): 52 | attrs = annot.get("attributes", None) 53 | cls = {} 54 | 55 | if attrs is not None: 56 | cls["weather"] = WTR_CLS[attrs["weather"]] 57 | cls["scene"] = SN_CLS[attrs["scene"]] 58 | cls["timeofday"] = TD_CLS[attrs["timeofday"]] 59 | 60 | return cls 61 | 62 | def targets(self, labels, bboxes, classes): 63 | if labels is not None and len(bboxes) > 0: 64 | 65 | # Obtain box centers in output space 66 | boxes_pt = torch.tensor(bboxes) / self.output_s 67 | boxes_cwh = self.xyxy2cxcywh(boxes_pt) 68 | radii = torch.tensor([self.sigma(w, h) for w, h in boxes_cwh[..., 2:] * self.output_s]) 69 | 70 | # Clip and round 71 | centers = boxes_cwh[:, :2] 72 | centers[:, 0] = torch.clip(centers[:, 0], 0, self.target_w - 1) 73 | centers[:, 1] = torch.clip(centers[:, 1], 0, self.target_h - 1) 74 | centers = torch.round(centers) 75 | 76 | assert centers[:, 0].max() < self.target_w and centers[:, 1].max() < self.target_h # <-- shit here 77 | assert centers[:, 0].min() >= 0 and centers[:, 1].min() >= 0 78 | 79 | # Compute target heatmaps 80 | kp_cls = [[] for _ in range(self.classes)] 81 | for ic, c in enumerate(classes): 82 | kp_cls[c].append(torch.cat([centers[ic], radii[ic].unsqueeze(0)])) # cx, cy, sigma 83 | 84 | kp_cls = [torch.stack(t) if len(t) > 0 else torch.tensor([]) for t in kp_cls] 85 | 86 | # Generate target (Heatmap) 87 | heatmap = kps_to_heatmaps(kp_cls, self.target_w, self.target_h, sigma=None) 88 | 89 | # Compute target offsets 90 | ofs_x = boxes_pt[..., 0::2] - centers[..., 0].unsqueeze(-1) 91 | ofs_y = boxes_pt[..., 1::2] - centers[..., 1].unsqueeze(-1) 
92 | ofs = torch.cat([ofs_x, ofs_y], dim=-1) # (x1-cx, x2-cx), (y1-cy, y2-cy) 93 | 94 | else: 95 | heatmap = torch.zeros((self.classes, self.target_h, self.target_w), dtype=torch.float32) 96 | centers = torch.zeros((0, 2), dtype=torch.int) 97 | ofs = torch.zeros((0, 4), dtype=torch.float32) 98 | 99 | return heatmap, centers, ofs 100 | 101 | @staticmethod 102 | def xyxy2cxcywh(boxes): 103 | w = (boxes[:, 2] - boxes[:, 0]) 104 | h = (boxes[:, 3] - boxes[:, 1]) 105 | cx = boxes[:, 0] + w / 2 106 | cy = boxes[:, 1] + h / 2 107 | 108 | return torch.stack([cx, cy, w, h], dim=1) 109 | -------------------------------------------------------------------------------- /data/tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import numpy as np 5 | 6 | from scipy.special import comb 7 | from scipy.spatial.distance import cdist 8 | from scipy.optimize import linear_sum_assignment 9 | 10 | def bernstein_poly(i, n, t): 11 | """ 12 | The Bernstein polynomial of n, i as a function of t 13 | """ 14 | 15 | return comb(n, i) * ( t**(n-i) ) * (1 - t)**i 16 | 17 | 18 | def bezier_curve(points, nTimes=1000): 19 | """ 20 | Given a set of control points, return the 21 | bezier curve defined by the control points. 22 | 23 | points should be a list of lists, or list of tuples 24 | such as [ [1,1], 25 | [2,3], 26 | [4,5], ..[Xn, Yn] ] 27 | nTimes is the number of time steps, defaults to 1000 28 | 29 | See http://processingjs.nihongoresources.com/bezierinfo/ 30 | """ 31 | 32 | nPoints = len(points) 33 | xPoints = np.array([p[0] for p in points]) 34 | yPoints = np.array([p[1] for p in points]) 35 | 36 | t = np.linspace(0.0, 1.0, nTimes) 37 | 38 | polynomial_array = np.array([ bernstein_poly(i, nPoints-1, t) for i in range(0, nPoints) ]) 39 | 40 | xvals = np.dot(xPoints, polynomial_array) 41 | yvals = np.dot(yPoints, polynomial_array) 42 | 43 | return xvals, yvals 44 | 45 | def get_bezier_parameters(X, Y, degree=3): 46 | """ Least square qbezier fit using penrose pseudoinverse. 47 | 48 | Parameters: 49 | 50 | X: array of x data. 51 | Y: array of y data. Y[0] is the y point for X[0]. 52 | degree: degree of the Bézier curve. 2 for quadratic, 3 for cubic. 53 | 54 | Based on https://stackoverflow.com/questions/12643079/b%C3%A9zier-curve-fitting-with-scipy 55 | and probably on the 1998 thesis by Tim Andrew Pastva, "Bézier Curve Fitting". 56 | """ 57 | if degree < 1: 58 | raise ValueError('degree must be 1 or greater.') 59 | 60 | if len(X) != len(Y): 61 | raise ValueError('X and Y must be of the same length.') 62 | 63 | if len(X) < degree + 1: 64 | raise ValueError(f'There must be at least {degree + 1} points to ' 65 | f'determine the parameters of a degree {degree} curve. ' 66 | f'Got only {len(X)} points.') 67 | 68 | def bpoly(n, t, k): 69 | """ Bernstein polynomial when a = 0 and b = 1. """ 70 | return t ** k * (1 - t) ** (n - k) * comb(n, k) 71 | 72 | # return comb(n, i) * ( t**(n-i) ) * (1 - t)**i 73 | 74 | def bmatrix(T): 75 | """ Bernstein matrix for Bézier curves. 
""" 76 | return np.matrix([[bpoly(degree, t, k) for k in range(degree + 1)] for t in T]) 77 | 78 | def least_square_fit(points, M): 79 | M_ = np.linalg.pinv(M) 80 | return M_ * points 81 | 82 | T = np.linspace(0, 1, len(X)) 83 | M = bmatrix(T) 84 | points = np.array(list(zip(X, Y))) 85 | 86 | final = least_square_fit(points, M).tolist() 87 | final[0] = [X[0], Y[0]] 88 | final[len(final) - 1] = [X[len(X) - 1], Y[len(Y) - 1]] 89 | return final 90 | 91 | 92 | def compare_labels(l1, l2): 93 | 94 | pt1 = l1["keypoints"] 95 | pt2 = l2["keypoints"] 96 | 97 | l = max(len(pt1), len(pt2)) 98 | assert l >= 3 99 | 100 | if len(pt1) == l: 101 | pts = l2["poly2d"][0]['vertices'] 102 | xvals, yvals = bezier_curve(pts, nTimes=l) 103 | pt2 = np.stack([xvals, yvals], axis=-1) 104 | else: 105 | pts = l1["poly2d"][0]['vertices'] 106 | xvals, yvals = bezier_curve(pts, nTimes=l) 107 | pt1 = np.stack([xvals, yvals], axis=-1) 108 | 109 | pt1, pt2 = np.array(pt1), np.array(pt2) 110 | closest = cdist(pt1, pt2).argmin(0) 111 | 112 | return pt1, pt2[closest] 113 | 114 | def dist(k1, k2): 115 | 116 | if k1 is None or k2 is None: 117 | return 1e5 118 | 119 | if k1['id'] == k2['id']: 120 | return 1e5 121 | 122 | c1 = (k1['attributes']['laneDirection'] == k1['attributes']['laneDirection'] == 'parallel') 123 | c2 = k1['category'] == k2['category'] 124 | c3 =True #"double" not in k1['category'] 125 | 126 | if not (c1 and c2 and c3): 127 | return 1e5 128 | 129 | pt1, pt2 = compare_labels(k1, k2) 130 | dist = np.linalg.norm(pt1 - pt2, axis=-1).mean() 131 | 132 | return dist 133 | -------------------------------------------------------------------------------- /dataset/utils/process_lanes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | import torch 4 | import numpy as np 5 | 6 | from dataset.utils.heatmaps import kps_to_heatmaps 7 | from dataset.utils.cls import LANE_CLS 8 | 9 | class LaneProcessor: 10 | 11 | def __init__(self, classes, output_s, target_w, target_h): 12 | # type: (int, int, int, int) -> None 13 | """ 14 | :param classes: Number of lane classes 15 | :param output_s: Output stride wrt input shape 16 | :param target_w: output width (output_s * input width) 17 | :param target_h: output height (output_s * input height) 18 | """ 19 | 20 | self.classes = classes 21 | self.output_s = output_s 22 | self.target_w = target_w 23 | self.target_h = target_h 24 | 25 | def keypoints(self, annot): 26 | # type: (dict) -> (list, list, list, list) 27 | """ 28 | :param annot: 29 | :return: 30 | """ 31 | 32 | labels = annot.get("labels", None) 33 | if labels is not None: 34 | labels = [l for l in labels if l is not None] 35 | if len(labels) == 0: 36 | return labels, [], [], [] 37 | 38 | keypoints = torch.cat([torch.tensor(l['keypoints']) for l in labels]) 39 | assert len(keypoints.shape) == 2 and keypoints.shape[0] >= 1 40 | 41 | lenghts = [len(l['keypoints']) for l in labels] 42 | cls = [LANE_CLS[l['category']] for i, l in enumerate(labels) for _ in range(lenghts[i])] 43 | ids = [int(l['id']) for i, l in enumerate(labels) for _ in range(lenghts[i])] 44 | 45 | # Remove non visible keypoints 46 | visible = torch.stack([keypoints[:, 0].ceil() < 1280, keypoints[:, 1].ceil() < 720, 47 | keypoints[:, 0].floor() >= 0, keypoints[:, 1].floor() >= 0]) 48 | assert len(visible.shape) == 2 and visible.shape[0] >= 1 49 | visible = visible.min(dim=0)[0] 50 | 51 | keypoints = keypoints[visible.nonzero().squeeze(1)].tolist() 52 | classes = [cls[c] for 
c in visible.nonzero().squeeze(1)] 53 | ids = [ids[c] for c in visible.nonzero().squeeze(1)] 54 | 55 | return labels, keypoints, classes, ids 56 | 57 | return labels, [], [], [] 58 | 59 | def targets(self, labels, keypoints, classes, ids): 60 | 61 | if labels is not None and len(keypoints) > 0: 62 | 63 | all_ids = set(ids) 64 | 65 | # Clip and round 66 | keypoints = torch.tensor(keypoints) / self.output_s 67 | centers = keypoints.clone() 68 | 69 | centers[:, 0] = torch.clip(centers[:, 0], 0, self.target_w - 1) 70 | centers[:, 1] = torch.clip(centers[:, 1], 0, self.target_h - 1) 71 | centers = torch.round( 72 | centers) 73 | 74 | assert centers[:, 0].max() < self.target_w and centers[:, 1].max() < self.target_h 75 | assert centers[:, 0].min() >= 0 and centers[:, 1].min() >= 0 76 | 77 | # Generate target (Heatmap) 78 | kp_cls = [[] for _ in range(self.classes)] 79 | for ic, c in enumerate(classes): 80 | # kp_cls[c].append(centers[ic]) #<--- to enable rounding 81 | kp_cls[c].append(keypoints[ic]) 82 | 83 | kp_cls = [torch.stack(t) if len(t) > 0 else torch.tensor([]) for t in kp_cls] 84 | heatmap = kps_to_heatmaps(kp_cls, self.target_w, self.target_h, sigma=2) 85 | 86 | # Generate dequantizzation offsets 87 | quant_offsets = (keypoints - centers).to(torch.float32) 88 | 89 | # Group keypoints belonging to the same lane 90 | lane_ids = torch.tensor(ids) 91 | lanes_kp = [keypoints[lane_ids.eq(i).nonzero().squeeze(1)] for i in all_ids] 92 | centers = [centers[lane_ids.eq(i).nonzero().squeeze(1)] for i in all_ids] 93 | quant_offsets = [quant_offsets[lane_ids.eq(i).nonzero().squeeze(1)] for i in all_ids] 94 | 95 | # Generate offsets to lane center 96 | l_offsets = [b[len(b) // 2] - b for b in centers] 97 | 98 | # Flatten 99 | centers = torch.cat(centers) 100 | center_offsets = torch.cat(l_offsets) 101 | quant_offsets = torch.cat(quant_offsets) 102 | 103 | else: 104 | heatmap = torch.zeros((self.classes, self.target_h, self.target_w), dtype=torch.float32) 105 | centers = torch.zeros((0, 2), dtype=torch.int) 106 | lanes_kp = torch.zeros((0, 2), dtype=torch.float32) 107 | center_offsets = torch.zeros((0, 2), dtype=torch.float32) 108 | quant_offsets = torch.zeros((0, 2), dtype=torch.float32) 109 | 110 | return heatmap, centers, center_offsets, quant_offsets, lanes_kp 111 | -------------------------------------------------------------------------------- /models/losses/task_losses.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | from models.losses.heatmap_loss import * 8 | 9 | from conf import Conf 10 | 11 | class LanesLoss(nn.Module): 12 | 13 | def __init__(self, cnf): 14 | # type: (Conf) -> () 15 | 16 | super().__init__() 17 | self.cnf = cnf 18 | self.q_offsets = cnf.base.get("lane_q_offsets", False) 19 | 20 | heatmap_loss = self.cnf.loss.heatmap_loss.get("name", "nn.MSELoss") 21 | self.heatmap_loss = eval(heatmap_loss)(**self.cnf.loss.heatmap_loss.args) 22 | self.offset_loss = nn.L1Loss() 23 | 24 | def forward(self, preds, targets): 25 | # type: (dict[torch.tensor, ...], dict[torch.tensor, ...]) -> torch.tensor 26 | 27 | hm_true, kp_true, ofs_true, q_ofs = targets["heatmaps"], targets["keypoints"], \ 28 | targets["offsets"], targets["quant_offsets"] 29 | 30 | hm_pred, ofs_pred = preds["heatmaps"], preds["offsets"] 31 | 32 | # Heatmap 33 | hm_loss = self.heatmap_loss(hm_pred, hm_true) 34 | 35 | # Embeddings 36 | b_idx = 
torch.tensor([i for i, b in enumerate(kp_true) for _ in range(b.shape[0])]) 37 | kp_true = torch.cat(kp_true).long() 38 | 39 | embs_pred = ofs_pred[b_idx, :, kp_true[:, 1], kp_true[:, 0]] 40 | embs_true = torch.cat(ofs_true) 41 | 42 | embd_loss = self.offset_loss(embs_pred, embs_true) * 0.8 # 0.4 43 | 44 | # Dequantizzation offsets 45 | quant_loss = torch.tensor(0, device=hm_true.device) 46 | if self.q_offsets: 47 | q_pred = targets["quant"] 48 | q_pred = q_pred[b_idx, :, kp_true[:, 1], kp_true[:, 0]] 49 | q_ofs = torch.cat(q_ofs) 50 | 51 | quant_loss = self.offset_loss(q_pred, q_ofs) 52 | 53 | return embd_loss + hm_loss + quant_loss, {"l_heat": hm_loss.item(), 54 | "l_emb": embd_loss.item(), "l_quant": quant_loss.item()} 55 | 56 | class ObjectsLoss(nn.Module): 57 | 58 | def __init__(self, cnf): 59 | # type: (Conf) -> () 60 | 61 | super().__init__() 62 | self.cnf = cnf 63 | self.occlusion = cnf.base.get("occlusion_cls", True) 64 | 65 | # Task specific losses 66 | heatmap_loss = self.cnf.loss.heatmap_loss.get("name", "nn.MSELoss") 67 | self.heatmap_loss = eval(heatmap_loss)(**self.cnf.loss.heatmap_loss.args) 68 | self.offset_loss = nn.L1Loss() 69 | 70 | def forward(self, preds, targets): 71 | # type: (dict[torch.tensor, ...], dict[torch.tensor, ...]) -> torch.tensor 72 | 73 | hm_true, oc_true, ofs_true, ocl_true = targets["heatmaps"], targets["centers"], \ 74 | targets["offsets"], targets["occlusion"] 75 | 76 | hm_pred, ofs_pred, ocl_pred = preds["heatmaps"], preds["offsets"], preds["occlusion"] 77 | 78 | # Heatmap 79 | hm_loss = self.heatmap_loss(hm_pred, hm_true) 80 | 81 | # xxyy offsets 82 | # (x1-cx, x2-cx), (y1-cy, y2-cy) 83 | b_idx = torch.tensor([i for i, b in enumerate(oc_true) for _ in range(b.shape[0])]) 84 | oc_true = torch.cat(oc_true).long() 85 | 86 | ofs_pred = ofs_pred[b_idx, :, oc_true[:, 1], oc_true[:, 0]] 87 | ofs_true = torch.cat(ofs_true) 88 | 89 | ofs_loss = self.offset_loss(ofs_pred, ofs_true) 90 | 91 | # Occlusion classification 92 | if self.occlusion: 93 | ocl_pred = ocl_pred[b_idx, :, oc_true[:, 1], oc_true[:, 0]] 94 | ocl_true = torch.cat(ocl_true) 95 | ocl_loss = F.binary_cross_entropy_with_logits(ocl_pred.squeeze(-1), ocl_true) #* 0.5 96 | else: 97 | ocl_loss = torch.tensor(0.0, device=hm_pred.device) 98 | 99 | 100 | return hm_loss + ofs_loss + ocl_loss, {"d_heat": hm_loss.item(), 101 | "d_ofs": ofs_loss.item(), "d_ocl": ocl_loss.item()} 102 | 103 | class ClsLoss(nn.Module): 104 | 105 | def __init__(self, cnf): 106 | # type: (Conf) -> () 107 | 108 | super().__init__() 109 | self.cnf = cnf 110 | 111 | scn_loss = self.cnf.loss.scn_loss.get("name", "nn.CrossEntropyLoss") 112 | self.scn_loss = eval(scn_loss)(**self.cnf.loss.scn_loss.args) 113 | 114 | def forward(self, preds, targets): 115 | # type: (dict[torch.tensor, ...], dict[torch.tensor, ...]) -> torch.tensor 116 | 117 | # weather, scene, timeofday 118 | w_pred = preds["weather"] 119 | s_pred = preds["scene"] 120 | t_pred = preds["timeofday"] 121 | 122 | s1 = self.scn_loss(w_pred, targets["weather"]) 123 | s2 = self.scn_loss(s_pred, targets["scene"]) 124 | s3 = self.scn_loss(t_pred, targets["timeofday"]) 125 | 126 | scn_loss = (s1 + s2 + s3) * 0.1 127 | 128 | return scn_loss, {"scn": scn_loss.item()} 129 | -------------------------------------------------------------------------------- /inference/trt_inference/cerberus_trt.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import numpy as np 3 | import numba as nb 4 | 5 | from profiler import 
Profiler 6 | from .trt_infer import TRTModel, TRTInference 7 | 8 | def cerberus_model(model): 9 | 10 | class Cerberus(TRTModel): 11 | if model is None: 12 | ENGINE_PATH = Path(__file__).parent.parent.parent / 'weights' / 'last_sim.trt' 13 | MODEL_PATH = Path(__file__).parent.parent.parent / 'weights' / 'last_sim.onnx' 14 | else: 15 | MODEL_PATH = Path(model) 16 | ENGINE_PATH = Path(model).with_suffix('.trt') 17 | 18 | INPUT_SHAPE = (3, 320, 640) 19 | OUTPUT_LAYOUT = 1 20 | 21 | return Cerberus 22 | 23 | class CerberusInference: 24 | def __init__(self, model=None): 25 | 26 | self.model = cerberus_model(model) 27 | self.batch_size = 1 28 | 29 | self.backend = TRTInference(self.model, 1) 30 | self.inp_handle = self.backend.input.host.reshape(*self.model.INPUT_SHAPE) 31 | 32 | self.preds = [] 33 | 34 | def __call__(self, frame, raw=False): 35 | """Extract feature embeddings from bounding boxes synchronously.""" 36 | self.extract_async(frame) 37 | return self.postprocess(raw) 38 | 39 | def extract_async(self, frame): 40 | # pipeline inference and preprocessing the next batch in parallel 41 | self._preprocess(frame) 42 | self.backend.infer_async() 43 | 44 | def postprocess(self, raw=False): 45 | """Synchronizes, applies postprocessing, and returns a NxM matrix of N 46 | extracted embeddings with dimension M. 47 | This API should be called after `extract_async`. 48 | """ 49 | 50 | preds_out = self.backend.synchronize() 51 | 52 | if raw: 53 | return preds_out 54 | 55 | with Profiler('inference_decode'): 56 | ## Decode boxes 57 | d_offsets = preds_out[2].reshape(-1, 80, 160) 58 | d_heatmaps = preds_out[4].reshape(-1, 80, 160) 59 | d_occl = preds_out[3].reshape(-1, 80, 160) 60 | 61 | d_scores, d_indices, d_labels = self._decode_heatmap(d_heatmaps, th=0.6) 62 | d_occl = self._sigmoid(d_occl[0, d_indices[:, 1], d_indices[:, 0]]) 63 | 64 | bb_ofs = d_offsets[:, d_indices[:, 1], d_indices[:, 0]] 65 | x1x2 = (bb_ofs[:2] + d_indices[..., 0][np.newaxis, :]) * 4 66 | y1y2 = (bb_ofs[2:] + d_indices[..., 1][np.newaxis, :]) * 4 67 | boxes = np.stack([x1x2[0], y1y2[0], x1x2[1], y1y2[1], d_scores, d_labels, d_occl], axis=-1) 68 | 69 | # Decode lanes 70 | l_heatmaps = preds_out[1].reshape(-1, 80, 160) # 8 71 | l_offsets = preds_out[0].reshape(-1, 80, 160) # 2 72 | 73 | l_scores, l_indices, l_labels = self._decode_heatmap(l_heatmaps, th=0.6) 74 | 75 | l_votes = l_offsets[:, l_indices[:, 1], l_indices[:, 0]] * 4 76 | l_indices = l_indices * 4 77 | lanes = np.concatenate([l_indices.astype(np.float32), l_scores[..., np.newaxis]], axis=-1) 78 | 79 | # Decode classification results 80 | cls = tuple(preds_out[5:]) 81 | 82 | return boxes, (lanes, l_labels, l_votes), cls 83 | 84 | 85 | @staticmethod 86 | def _decode_heatmap(heatmap, th=0.6): 87 | labels = np.argmax(heatmap, axis=0) 88 | heatmap = np.take_along_axis(heatmap, labels[np.newaxis,], 0)[0] 89 | 90 | indices = np.stack(np.nonzero(heatmap > th), axis=-1)[:, ::-1] 91 | scores = heatmap[indices[:, 1], indices[:, 0]] 92 | labels = labels[indices[:, 1], indices[:, 0]] 93 | 94 | return scores, indices, labels 95 | 96 | def _preprocess(self, img): 97 | self._normalize(img, self.inp_handle) 98 | 99 | @staticmethod 100 | @nb.njit(fastmath=True, nogil=True, cache=True) 101 | def _normalize(img, out): 102 | # HWC -> CHW 103 | chw = img.transpose(2, 0, 1) 104 | # Normalize using ImageNet's mean and std 105 | out[0, ...] = (chw[0, ...] / 255. - 0.485) / 0.229 106 | out[1, ...] = (chw[1, ...] / 255. - 0.456) / 0.224 107 | out[2, ...] = (chw[2, ...] / 255. 
- 0.406) / 0.225 108 | @staticmethod 109 | @nb.njit(fastmath=True, nogil=True, cache=True) 110 | def _sigmoid(z): 111 | return 1 / (1 + np.exp(-z)) 112 | 113 | if __name__ == '__main__': 114 | cb = CerberusInference() 115 | 116 | mdt = 0 117 | for _ in range(100): 118 | src = np.random.rand(320, 640, 3) 119 | y = cb(src) 120 | dt = cb.backend.get_infer_time() 121 | mdt += dt 122 | print(mdt/100) 123 | -------------------------------------------------------------------------------- /data/bdd100k_lane_keypoints.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import os 5 | import cv2 6 | import click 7 | import json 8 | from pathlib import Path 9 | 10 | import torch 11 | from tqdm import tqdm 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | from tools import * 17 | 18 | IMG_WIDTH = 1280 19 | IMG_HEIGHT = 720 20 | 21 | I_SCALE = 2 22 | O_SCALE = 8 23 | 24 | PPK = 25 # Number of pixels per keypoint 25 | 26 | 27 | @click.command() 28 | @click.option('--img_root', '-i', type=click.Path(exists=True), default=None, required=False) 29 | @click.option('--labels_root', '-l', type=click.Path(exists=True), default=None, required=True) 30 | @click.option('--display', '-d', type=bool, default=True, required=False) 31 | @click.option('--out', '-o', type=click.Path(), default=Path('.'), required=True) 32 | def main(img_root, labels_root, display, out): 33 | # type: (Path, Path, bool, Path) -> None 34 | 35 | if display: 36 | assert img_root is not None 37 | 38 | img_root = Path(img_root) 39 | labels_root = Path(labels_root) 40 | 41 | for split in ["val"]: #, "train"]: 42 | 43 | print(f"=> Processing split {split}") 44 | 45 | out_file = out / f"{split}_{PPK}_new.pt" 46 | 47 | # Load annotation file 48 | masks_path = labels_root / "masks"/ split 49 | 50 | polygons_file = labels_root / "polygons" / f"lane_{split}.json" 51 | polygons = json.load(open(polygons_file, "r")) 52 | 53 | for p_index, p_lane in enumerate(tqdm(polygons)): 54 | 55 | frame = None 56 | lanes = None 57 | 58 | # Broken annotation, skip.... 59 | if type(p_lane) == list: 60 | continue 61 | 62 | if display: 63 | 64 | # Load frame 65 | image_file = p_lane['name'] 66 | image_file = img_root / split / image_file 67 | frame = cv2.imread(str(image_file)) 68 | 69 | # Load mask 70 | mask_file = Path(p_lane['name']).stem + '.png' 71 | mask_file = masks_path / mask_file 72 | mask = cv2.imread(str(mask_file))#[..., 0] 73 | 74 | """ 75 | for p in mask[mask != 255]: 76 | d = (p & 32) >> 5 # direction (parallel or perpendicular) 77 | s = (p & 16) >> 4 # style (full or dashed) 78 | b = (p & 8) >> 3 # background (lane (0) or background (1)) 79 | c = (p & 7) # class (road curb, crosswalk, double white, double yellow, double other color, 80 | # single white, single yellow, single other color.) 
(8) 81 | """ 82 | lanes = 1-((mask & 8) >> 3) # direction (parallel or perpendicular) 83 | 84 | labels = p_lane.get('labels', None) 85 | if labels is None: 86 | continue 87 | 88 | for il, l in enumerate(labels): 89 | assert len(l["poly2d"]) == 1 90 | pts = l["poly2d"][0]['vertices'] 91 | 92 | # Define number of points according to length 93 | nppt = np.array(pts) 94 | tot_l = 0 95 | for ip, p in enumerate(nppt): 96 | if ip == len(pts) - 1: 97 | break 98 | 99 | l = np.linalg.norm(p-nppt[ip+1]) 100 | tot_l += l 101 | 102 | # Compute beizer cube curve 103 | xvals, yvals = bezier_curve(pts, nTimes=max(3, int(tot_l//PPK))) 104 | pt = np.stack([xvals, yvals], axis=-1) #.astype(np.int32) 105 | labels[il]['keypoints'] = pt#.tolist() 106 | 107 | # ---- Filter double lines ---- 108 | all_dist = [] 109 | for i1, l1 in enumerate(labels): 110 | 111 | i_dist = [] 112 | for i2, l2 in enumerate(labels): 113 | d = dist(l1, l2) 114 | i_dist.append(d) 115 | 116 | all_dist.append(i_dist) 117 | 118 | all_dist = np.array(all_dist) 119 | min_dist = np.argmin(all_dist, -1) 120 | 121 | pairs = [] 122 | for id, d in enumerate(min_dist): 123 | if min_dist[id] == d and min_dist[d] == id: 124 | if [d, id] not in pairs: 125 | pairs.append([id, d]) 126 | 127 | # Replace double lines with mean line 128 | for p in pairs: 129 | if all_dist[p[0], p[1]] < 80: 130 | 131 | # Compute mean line 132 | l1 = labels[p[0]] 133 | l2 = labels[p[1]] 134 | pt1, pt2 = compare_labels(l1, l2) 135 | pt3 = (pt1 + pt2) / 2 136 | 137 | # Fit new curve 138 | n = pt3.shape[0] 139 | x, y = pt3[:, 0], pt3[:, 1] 140 | 141 | if n > 3: 142 | v = get_bezier_parameters(x, y) 143 | xvals, yvals = bezier_curve(v, nTimes=n) 144 | pt3 = np.stack([xvals, yvals], axis=-1) 145 | 146 | # Update 147 | labels[p[0]]["keypoints"] = pt3 148 | labels[p[1]] = None 149 | 150 | # plot 151 | if display: 152 | for l in labels: 153 | if l is None: 154 | continue 155 | 156 | pt = l["keypoints"] 157 | pt = np.array(pt).astype(np.int32) 158 | for c in pt: 159 | frame = cv2.circle(frame, (c[0], c[1]), 3, (0,255,0), thickness=3) 160 | 161 | # Append 162 | polygons[p_index]['labels'] = labels 163 | 164 | # Display result 165 | if display: 166 | cv2.imshow("frame", frame) 167 | cv2.imshow("lanes", lanes * 255) 168 | 169 | while cv2.waitKey(1) != ord('q'): 170 | pass 171 | 172 | # Save 173 | #torch.save(polygons, out_file) 174 | 175 | 176 | if __name__ == '__main__': 177 | main() 178 | """ 179 | -i /home/carmelo/DATASETS/BDD100K/bdd100k_images/images/100k 180 | -l /home/carmelo/DATASETS/BDD100K/bdd100k_lanes/labels/lane 181 | """ 182 | -------------------------------------------------------------------------------- /models/losses/heatmap_loss.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from torch import nn 6 | import torch.nn.functional as F 7 | 8 | __all__ = [ 9 | 'CornerNetFocalLoss', 'QualityFocalLoss', 'AdaptiveWingLoss', 'WMSELoss' 10 | ] 11 | 12 | # reference: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/gaussian_focal_loss.py 13 | # https://github.com/gau-nernst/centernet-lightning/blob/9fa4571904f1d68703f1cf4fa6e93e3c53d2971f/centernet_lightning/losses/heatmap_losses.py 14 | class CornerNetFocalLoss(nn.Module): 15 | """CornerNet Focal Loss. Use logits to improve numerical stability. 
CornerNet: https://arxiv.org/abs/1808.01244 16 | """ 17 | 18 | # reference implementations 19 | # https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/losses/gaussian_focal_loss.py 20 | def __init__(self, alpha: float = 2, beta: float = 2, reduction: str = "mean"): 21 | """CornerNet Focal Loss. Default values from the paper 22 | 23 | Args: 24 | alpha: control the modulating factor to reduce the impact of easy examples. This is gamma in the original Focal loss 25 | beta: control the additional weight for negative examples when y is between 0 and 1 26 | reduction: either none, sum, or mean 27 | """ 28 | super().__init__() 29 | assert reduction in ("none", "sum", "mean") 30 | self.alpha = alpha 31 | self.beta = beta 32 | self.reduction = reduction 33 | 34 | def forward(self, inputs: torch.Tensor, targets: torch.Tensor): 35 | 36 | pos_inds = targets.eq(1).float() 37 | neg_inds = targets.lt(1).float() 38 | 39 | neg_weights = torch.pow(1 - targets, 4) 40 | # clamp min value is set to 1e-12 to maintain the numerical stability 41 | pred = torch.clamp(inputs, 1e-12) 42 | 43 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 44 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 45 | 46 | num_pos = pos_inds.float().sum() 47 | pos_loss = pos_loss.sum() 48 | neg_loss = neg_loss.sum() 49 | 50 | if num_pos == 0: 51 | loss = -neg_loss 52 | else: 53 | loss = -(pos_loss + neg_loss) / num_pos 54 | 55 | return loss 56 | 57 | class WMSELoss(nn.Module): 58 | 59 | def __init__(self, alpha: float=4, beta: float = 2, reduction: str = 'mean'): 60 | 61 | super().__init__() 62 | assert reduction in ('none', 'sum', 'mean') 63 | self.alpha = alpha 64 | self.beta = beta 65 | self.reduction = reduction 66 | 67 | def forward(self, inputs: torch.Tensor, targets: torch.Tensor): 68 | 69 | mse = F.mse_loss(inputs, targets, reduction='none') 70 | mf_t = (torch.pow(1 + targets, self.alpha)) 71 | mf_p = (torch.pow(1 + inputs.detach(), self.beta)) 72 | modulating_factor = torch.maximum(mf_t, mf_p) 73 | # modulating_factor = torch.pow(1 + torch.abs(targets.detach() - inputs), self.beta) 74 | 75 | loss = modulating_factor * mse 76 | if self.reduction == 'none': 77 | return loss 78 | 79 | bs = loss.shape[0] 80 | loss = torch.sum(loss) 81 | if self.reduction == 'mean': 82 | loss = loss / (1 + targets.gt(0.96).sum().float()) 83 | loss = loss / bs 84 | 85 | return loss 86 | 87 | 88 | class QualityFocalLoss(nn.Module): 89 | """Quality Focal Loss. Generalized Focal Loss: https://arxiv.org/abs/2006.04388 90 | """ 91 | 92 | def __init__(self, beta: float = 2, reduction: str = "mean"): 93 | """Quality Focal Loss. 
Default values are from the paper 94 | 95 | Args: 96 | beta: control the scaling/modulating factor to reduce the impact of easy examples 97 | reduction: either none, sum, or mean 98 | """ 99 | super().__init__() 100 | assert reduction in ("none", "sum", "mean") 101 | self.beta = beta 102 | self.reduction = reduction 103 | 104 | def forward(self, inputs: torch.Tensor, targets: torch.Tensor): 105 | probs = torch.sigmoid(inputs) 106 | 107 | ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") 108 | modulating_factor = torch.abs(targets - probs) ** self.beta 109 | 110 | loss = modulating_factor * ce_loss 111 | 112 | if self.reduction == "sum": 113 | return torch.sum(loss) 114 | 115 | if self.reduction == "mean": 116 | return torch.sum(loss) / targets.eq(1).float().sum() 117 | 118 | return loss 119 | 120 | # torch.log and math.log is e based 121 | # https://github.com/elliottzheng/AdaptiveWingLoss/blob/master/adaptive_wing_loss.py 122 | class AdaptiveWingLoss(nn.Module): 123 | def __init__(self, omega=14, theta=0.5, epsilon=1, alpha=2.1): 124 | super(AdaptiveWingLoss, self).__init__() 125 | self.omega = omega 126 | self.theta = theta 127 | self.epsilon = epsilon 128 | self.alpha = alpha 129 | 130 | def forward(self, inputs: torch.Tensor, targets: torch.Tensor): 131 | ''' 132 | :param pred: BxNxHxH 133 | :param target: BxNxHxH 134 | :return: 135 | ''' 136 | 137 | y = targets 138 | y_hat = inputs 139 | delta_y = (y - y_hat).abs() 140 | delta_y1 = delta_y[delta_y < self.theta] 141 | delta_y2 = delta_y[delta_y >= self.theta] 142 | y1 = y[delta_y < self.theta] 143 | y2 = y[delta_y >= self.theta] 144 | loss1 = self.omega * torch.log(1 + torch.pow(delta_y1 / self.omega, self.alpha - y1)) 145 | A = self.omega * (1 / (1 + torch.pow(self.theta / self.epsilon, self.alpha - y2))) * (self.alpha - y2) * ( 146 | torch.pow(self.theta / self.epsilon, self.alpha - y2 - 1)) * (1 / self.epsilon) 147 | C = self.theta * A - self.omega * torch.log(1 + torch.pow(self.theta / self.epsilon, self.alpha - y2)) 148 | loss2 = A * delta_y2 - C 149 | return (loss1.sum() + loss2.sum()) / (len(loss1) + len(loss2)) 150 | -------------------------------------------------------------------------------- /inference/run_tensorrt.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import click 5 | 6 | import cv2 7 | import numpy as np 8 | import logging 9 | from time import time, sleep 10 | 11 | from trt_inference.cerberus_trt import CerberusInference 12 | from trt_inference.cls import WTR_CLS, SN_CLS, TD_CLS, DET_CLS_IND 13 | from postproc import get_clusters, fast_clustering 14 | 15 | from profiler import Profiler 16 | 17 | logging.basicConfig(format='%(asctime)s [%(levelname)8s] %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 18 | LOGGER = logging.getLogger(__name__) 19 | LOGGER.setLevel(logging.DEBUG) 20 | Profiler.set_warmup(25) 21 | 22 | cls_col = [(153, 255, 102), (255, 255, 255), (0, 255, 255), (52, 255, 52), (51, 153, 51), 23 | (0, 255, 0), (153, 51, 51), (0, 0, 255), (255, 0, 0)] 24 | 25 | lancol = [(0, 255, 255), (255, 255, 255), (255, 150, 50), (0, 0, 255), 26 | (102, 0, 102), (10, 255, 0), (255, 255, 0), (0, 153, 255)] 27 | 28 | @click.command() 29 | @click.option('--model_file', '-m', type=click.Path(exists=True), default=None, required=False) 30 | @click.option('--video', '-v', type=click.Path(exists=True), default=None, required=True) 31 | @click.option('--max_frames', '-f', type=int, default=None, 
required=False) 32 | @click.option('--infer_only', '-i', type=click.BOOL, default=False, required=False) 33 | def main(model_file, video, max_frames, infer_only): 34 | 35 | # load video 36 | # video = "../videos/dashcam_demo.mp4" 37 | cap = cv2.VideoCapture(video) 38 | cap.set(cv2.CAP_PROP_POS_FRAMES, 150 * 30) 39 | 40 | """# writer 41 | fourcc = cv2.VideoWriter_fourcc(*"MJPG") 42 | writer = cv2.VideoWriter('../videos/result_trt.avi', fourcc, 80, (640, 320))""" 43 | 44 | # Classes 45 | wtr = {v: k for k, v in WTR_CLS.items()} 46 | scn = {v: k for k, v in SN_CLS.items()} 47 | td = {v: k for k, v in TD_CLS.items()} 48 | 49 | model = CerberusInference(model_file) 50 | 51 | times = [] 52 | infer_times = [] 53 | frames = 0 54 | while cap.isOpened(): 55 | t = time() 56 | 57 | with Profiler('acquire'): 58 | _, frame = cap.read() 59 | frame = cv2.resize(frame, (640, 360)) 60 | frame = frame[20:340, :, :] 61 | 62 | with Profiler('inference_all'): 63 | preds = model(frame, raw=infer_only) 64 | 65 | it = model.backend.get_infer_time() 66 | infer_times.append(it) 67 | 68 | if not infer_only: 69 | 70 | det_out, lane_out, scn_out = preds 71 | boxes = det_out 72 | lanes, lanes_cls, lanes_votes = lane_out 73 | 74 | # Classification results 75 | w_cls = wtr[scn_out[0].item()] 76 | s_cls = scn[scn_out[1].item()] 77 | td_cls = td[scn_out[2].item()] 78 | 79 | # Lane clustering 80 | with Profiler('lane_clustering'): 81 | lane_clusters = fast_clustering(lanes, lanes_cls, lanes_votes) 82 | 83 | # Draw keypoints 84 | with Profiler('lane_drawing'): 85 | for cla, cls_clusters in enumerate(lane_clusters): 86 | for cl in cls_clusters: 87 | 88 | col = lancol[cla] 89 | if cl.shape[0] < 5: 90 | continue 91 | 92 | x = cl[:, 0] 93 | y = cl[:, 1] 94 | 95 | # calculate polynomial 96 | try: 97 | z = np.polyfit(x, y, 2) 98 | f = np.poly1d(z) 99 | except ValueError: 100 | continue 101 | 102 | # calculate new x's and y's 103 | x_new = np.linspace(min(x), max(x), len(x) * 2) 104 | y_new = f(x_new) 105 | 106 | for cx, cy in zip(x_new, y_new): 107 | frame = cv2.circle(frame, (int(cx), int(cy)), 1, col, thickness=2, ) 108 | 109 | # Draw boxes 110 | with Profiler('det_drawing'): 111 | for b in boxes: 112 | cls = DET_CLS_IND[int(b[5])].split(" ")[-1] 113 | tl = (int(b[2]), int(b[3])) 114 | br = (int(b[0]), int(b[1])) 115 | 116 | color = (0, 255, 0) if b[6] < 0.5 else (0,0,255) 117 | cv2.rectangle(frame, tl, br, color, 2) 118 | 119 | (text_width, text_height), _ = cv2.getTextSize(cls, cv2.FONT_HERSHEY_DUPLEX, 0.3, 1) 120 | cv2.rectangle(frame, br, (br[0] + text_width - 1, br[1] + text_height - 1), 121 | color, cv2.FILLED) 122 | cv2.putText(frame, cls, (br[0], br[1] + text_height - 1), cv2.FONT_HERSHEY_DUPLEX, 123 | 0.3, 0, 1, cv2.LINE_AA) 124 | 125 | # Add text 126 | with Profiler('cls_drawing'): 127 | text = f"WEATHER: {w_cls} SCENE: {s_cls} DAYTIME: {td_cls}" 128 | frame = cv2.rectangle(frame, (10, 5), (550, 25), (0, 0, 0), -1) 129 | frame = cv2.putText(frame, text, (15, 20), cv2.FONT_HERSHEY_DUPLEX, 0.5, 130 | (255,255,255), 1, cv2.LINE_AA, False) 131 | 132 | # writer.write(frame) 133 | cv2.imshow("result", frame) 134 | 135 | dt = time() - t 136 | times.append(dt) 137 | frames +=1 138 | 139 | if cv2.waitKey(1) == ord('q') or (frames == max_frames): 140 | print('=================Timing Stats=================') 141 | print(f"{'Frame Acquiring:':<37}{Profiler.get_avg_millis('acquire'):>6.3f} ms") 142 | print(f"{'Inference total:':<37}{Profiler.get_avg_millis('inference_all'):>6.3f} ms") 143 | print(f"\t{'Inference 
DNN:':<37}{np.array(infer_times[10:]).mean():>6.3f} ms") 144 | print(f"\t{'Inference Decoding:':<37}{Profiler.get_avg_millis('inference_decode'):>6.3f} ms") 145 | print('----------------------------------------------') 146 | print(f"{'Lanes clustering:':<37}{Profiler.get_avg_millis('lane_clustering'):>6.3f} ms") 147 | print(f"{'Lanes Fitting and Drawing:':<37}{Profiler.get_avg_millis('lane_drawing'):>6.3f} ms") 148 | print(f"{'Detection Drawing:':<37}{Profiler.get_avg_millis('det_drawing'):>6.3f} ms") 149 | print(f"{'Cls Drawing:':<37}{Profiler.get_avg_millis('cls_drawing'):>6.3f} ms") 150 | print(f"{'AVERAGE TIME:':<37}{np.array(times[10:]).mean()*1000:>6.3f} ms") 151 | break 152 | 153 | cap.release() 154 | # writer.release() 155 | 156 | 157 | if __name__ == '__main__': 158 | main() 159 | -------------------------------------------------------------------------------- /trainer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | from conf import Conf 5 | 6 | import torch 7 | import numpy as np 8 | import cv2 9 | 10 | import pytorch_lightning as pl 11 | import torchvision as tv 12 | from torch.utils.data import DataLoader 13 | from pytorch_lightning import loggers as pl_loggers 14 | 15 | from torch.optim import * 16 | from utils.lr_scheduler import * 17 | 18 | from models import CerberusModel 19 | from models.losses import MultiTaskLoss 20 | from dataset import MultitaskDataset, ignore_collate 21 | 22 | 23 | class PL_trainable(pl.LightningModule): 24 | def __init__(self, cnf): 25 | super().__init__() 26 | 27 | self.cnf = cnf 28 | self.backbone = CerberusModel(cnf) 29 | self.criterion = MultiTaskLoss(cnf) 30 | 31 | self.plot_images = 10 32 | 33 | def forward(self, img): 34 | pred = self.backbone(img) 35 | return pred 36 | 37 | def training_step(self, batch, batch_idx): 38 | 39 | img, targets = batch 40 | preds = self.forward(img) 41 | 42 | # Loss 43 | loss, loss_detail = self.criterion(preds, targets) 44 | 45 | # single scheduler 46 | sch = self.lr_schedulers() 47 | sch.step() 48 | 49 | lr = sch.get_last_lr()[0] 50 | self.log('train_loss', loss, on_step=True, on_epoch=False) 51 | self.log('lr', lr, on_step=True, on_epoch=False) 52 | for k, v in loss_detail.items(): 53 | self.log(f'train_{k}_loss', v, on_step=True, on_epoch=False) 54 | 55 | return loss 56 | 57 | def validation_step(self, batch, batch_idx): 58 | # Inference 59 | img, targets = batch 60 | preds = self.forward(img) 61 | 62 | # Loss 63 | loss, loss_detail = self.criterion(preds, targets) 64 | 65 | # plot 66 | if self.plot_images > 0: 67 | true, pred = [], [] 68 | 69 | if self.cnf.base.get("object_det", False): 70 | true.append(targets["obj_det"]["heatmaps"]) 71 | pred.append(preds["obj_det"]["heatmaps"]) 72 | 73 | if self.cnf.base.get("lane_det", False): 74 | true.append(targets["lane_det"]["heatmaps"]) 75 | pred.append(preds["lane_det"]["heatmaps"]) 76 | 77 | true = torch.cat(true, dim=1) 78 | pred = torch.cat(pred, dim=1) 79 | img_resize = torch_input_img(img[0].cpu().detach()) 80 | hm_true = torch_heatmap_img(true[0].cpu().detach()) 81 | hm_pred = torch_heatmap_img(pred[0].cpu().detach()) 82 | grid = torch.stack([img_resize, hm_true, hm_pred], dim=0) 83 | 84 | grid = tv.utils.make_grid(grid.float()) 85 | self.logger.experiment.add_image(tag=f'results_{self.plot_images}', 86 | img_tensor=grid, global_step=self.global_step) 87 | self.plot_images -= 1 88 | 89 | # Log 90 | self.log('val_loss', loss, on_step=False, on_epoch=True) 91 | 
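# Note on the logging below (assumes MultiTaskLoss, defined outside this excerpt, merges the
# per-task dicts returned by LanesLoss / ObjectsLoss / ClsLoss): loss_detail is expected to be a
# flat dict of scalars such as {"l_heat": ..., "l_emb": ..., "l_quant": ..., "d_heat": ...,
# "d_ofs": ..., "d_ocl": ..., "scn": ...}, each of which is logged as val_<key>_loss.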
92 | for k, v in loss_detail.items(): 93 | self.log(f'val_{k}_loss', v, on_step=False, on_epoch=True) 94 | 95 | return loss 96 | 97 | 98 | def test_step(self, batch, batch_idx): 99 | pass 100 | 101 | def validation_epoch_end(self, outputs) -> None: 102 | self.plot_images = 10 103 | 104 | def configure_optimizers(self): 105 | optimizer = eval(self.cnf.optimizer.name)(self.parameters(), **self.cnf.optimizer.args) 106 | 107 | if self.cnf.lr_scheduler.get("name", None) is not None: 108 | scheduler = eval(self.cnf.lr_scheduler.name)(optimizer, **self.cnf.lr_scheduler.args) 109 | return [optimizer], [scheduler] 110 | return [optimizer] 111 | 112 | def on_validation_epoch_end(self): 113 | torch.save(self.backbone.state_dict(), f'{self.cnf.exp_log_path}/last.pth') 114 | 115 | 116 | def torch_heatmap_img(heatmap): 117 | hm_show, _ = torch.max(heatmap, dim=0) 118 | hm_show = hm_show.numpy() * 255 119 | hm_show = hm_show.astype(np.uint8) 120 | hm_show = cv2.applyColorMap(hm_show, cv2.COLORMAP_JET) 121 | hm_show = cv2.cvtColor(hm_show, cv2.COLOR_BGR2RGB) 122 | hm_show = cv2.resize(hm_show, (640, 480)) / 255 123 | 124 | return torch.from_numpy(hm_show).permute(2, 0, 1) 125 | 126 | 127 | def torch_input_img(img): 128 | invTrans = tv.transforms.Compose([tv.transforms.Normalize(mean=[0., 0., 0.], 129 | std=[1 / 0.229, 1 / 0.224, 1 / 0.225]), 130 | tv.transforms.Normalize(mean=[-0.485, -0.456, -0.406], 131 | std=[1., 1., 1.]), 132 | ]) 133 | img = invTrans(img) 134 | img = tv.transforms.Resize((480, 640))(img) 135 | return img 136 | 137 | def trainer_run(cnf): 138 | # type: (Conf) -> None 139 | 140 | # ------------ 141 | # data 142 | # ------------ 143 | trainset = MultitaskDataset(cnf, mode="train") 144 | valset = MultitaskDataset(cnf, mode="val") 145 | 146 | collate_fn = ignore_collate(["centers", "offsets", "keypoints", "occlusion", "quant_offsets"]) 147 | train_loader = DataLoader(trainset, collate_fn=collate_fn, **cnf.dataset.train_dataset.loader_args) 148 | val_loader = DataLoader(valset, collate_fn=collate_fn, **cnf.dataset.val_dataset.loader_args) 149 | 150 | # ------------ 151 | # model 152 | # ------------ 153 | model = PL_trainable(cnf) 154 | 155 | # ------------ 156 | # training 157 | # ------------ 158 | gpus = [0] 159 | tb_logger = pl_loggers.TensorBoardLogger(save_dir=cnf.exp_log_path, name="", version="") 160 | 161 | trainer = pl.Trainer(default_root_dir=cnf.exp_log_path, logger=tb_logger, 162 | max_epochs=cnf.epochs, gpus=gpus) 163 | trainer.fit(model, train_loader, val_loader) 164 | 165 | -------------------------------------------------------------------------------- /models/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch import nn 5 | import torch.nn.functional as F 6 | 7 | 8 | def ConvReluConv(in_channel, conv_channels, out_channel, bias_fill=False, bias_value=0.0): 9 | """ Userful for Head output""" 10 | feat_conv = nn.Conv2d(in_channel, conv_channels, kernel_size=3, padding=1, bias=True) 11 | relu = nn.ReLU() 12 | out_conv = nn.Conv2d(conv_channels, out_channel, kernel_size=1, stride=1, padding=0) 13 | if bias_fill: 14 | out_conv.bias.data.fill_(bias_value) 15 | 16 | return nn.Sequential(feat_conv, relu, out_conv) 17 | 18 | def make_conv(in_channels, out_channels, conv_type="normal", kernel_size=3, padding=None, stride=1, 19 | depth_multiplier=1, **kwargs): 20 | """Create a convolution layer. 
Options: deformable, separable, or normal convolution 21 | """ 22 | assert conv_type in ("separable", "normal") 23 | if padding is None: 24 | padding = (kernel_size - 1) // 2 25 | 26 | if conv_type == "separable": 27 | hidden_channels = in_channels * depth_multiplier 28 | conv_layer = nn.Sequential( 29 | # dw 30 | nn.Conv2d(in_channels, hidden_channels, kernel_size, padding=padding, stride=stride, 31 | groups=in_channels, bias=False), 32 | nn.BatchNorm2d(in_channels), 33 | nn.ReLU6(inplace=True), 34 | # pw 35 | nn.Conv2d(hidden_channels, out_channels, 1, bias=False, stride=stride), 36 | nn.BatchNorm2d(out_channels), 37 | nn.ReLU6(inplace=True) 38 | ) 39 | nn.init.kaiming_normal_(conv_layer[0].weight, mode="fan_out", nonlinearity="relu") 40 | nn.init.kaiming_normal_(conv_layer[3].weight, mode="fan_out", nonlinearity="relu") 41 | 42 | else: # normal convolution 43 | conv_layer = nn.Sequential( 44 | nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, stride=stride, bias=False), 45 | nn.BatchNorm2d(out_channels), 46 | nn.ReLU(inplace=True) 47 | ) 48 | nn.init.kaiming_normal_(conv_layer[0].weight, mode="fan_out", nonlinearity="relu") 49 | 50 | return conv_layer 51 | 52 | 53 | def make_upsample(upsample_type="nearest", deconv_channels=None, deconv_kernel=4, deconv_init_bilinear=True, **kwargs): 54 | """Create an upsample layer. Options: convolution transpose, bilinear upsampling, or nearest upsampling 55 | """ 56 | assert upsample_type in ("conv_transpose", "bilinear", "nearest") 57 | 58 | if upsample_type == "conv_transpose": 59 | output_padding = deconv_kernel % 2 60 | padding = (deconv_kernel + output_padding) // 2 - 1 61 | 62 | upsample = nn.ConvTranspose2d(deconv_channels, deconv_channels, deconv_kernel, stride=2, padding=padding, 63 | output_padding=output_padding, bias=False) 64 | bn = nn.BatchNorm2d(deconv_channels) 65 | relu = nn.ReLU(inplace=True) 66 | upsample_layer = nn.Sequential(upsample, bn, relu) 67 | 68 | if deconv_init_bilinear: # TF CenterNet does not do this 69 | _init_bilinear_upsampling(upsample) 70 | 71 | else: 72 | upsample_layer = nn.Upsample(scale_factor=2, mode=upsample_type) 73 | 74 | return upsample_layer 75 | 76 | 77 | def _init_bilinear_upsampling(deconv_layer): 78 | """Initialize convolution transpose layer as bilinear upsampling to help with training stability 79 | """ 80 | # https://github.com/ucbdrive/dla/blob/master/dla_up.py#L26-L33 81 | w = deconv_layer.weight.data 82 | f = math.ceil(w.size(2) / 2) 83 | c = (2 * f - 1 - f % 2) / (f * 2.) 84 | 85 | for i in range(w.size(2)): 86 | for j in range(w.size(3)): 87 | w[0, 0, i, j] = (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 88 | 89 | for c in range(1, w.size(0)): 90 | w[c, 0, :, :] = w[0, 0, :, :] 91 | 92 | 93 | def make_downsample(downsample_type="max", conv_channels=None, conv_kernel=3, **kwargs): 94 | """Create a downsample layer. 
Options: convolution, max pooling, or average pooling 95 | """ 96 | assert downsample_type in ("max", "average", "conv") 97 | 98 | if downsample_type == "conv": 99 | downsample = nn.Conv2d(conv_channels, conv_channels, conv_kernel, stride=2, padding="same", bias=False) 100 | bn = nn.BatchNorm2d(conv_channels) 101 | relu = nn.ReLU(inplace=True) 102 | downsample_layer = nn.Sequential(downsample, bn, relu) 103 | 104 | nn.init.kaiming_normal_(downsample.weight, mode="fan_out", nonlinearity="relu") 105 | 106 | elif downsample_type == "max": 107 | downsample_layer = nn.MaxPool2d(2, 2) 108 | else: 109 | downsample_layer = nn.AvgPool2d(2, 2) 110 | 111 | return downsample_layer 112 | 113 | 114 | class Fuse(nn.Module): 115 | """Fusion node to be used for feature fusion. To be used in `BiFPNNeck` and `IDANeck`. The last input will be resized. 116 | 117 | Formula 118 | no weight: out = conv(in1 + resize(in2)) 119 | weighted: out = conv((in1*w1 + resize(in2)*w2) / (w1 + w2 + eps)) 120 | """ 121 | 122 | def __init__(self, num_fused, out, resize, upsample="nearest", downsample="max", conv_type="normal", 123 | weighted_fusion=False): 124 | super().__init__() 125 | assert resize in ("up", "down") 126 | assert num_fused >= 2 127 | 128 | self.weighted_fusion = weighted_fusion 129 | self.num_fused = num_fused 130 | if weighted_fusion: 131 | self.weights = nn.Parameter(torch.ones(num_fused), requires_grad=True) 132 | 133 | if resize == "up": 134 | self.resize = make_upsample(upsample_type=upsample, deconv_channels=out) 135 | else: 136 | self.resize = make_downsample(downsample=downsample, conv_channels=out) 137 | 138 | self.output_conv = make_conv(out, out, conv_type=conv_type) 139 | 140 | def forward(self, *features, eps=1e-6): 141 | 142 | last = self.resize(features[-1]) 143 | 144 | if self.weighted_fusion: 145 | weights = F.relu(self.weights) 146 | weights = weights / (torch.sum(weights) + eps) 147 | out = features[0] * weights[0] 148 | for i in range(1, self.num_fused-1): 149 | out = out + (features[i] * weights[i]) 150 | out = out + (last * weights[-1]) 151 | else: 152 | out = features[0] 153 | for i in range(1, self.num_fused-1): 154 | out = out + features[i] 155 | out = out + last 156 | 157 | out = self.output_conv(out) 158 | return out 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /models/cerberus.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from torch import nn 6 | 7 | from models.backbones.resnet import * 8 | from models.backbones.efficientnet import * 9 | from models.backbones.mobilenetv2 import * 10 | 11 | from models.necks import SimpleNeck, BiFPNNeck 12 | from models.heads import ScnHead, ObjectHead, LaneHead 13 | from utils.decoding import kp_from_heatmap 14 | 15 | from conf import Conf 16 | 17 | class CerberusModel(nn.Module): 18 | def __init__(self, cnf): 19 | # type: (Conf) -> None 20 | super(CerberusModel, self).__init__() 21 | self.cnf = cnf 22 | 23 | # Configuration 24 | self.lane_det = cnf.base.get("lane_det", True) 25 | self.obj_det = cnf.base.get("object_det", True) 26 | self.scene_cls = cnf.base.get("scene_cls", True) 27 | self.obj_occl = cnf.base.get("occlusion_cls", True) 28 | 29 | self.det_classes = cnf.base.get("det_classes", 10) 30 | self.lane_classes = cnf.base.get("lane_classes", 8) 31 | scn_classes = cnf.base.get("scn_classes", {}) 32 | self.scn_classes = [v for v in scn_classes.values()] 33 | 
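# The flags and class counts read above come from the experiment JSON under conf/experiments/.
# A hypothetical "base" block with the shape these cnf.base.get(...) calls expect (field names
# taken from the calls above; the values shown are only an illustration, not a shipped config):
# "base": {
#     "object_det": true, "lane_det": true, "scene_cls": true, "occlusion_cls": true,
#     "det_classes": 10, "lane_classes": 8,
#     "scn_classes": {"weather": 6, "scene": 6, "timeofday": 4}
# }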
34 | # Backbone 35 | assert self.lane_det or self.obj_det, "At least one task must be enabled!" 36 | self.backbone = eval(cnf.model.backbone.name)(**cnf.model.backbone.args) 37 | 38 | # Neck 39 | self.neck = eval(cnf.model.neck.name)(self.backbone.outplanes, **cnf.model.neck.args) 40 | 41 | # LANE DETECTION HEAD 42 | if self.lane_det: 43 | self.lane_q_offsets = cnf.base.get("lane_q_offsets", False) 44 | self.head_lane = LaneHead(num_classes=self.lane_classes, in_channels=self.neck.out_channels, 45 | conv_channels=cnf.model.head_channel, quant_offsets=self.lane_q_offsets) 46 | 47 | # OBJECT DETECTION HEAD 48 | if self.obj_det: 49 | self.head_obj = ObjectHead(num_classes=self.det_classes, 50 | in_channels=self.neck.out_channels, conv_channels=cnf.model.head_channel) 51 | 52 | # SCENE CLASSIFICATION HEAD 53 | if self.scene_cls: 54 | self.head_scn = ScnHead(in_channels=self.neck.out_channels, 55 | classes=self.scn_classes) 56 | 57 | def forward(self, x, inference=False): 58 | # type: (torch.tensor, bool) -> dict[str, torch.Tensor, ...] 59 | 60 | # Features 61 | feats = self.backbone(x) 62 | 63 | # Upsample 64 | big, small = self.neck(feats) 65 | 66 | # Output 67 | outputs = {} 68 | 69 | if self.lane_det: 70 | lane_out = self.head_lane(big, nms=inference) 71 | outputs["lane_det"] = lane_out 72 | 73 | if self.obj_det: 74 | obj_out = self.head_obj(big, nms=inference) 75 | outputs["obj_det"] = obj_out 76 | 77 | if self.scene_cls: 78 | scn_out = self.head_scn(small, argmax=inference) 79 | outputs["scn_cls"] = scn_out 80 | 81 | return outputs 82 | 83 | def inference(self, x, benchmarking=False): 84 | 85 | assert x.shape[0] == 1, "Only BS=1 is supported!" 86 | 87 | # inference 88 | predictions = self.forward(x, inference=True) 89 | 90 | if benchmarking: 91 | return predictions 92 | 93 | # ------------------ 94 | # Lane decoding 95 | # ------------------ 96 | if self.lane_det: 97 | lane_preds = predictions["lane_det"] 98 | hm_lane, ofs_lane, = lane_preds["heatmaps"], lane_preds["offsets"] 99 | 100 | l_scores, l_indices, l_labels = kp_from_heatmap(hm_lane, th=0.6, pseudo_nms=False) 101 | l_votes = ofs_lane[0, :, l_indices[:, 1], l_indices[:, 0]] * 4 102 | 103 | if self.lane_q_offsets: 104 | quant_ofs = lane_preds["quant"] 105 | quant_ofs = quant_ofs[0, :, l_indices[:, 1], l_indices[:, 0]] 106 | l_indices = l_indices.float() 107 | l_indices[:, 1] += quant_ofs[1] 108 | l_indices[:, 0] += quant_ofs[0] 109 | 110 | l_indices = l_indices * 4 111 | lanes = torch.cat([l_indices.float(), l_scores.unsqueeze(-1)], dim=-1) 112 | 113 | lane_pred = { 114 | "lanes": lanes, 115 | "lanes_labels": l_labels, 116 | "lanes_votes": l_votes 117 | } 118 | 119 | predictions["lane_det"]["decoded"] = lane_pred 120 | 121 | # ------------------ 122 | # Boxes decoding 123 | # ------------------ 124 | if self.obj_det: 125 | det_preds = predictions["obj_det"] 126 | hm_det, ofs_det, occlu_det = det_preds["heatmaps"], det_preds["offsets"], det_preds["occlusion"] 127 | d_scores, d_indices, d_labels = kp_from_heatmap(hm_det, th=0.6, pseudo_nms=False) 128 | 129 | bb_ofs = ofs_det[0, :, d_indices[:, 1], d_indices[:, 0]] 130 | x1x2 = (bb_ofs[:2] + d_indices[..., 0].unsqueeze(0)) * 4 131 | y1y2 = (bb_ofs[2:] + d_indices[..., 1].unsqueeze(0)) * 4 132 | 133 | # better safe than sorry 134 | x1x2 = torch.clip(x1x2, 0, 640) 135 | y1y2 = torch.clip(y1y2, 0, 320) 136 | 137 | boxes = torch.stack([x1x2[0], y1y2[0], x1x2[1], y1y2[1], d_scores], dim=-1) 138 | 139 | det_pred = { 140 | "boxes": boxes, 141 | "labels": d_labels 142 | } 143 | 144 | if 
self.obj_occl: 145 | occl = occlu_det[0, 0, d_indices[:, 1], d_indices[:, 0]].sigmoid() 146 | det_pred["occlusion"] = occl 147 | 148 | predictions["obj_det"]["decoded"] = det_pred 149 | 150 | return predictions 151 | 152 | if __name__ == '__main__': 153 | from torchinfo import summary 154 | 155 | cnf = Conf(exp_name='mobilenetv2_bifpn', log=False) 156 | model = CerberusModel(cnf).cuda() 157 | summary(model, input_size=(1, 3, 640,320), depth=5) 158 | 159 | x = torch.rand((1,3,320,640), dtype=torch.float32).cuda() 160 | y = model(x) 161 | -------------------------------------------------------------------------------- /models/necks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | from .layers import Fuse, make_conv, make_upsample 6 | 7 | class SimpleNeck(nn.Module): 8 | """(conv + upsample) a few times (first proposed in PoseNet https://arxiv.org/abs/1804.06208) 9 | 10 | Equations 11 | stride 16: out_4 = up(conv(in_5)) 12 | stride 8: out_3 = up(conv(out_4)) 13 | stride 4: out_2 = up(conv(out_3)) 14 | """ 15 | 16 | def __init__(self, backbone_channels, upsample_channels=[256, 128, 64], conv_type="normal", 17 | upsample_type="conv_transpose", **kwargs): 18 | super().__init__() 19 | layers = [] 20 | 21 | # first (conv + upsample) from backbone 22 | self.conv_layer = make_conv(backbone_channels[-1], upsample_channels[0], conv_type=conv_type) 23 | 24 | up_layer = make_upsample(upsample_type, upsample_channels[0], **kwargs) 25 | layers.append(up_layer) 26 | 27 | for i in range(1, len(upsample_channels)): 28 | conv_layer = make_conv(upsample_channels[i - 1], upsample_channels[i], conv_type=conv_type) 29 | up_layer = make_upsample(upsample_type, upsample_channels[i], deconv_init_bilinear=True, **kwargs) 30 | layers.append(conv_layer) 31 | layers.append(up_layer) 32 | 33 | self.upsample = nn.Sequential(*layers) 34 | self.out_channels = upsample_channels[-1] 35 | self.upsample_stride = 2 ** len(upsample_channels) 36 | 37 | def forward(self, features): 38 | small = self.conv_layer(features[-1]) 39 | big = self.upsample(small) 40 | return big, small 41 | 42 | 43 | class FPNNeck(nn.Module): 44 | """FPN neck with some modifications. 
Paper: https://arxiv.org/abs/1612.03144 45 | - Weighted fusion is used in Bi-FPN: https://arxiv.org/abs/1911.09070 46 | - Fusion factor (same as weighted fusion): https://arxiv.org/abs/2011.02298 47 | 48 | Equations 49 | stride 32: out_5 = conv_skip(in_5) 50 | stride 16: out_4 = conv(skip(in_4) + up(out_5) x w_4) 51 | stride 8: out_3 = conv(skip(in_3) + up(out_4) x w_3) 52 | stride 4: out_2 = conv(skip(in_2) + up(out_3) x w_2) 53 | """ 54 | 55 | def __init__(self, backbone_channels, upsample_channels=[256, 128, 64], upsample_type="nearest", conv_type="normal", 56 | weighted_fusion=False, **kwargs): 57 | super().__init__() 58 | self.top_conv = nn.Conv2d(backbone_channels[-1], upsample_channels[0], 1) 59 | self.skip_connections = nn.ModuleList() 60 | self.up_layers = nn.ModuleList() 61 | self.conv_layers = nn.ModuleList() 62 | if weighted_fusion: 63 | # indexing ParameterList of scalars might be slightly faster than indexing Parameter of 1-d tensor 64 | self.weights = [nn.Parameter(torch.tensor(1., dtype=torch.float32, requires_grad=True)) for _ in 65 | range(len(upsample_channels))] 66 | self.weights = nn.ParameterList(self.weights) 67 | else: 68 | self.weights = None 69 | 70 | for i in range(len(upsample_channels)): 71 | # build skip connections 72 | in_channels = backbone_channels[-2 - i] 73 | out_channels = upsample_channels[i] 74 | skip_conv = nn.Conv2d(in_channels, out_channels, 1) 75 | self.skip_connections.append(skip_conv) 76 | 77 | # build upsample layers 78 | upsample = make_upsample(upsample_type=upsample_type, deconv_channels=out_channels, **kwargs) 79 | self.up_layers.append(upsample) 80 | 81 | # build output conv layers 82 | out_conv_channels = upsample_channels[i + 1] if i < len(upsample_channels) - 1 else upsample_channels[-1] 83 | conv = make_conv(out_channels, out_conv_channels, conv_type=conv_type, **kwargs) 84 | self.conv_layers.append(conv) 85 | 86 | self.out_channels = upsample_channels[-1] 87 | self.upsample_stride = 2 ** len(upsample_channels) 88 | 89 | def forward(self, features): 90 | out = features[-1] 91 | out = self.top_conv(out) 92 | 93 | for i in range(len(self.conv_layers)): 94 | skip = self.skip_connections[i](features[-2 - i]) # skip connection 95 | up = self.up_layers[i](out) # upsample 96 | 97 | if self.weights is not None: 98 | w = F.relu(self.weights[i]) 99 | out = (skip + up * w) / (1 + w) # combine with fusion weight 100 | else: 101 | out = skip + up 102 | out = self.conv_layers[i](out) # output conv 103 | 104 | return out 105 | 106 | class BiFPNLayer(nn.Module): 107 | """""" 108 | 109 | def __init__(self, num_features=4, num_channels=64, upsample_type="nearest", downsample_type="max", 110 | conv_type="normal", weighted_fusion=True, **kwargs): 111 | super().__init__() 112 | assert isinstance(num_channels, int) 113 | self.num_features = num_features 114 | self.top_down = nn.ModuleList() 115 | self.bottom_up = nn.ModuleList() 116 | 117 | # build top down 118 | for _ in range(num_features - 1): 119 | fuse = Fuse(2, num_channels, "up", upsample=upsample_type, conv_type=conv_type, 120 | weighted_fusion=weighted_fusion) 121 | self.top_down.append(fuse) 122 | 123 | # build bottom up 124 | for _ in range(1, num_features - 1): 125 | fuse = Fuse(3, num_channels, "down", downsample=downsample_type, conv_type=conv_type, 126 | weighted_fusion=weighted_fusion) 127 | self.bottom_up.append(fuse) 128 | 129 | self.last_fuse = Fuse(2, num_channels, "down", downsample=downsample_type, conv_type=conv_type, 130 | weighted_fusion=weighted_fusion) 131 | 132 | def 
forward(self, features): 133 | # top down: Ptd_6 = conv(Pin_6 + up(Ptd_7)) 134 | topdowns = [None] * len(features) 135 | topdowns[-1] = features[-1] 136 | for i in range(len(self.top_down)): 137 | topdowns[-2 - i] = self.top_down[i](features[-2 - i], topdowns[-1 - i]) 138 | 139 | # bottom up: Pout_6 = conv(Pin_6 + Ptd_6 + down(Pout_5)) 140 | out = [None] * len(features) 141 | out[0] = topdowns[0] 142 | for i in range(len(self.bottom_up)): 143 | out[i + 1] = self.bottom_up[i](features[i + 1], topdowns[i + 1], out[i]) 144 | out[-1] = self.last_fuse(features[-1], out[-2]) 145 | 146 | return out 147 | 148 | 149 | class BiFPNNeck(nn.Module): 150 | def __init__(self, backbone_channels, num_layers=3, num_features=4, num_channels=64, upsample_type="nearest", 151 | downsample_type="max", conv_type="normal", weighted_fusion=True, **kwargs): 152 | super().__init__() 153 | self.project = nn.ModuleList() 154 | self.layers = nn.ModuleList() 155 | self.num_features = num_features 156 | 157 | for b_channels in backbone_channels[-num_features:]: 158 | conv = nn.Conv2d(b_channels, num_channels, 1) 159 | self.project.append(conv) 160 | 161 | for _ in range(num_layers): 162 | bifpn_layer = BiFPNLayer(num_features=num_features, num_channels=num_channels, upsample_type=upsample_type, 163 | downsample_type=downsample_type, conv_type=conv_type, 164 | weighted_fusion=weighted_fusion, **kwargs) 165 | self.layers.append(bifpn_layer) 166 | 167 | self.out_channels = num_channels 168 | self.upsample_stride = 2 ** (num_features - 1) 169 | 170 | def forward(self, features): 171 | out = [project(x) for project, x in zip(self.project, features[-self.num_features:])] 172 | 173 | for bifpn_layer in self.layers: 174 | out = bifpn_layer(out) 175 | 176 | return out[0], out[-1] 177 | -------------------------------------------------------------------------------- /models/backbones/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | import torch 5 | from typing import Optional 6 | 7 | def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: 8 | """ 9 | This function is taken from the original tf repo. 10 | It ensures that all layers have a channel number that is divisible by 8 11 | It can be seen here: 12 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 13 | """ 14 | if min_value is None: 15 | min_value = divisor 16 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 17 | # Make sure that round down does not go down by more than 10%. 18 | if new_v < 0.9 * v: 19 | new_v += divisor 20 | return new_v 21 | 22 | class FrozenBatchNorm2d(torch.nn.Module): 23 | # https://github.com/facebookresearch/detr/blob/master/models/backbone.py 24 | """ 25 | BatchNorm2d where the batch statistics and the affine parameters are fixed. 26 | Copy-paste from torchvision.misc.ops with added eps before rqsrt, 27 | without which any other models than torchvision.models.resnet[18,34,50,101] 28 | produce nans. 
29 | """ 30 | 31 | def __init__(self, n): 32 | super(FrozenBatchNorm2d, self).__init__() 33 | self.register_buffer("weight", torch.ones(n)) 34 | self.register_buffer("bias", torch.zeros(n)) 35 | self.register_buffer("running_mean", torch.zeros(n)) 36 | self.register_buffer("running_var", torch.ones(n)) 37 | 38 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 39 | missing_keys, unexpected_keys, error_msgs): 40 | num_batches_tracked_key = prefix + 'num_batches_tracked' 41 | if num_batches_tracked_key in state_dict: 42 | del state_dict[num_batches_tracked_key] 43 | 44 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 45 | state_dict, prefix, local_metadata, strict, 46 | missing_keys, unexpected_keys, error_msgs) 47 | 48 | def forward(self, x): 49 | # move reshapes to the beginning 50 | # to make it fuser-friendly 51 | w = self.weight.reshape(1, -1, 1, 1) 52 | b = self.bias.reshape(1, -1, 1, 1) 53 | rv = self.running_var.reshape(1, -1, 1, 1) 54 | rm = self.running_mean.reshape(1, -1, 1, 1) 55 | eps = 1e-5 56 | scale = w * (rv + eps).rsqrt() 57 | bias = b - rm * scale 58 | return x * scale + bias 59 | 60 | class ConvNormActivation(torch.nn.Sequential): 61 | """ 62 | Configurable block used for Convolution-Normalzation-Activation blocks. 63 | 64 | Args: 65 | in_channels (int): Number of channels in the input image 66 | out_channels (int): Number of channels produced by the Convolution-Normalzation-Activation block 67 | kernel_size: (int, optional): Size of the convolving kernel. Default: 3 68 | stride (int, optional): Stride of the convolution. Default: 1 69 | padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in wich case it will calculated as ``padding = (kernel_size - 1) // 2 * dilation`` 70 | groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 71 | norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolutiuon layer. If ``None`` this layer wont be used. Default: ``torch.nn.BatchNorm2d`` 72 | activation_layer (Callable[..., torch.nn.Module], optinal): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer wont be used. Default: ``torch.nn.ReLU`` 73 | dilation (int): Spacing between kernel elements. Default: 1 74 | inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` 75 | bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. 
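# Illustrative usage sketch for the ConvNormActivation block documented above
# (not from the original docstring; shapes are placeholders). With the defaults,
# padding is derived as (kernel_size - 1) // 2 * dilation, and the conv bias is
# dropped because a norm layer is present.
import torch

block = ConvNormActivation(32, 64, kernel_size=3, stride=2)   # Conv2d -> BatchNorm2d -> ReLU
y = block(torch.randn(1, 32, 128, 128))                       # -> torch.Size([1, 64, 64, 64])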
76 | 77 | """ 78 | 79 | def __init__( 80 | self, 81 | in_channels: int, 82 | out_channels: int, 83 | kernel_size: int = 3, 84 | stride: int = 1, 85 | padding: int = None, 86 | groups: int = 1, 87 | norm_layer: torch.nn.Module = torch.nn.BatchNorm2d, 88 | activation_layer: torch.nn.Module = torch.nn.ReLU, 89 | dilation: int = 1, 90 | inplace: bool = True, 91 | bias: bool = None, 92 | ) -> None: 93 | if padding is None: 94 | padding = (kernel_size - 1) // 2 * dilation 95 | if bias is None: 96 | bias = norm_layer is None 97 | layers = [ 98 | torch.nn.Conv2d( 99 | in_channels, 100 | out_channels, 101 | kernel_size, 102 | stride, 103 | padding, 104 | dilation=dilation, 105 | groups=groups, 106 | bias=bias, 107 | ) 108 | ] 109 | if norm_layer is not None: 110 | layers.append(norm_layer(out_channels)) 111 | if activation_layer is not None: 112 | params = {} if inplace is None else {"inplace": inplace} 113 | layers.append(activation_layer(**params)) 114 | super().__init__(*layers) 115 | self.out_channels = out_channels 116 | 117 | 118 | class SqueezeExcitation(torch.nn.Module): 119 | """ 120 | This block implements the Squeeze-and-Excitation block from https://arxiv.org/abs/1709.01507 (see Fig. 1). 121 | Parameters ``activation``, and ``scale_activation`` correspond to ``delta`` and ``sigma`` in in eq. 3. 122 | 123 | Args: 124 | input_channels (int): Number of channels in the input image 125 | squeeze_channels (int): Number of squeeze channels 126 | activation (Callable[..., torch.nn.Module], optional): ``delta`` activation. Default: ``torch.nn.ReLU`` 127 | scale_activation (Callable[..., torch.nn.Module]): ``sigma`` activation. Default: ``torch.nn.Sigmoid`` 128 | """ 129 | 130 | def __init__( 131 | self, 132 | input_channels: int, 133 | squeeze_channels: int, 134 | activation: torch.nn.Module = torch.nn.ReLU, 135 | scale_activation: torch.nn.Module = torch.nn.Sigmoid, 136 | ) -> None: 137 | super().__init__() 138 | self.avgpool = torch.nn.AdaptiveAvgPool2d(1) 139 | self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1) 140 | self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1) 141 | self.activation = activation() 142 | self.scale_activation = scale_activation() 143 | 144 | def _scale(self, input: torch.Tensor) -> torch.Tensor: 145 | scale = self.avgpool(input) 146 | scale = self.fc1(scale) 147 | scale = self.activation(scale) 148 | scale = self.fc2(scale) 149 | return self.scale_activation(scale) 150 | 151 | def forward(self, input: torch.Tensor) -> torch.Tensor: 152 | scale = self._scale(input) 153 | return scale * input 154 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # --------------------- 3 | 4 | from tqdm import tqdm 5 | from pprint import pprint 6 | 7 | import cv2 8 | import click 9 | import torch 10 | import numpy as np 11 | 12 | from torchinfo import summary 13 | from torch.utils.data import DataLoader 14 | from torchmetrics.detection.mean_ap import MeanAveragePrecision 15 | from torchmetrics.classification import F1Score, Accuracy 16 | from torchmetrics import JaccardIndex 17 | from torchvision import transforms 18 | 19 | from conf import Conf 20 | from models import CerberusModel 21 | from utils.box_utils import match_bboxes 22 | from inference.postproc import cluster_lane_preds, fit_lanes 23 | from dataset import MultitaskDataset, ignore_collate 24 | 25 | 26 | @click.command() 27 | 
@click.option('--conf_file', '-c', type=click.Path(exists=True), default=None, required=True) 28 | @click.option('--weights_file', '-w', type=click.Path(exists=True), default=None, required=False) 29 | @click.option('--show', '-s', type=click.BOOL, default=False, required=False) 30 | def main(conf_file, weights_file, show): 31 | 32 | cnf = Conf(conf_file_path=conf_file, log=False) 33 | cnf.dataset.images_root = "/home/carmelo/DATASETS/BDD100K/bdd100k_images/images/100k" 34 | cnf.dataset.lane_det.data_root = "/home/carmelo/CEMP/MT_ADASNET/data" 35 | cnf.dataset.obj_det.data_root = "/home/carmelo/DATASETS/BDD100K/bdd100k_det/labels/det_20" 36 | 37 | # Select tasks 38 | eval_lane_det = cnf.base.get("lane_det", True) 39 | eval_obj_det = cnf.base.get("object_det", True) 40 | eval_obj_occl = cnf.base.get("occlusion_cls", True) 41 | eval_scene_cls = cnf.base.get("scene_cls", True) 42 | 43 | device = "cuda" if torch.cuda.is_available() else 'cpu' 44 | 45 | # Inverse normalization (for display) 46 | invTrans = transforms.Compose([transforms.Normalize(mean=[0., 0., 0.], 47 | std=[1 / 0.229, 1 / 0.224, 1 / 0.225]), 48 | transforms.Normalize(mean=[-0.485, -0.456, -0.406], 49 | std=[1., 1., 1.]), 50 | ]) 51 | 52 | # Torchmetrics 53 | map = MeanAveragePrecision() 54 | iou = JaccardIndex(num_classes=2) 55 | 56 | wtr_f1 = F1Score(num_classes=7, average='micro') 57 | scn_f1 = F1Score(num_classes=7, average='micro') 58 | td_f1 = F1Score(num_classes=4, average='micro') 59 | 60 | occl_acc = Accuracy() 61 | 62 | # Load data 63 | collate_fn = ignore_collate(["centers", "offsets", "keypoints", 64 | "occlusion", "boxes", "classes", "lanes"]) 65 | 66 | valset = MultitaskDataset(cnf, mode="val", gt=True) 67 | val_loader = DataLoader(valset, collate_fn=collate_fn, batch_size=1) 68 | 69 | # load model 70 | model = CerberusModel(cnf).to(device) 71 | ck = torch.load(weights_file, map_location=device) 72 | model.load_state_dict(ck, strict=True) 73 | 74 | model.eval() 75 | 76 | # Print stats 77 | # summary(model, input_size=(1, 3, 640, 320)) 78 | 79 | # Run evaluation loop 80 | for batch_idx, batch in enumerate(tqdm(val_loader)): 81 | img, targets = batch 82 | img = img.to(cnf.device) 83 | 84 | with torch.no_grad(): 85 | pred = model.inference(img) 86 | 87 | """det_out, lane_out, scn_out, heatmaps_out = pred 88 | boxes, boxes_cls, boxes_occl = 89 | lanes, lanes_cls, lanes_votes = lane_out""" 90 | 91 | # ======================= 92 | # Object detection metric 93 | # ======================= 94 | if eval_obj_det: 95 | det_out = pred["obj_det"]["decoded"] 96 | boxes, boxes_cls = det_out["boxes"], det_out["labels"] 97 | 98 | car_pred = torch.nonzero(boxes_cls == 2).squeeze(1) 99 | det_pred = { 100 | 'boxes': boxes[:, :4].cpu(), 101 | 'scores': boxes[:, 4].cpu(), 102 | 'labels': boxes_cls.cpu(), 103 | } 104 | 105 | car_true = (targets["obj_det"]["classes"][0] == 2).nonzero().squeeze(1) 106 | det_target = { 107 | 'boxes': targets["obj_det"]["boxes"][0], 108 | 'labels': targets["obj_det"]["classes"][0], 109 | } 110 | 111 | if eval_obj_occl: 112 | boxes_occl = det_out["occlusion"] 113 | det_pred['occlusion'] = boxes_occl.cpu() 114 | det_target['occlusion'] = targets["obj_det"]["occlusion"][0] 115 | 116 | # TODO: testare con decodifica del GT! 
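# (The TODO above is Italian for: "test with decoding of the GT!")
# Illustrative, self-contained sketch of the torchmetrics MeanAveragePrecision
# input format used below (dummy tensors, not repository data): predictions are
# a list of dicts with xyxy 'boxes', 'scores' and integer 'labels'; targets
# need 'boxes' and 'labels'.
import torch
from torchmetrics.detection.mean_ap import MeanAveragePrecision

metric = MeanAveragePrecision()
preds = [{
    "boxes": torch.tensor([[10.0, 20.0, 110.0, 220.0]]),
    "scores": torch.tensor([0.9]),
    "labels": torch.tensor([2]),
}]
targets = [{
    "boxes": torch.tensor([[12.0, 18.0, 108.0, 215.0]]),
    "labels": torch.tensor([2]),
}]
metric.update(preds, targets)
print(metric.compute()["map_50"])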
117 | map.update([det_pred], [det_target]) 118 | 119 | # ------------------------------- 120 | # Occlusion Classification Metric 121 | # ------------------------------- 122 | if eval_obj_occl: 123 | gt_valid, pred_valid, _, _ = match_bboxes(det_target["boxes"][car_true], det_pred["boxes"][car_pred]) 124 | occlu_true = det_target["occlusion"][car_true][gt_valid].int() 125 | occlu_pred = det_pred["occlusion"][car_pred][pred_valid] 126 | 127 | if len(gt_valid) >= 1: 128 | occl_acc.update(occlu_pred, occlu_true) 129 | 130 | # =============================== 131 | # Scene Classification Metric 132 | # =============================== 133 | if eval_scene_cls: 134 | scn_out = pred["scene_cls"] 135 | wtr_f1.update(scn_out['weather'].cpu(), targets['scn_cls']['weather']) 136 | scn_f1.update(scn_out['scene'].cpu(), targets['scn_cls']['scene']) 137 | td_f1.update(scn_out['timeofday'].cpu(), targets['scn_cls']['timeofday']) 138 | 139 | # ======================= 140 | # Lane Estimation Metric 141 | # ======================= 142 | if eval_lane_det: 143 | lane_out = pred["lane_est"]["decoded"] 144 | lanes, lanes_cls, lanes_votes = lane_out["lanes"], lane_out["lanes_labels"], lane_out["lanes_votes"] 145 | 146 | # Build GT mask 147 | gt_lanes = targets["lane_det"]["lanes"][0] 148 | gt_lanes = [l.numpy() * 4 for l in gt_lanes] 149 | gt_mask = lanes_to_mask(gt_lanes, cnf.dataset.input_h, cnf.dataset.input_w) 150 | gm = torch.from_numpy(gt_mask).long().unsqueeze(0) 151 | 152 | # Build predicted mask 153 | lane_clusters = cluster_lane_preds(lanes, lanes_cls, lanes_votes) 154 | lanes_pred = fit_lanes(lane_clusters) 155 | 156 | pred_lanes = [] 157 | for i in range(8): 158 | pred_lanes += lanes_pred[i] 159 | 160 | pred_mask = lanes_to_mask(pred_lanes, cnf.dataset.input_h, cnf.dataset.input_w) 161 | pm = torch.from_numpy(pred_mask).long().unsqueeze(0) 162 | 163 | iou.update(gm, pm) 164 | 165 | #if batch_idx > 500: 166 | # break 167 | 168 | # Display results 169 | if show: 170 | frame = invTrans(img[0]) 171 | frame = frame.cpu().numpy().transpose(1, 2, 0) 172 | 173 | if eval_obj_det: 174 | # true 175 | boxes_pred = boxes[:, :4].cpu().numpy() 176 | for b in boxes_pred: 177 | color = (0, 255, 0) 178 | frame = cv2.rectangle(frame, (int(b[2]), int(b[3])), (int(b[0]), int(b[1])), color, 2) 179 | 180 | # objects pred 181 | boxes_true = targets["obj_det"]["boxes"][0] 182 | for b in boxes_true: 183 | color = (0, 0, 255) 184 | frame = cv2.rectangle(frame, (int(b[2]), int(b[3])), (int(b[0]), int(b[1])), color, 2) 185 | 186 | #Lane masks 187 | if eval_lane_det: 188 | all_mask = np.zeros((cnf.dataset.input_h, cnf.dataset.input_w, 3), dtype=np.uint8) 189 | all_mask[:, :, 1] = pred_mask*255 190 | all_mask[:, :, 2] = gt_mask*255 191 | 192 | while cv2.waitKey(1) != ord('q'): 193 | if eval_obj_det: cv2.imshow("detection", frame) 194 | if eval_lane_det: cv2.imshow("lanes", all_mask) 195 | 196 | 197 | 198 | if eval_obj_det: 199 | print("--- OBJECT DETECTION ---") 200 | pprint(map.compute()) 201 | 202 | if eval_lane_det: 203 | print("--- LANE ESTIMATION ---") 204 | pprint(iou.compute()) 205 | 206 | if eval_scene_cls: 207 | print("--- SCENE CLASSIFICATION F1 (weather, scene, time of day) ---") 208 | pprint(wtr_f1.compute()) 209 | pprint(scn_f1.compute()) 210 | pprint(td_f1.compute()) 211 | 212 | if eval_obj_det and eval_obj_occl: 213 | print("--- OCCLUSION CLASSIFICATION ACCURACY ---") 214 | pprint(occl_acc.compute()) 215 | 216 | def lanes_to_mask(lanes, h, w): 217 | gt_mask = np.zeros((h, w), dtype=np.uint8) 218 | for l in lanes: 219 | 
points = l.astype(np.int32) 220 | 221 | # Draw mask 222 | points = points.reshape((-1, 1, 2)) 223 | gt_mask = cv2.polylines(gt_mask, [points], False, (1), 2) 224 | 225 | return gt_mask 226 | 227 | if __name__ == '__main__': 228 | # baseline: 'map_50': tensor(0.5604), 229 | main() 230 | 231 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # CERBERUS: CEnteR Based End-to-end peRception Using a Single model [[arXiv]](https://arxiv.org/abs/2210.00756) 2 | 3 | This is the official code repository for **"CERBERUS: Simple and Effective All-In-One Automotive Perception 4 | Model with Multi Task Learning"** 5 | 6 | ## Model 7 | 8 |
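A minimal, illustrative sketch of loading a trained checkpoint and running inference (the paths and the dummy input are placeholders; `evaluate.py` contains the full evaluation loop):

```python
import torch

from conf import Conf
from models import CerberusModel

# Placeholder paths -- substitute your own experiment config and checkpoint.
cnf = Conf(conf_file_path="path/to/experiment.json", log=False)
device = "cuda" if torch.cuda.is_available() else "cpu"

model = CerberusModel(cnf).to(device)
model.load_state_dict(torch.load("path/to/weights.pth", map_location=device), strict=True)
model.eval()

# A normalized input batch of the configured size (dummy tensor here).
img = torch.randn(1, 3, cnf.dataset.input_h, cnf.dataset.input_w, device=device)
with torch.no_grad():
    pred = model.inference(img)  # dict with "obj_det", "lane_est" and "scene_cls" entries
```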
9 |
10 |
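For the lane metric, `evaluate.py` compares rasterized masks rather than raw polylines. A small, self-contained sketch of that idea (dummy lanes and a placeholder resolution; the binary-mask IoU via torchmetrics `JaccardIndex` mirrors the evaluation loop above):

```python
import cv2
import numpy as np
import torch
from torchmetrics import JaccardIndex

def lanes_to_mask(lanes, h, w):
    # Same rasterization idea as in evaluate.py: draw each lane as a 2 px polyline.
    mask = np.zeros((h, w), dtype=np.uint8)
    for lane in lanes:
        pts = lane.astype(np.int32).reshape((-1, 1, 2))
        mask = cv2.polylines(mask, [pts], False, 1, 2)
    return mask

h, w = 320, 640  # placeholder input resolution
gt_lanes = [np.array([[100, 300], [120, 200], [140, 100]])]    # dummy ground-truth lane, (x, y) points
pred_lanes = [np.array([[102, 300], [123, 200], [139, 100]])]  # dummy predicted lane

iou = JaccardIndex(num_classes=2)
iou.update(torch.from_numpy(lanes_to_mask(pred_lanes, h, w)).long().unsqueeze(0),
           torch.from_numpy(lanes_to_mask(gt_lanes, h, w)).long().unsqueeze(0))
print(iou.compute())
```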