├── model
│   ├── __init__.py
│   ├── group_pointcloud.py
│   ├── rpn.py
│   └── model.py
├── utils
│   ├── __init__.py
│   ├── setup.py
│   ├── nms.py
│   ├── preprocess.py
│   ├── colorize.py
│   ├── box_overlaps.pyx
│   ├── data_aug.py
│   └── utils.py
├── parse.sh
├── train.sh
├── test.sh
├── eval
│   └── KITTI
│       ├── launch_test.sh
│       ├── README.md
│       ├── mail.h
│       ├── evaluate_object_3d.cpp
│       └── evaluate_object_3d_offline.cpp
├── notes
│   ├── QUESTIONS.md
│   ├── DESCRIPTIONS.md
│   └── REMARKS.md
├── LICENSE
├── .gitignore
├── preproc
│   ├── split.py
│   └── crop.py
├── parse.py
├── loader
│   └── kitti.py
├── test.py
├── README.md
├── config.py
└── train.py

/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.rpn import *
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from utils.box_overlaps import *
--------------------------------------------------------------------------------
/parse.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python parse.py preds
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python train.py --alpha 1.5 --beta 1
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python test.py --tag default --resumed_model kitti_Car_best.pth.tar
--------------------------------------------------------------------------------
/eval/KITTI/launch_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Cropped ground-truth dir
3 | GT_DIR=./data/MD_KITTI/validation/label_2
4 | # Prediction dir
5 | PRED_DIR=$1
6 |
7 | # Output log
8 | OUTPUT=$2
9 | # Start test
10 | nohup `pwd`/eval/KITTI/evaluate_object_3d_offline $GT_DIR $PRED_DIR > $OUTPUT 2>&1 &
11 |
--------------------------------------------------------------------------------
/notes/QUESTIONS.md:
--------------------------------------------------------------------------------
1 | - Some details of the functions in [utils/utils.py](../utils/utils.py) are not fully understood, especially the ones that pre-process the dataset.
2 | They involve multiple coordinate projections and transformations.
3 |
4 | - The maximum number of non-empty voxels (K) differs across samples, so it is not straightforward to use multiple GPUs in PyTorch,
5 | because it is hard to pack a batch of samples into a single tensor. (TensorFlow can assign certain samples to a certain GPU.)
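One possible (untested) workaround, sketched below rather than implemented in this repo, is to pad every sample's voxel buffers to the largest K in the batch so they stack into dense tensors that `nn.DataParallel` could split along the batch dimension. The function name `pad_and_stack` and the padding scheme are illustrative assumptions, not part of the current code; the buffer shapes follow `process_pointcloud` and `build_input`:

```python
import numpy as np
import torch

def pad_and_stack(feature_list, coordinate_list, t=35):
    # feature_list[i]:    (K_i, t, 7) array from process_pointcloud
    # coordinate_list[i]: (K_i, 3) array of (d, h, w) voxel indices
    # Pad each sample to the largest K so the batch stacks into dense tensors;
    # padded voxels stay all-zero and are flagged by the returned mask.
    k_max = max(f.shape[0] for f in feature_list)
    batch = len(feature_list)

    features = np.zeros((batch, k_max, t, 7), dtype=np.float32)
    coords = np.zeros((batch, k_max, 4), dtype=np.int64)
    mask = np.zeros((batch, k_max), dtype=np.uint8)

    for i, (feat, coord) in enumerate(zip(feature_list, coordinate_list)):
        k = feat.shape[0]
        features[i, :k] = feat
        coords[i, :k, 0] = i          # batch index, as in build_input
        coords[i, :k, 1:] = coord
        mask[i, :k] = 1

    return (torch.from_numpy(features),
            torch.from_numpy(coords),
            torch.from_numpy(mask))
```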
--------------------------------------------------------------------------------
/utils/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:UTF-8 -*-
3 |
4 | # File Name : setup.py
5 | # Purpose :
6 | # Creation Date : 11-12-2017
7 | # Last Modified : Sat 23 Dec 2017 03:19:46 PM CST
8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com]
9 |
10 |
11 | from distutils.core import setup
12 | from Cython.Build import cythonize
13 | import numpy
14 |
15 | setup(
16 |     name = 'box overlaps',
17 |     ext_modules = cythonize('./utils/box_overlaps.pyx'),
18 |     include_dirs = [numpy.get_include()]
19 | )
20 |
--------------------------------------------------------------------------------
/eval/KITTI/README.md:
--------------------------------------------------------------------------------
1 | # KITTI Evaluation
2 |
3 | `evaluate_object_3d_offline.cpp` evaluates your KITTI detections locally, using validation data selected from the KITTI training set, with the following metrics:
4 |
5 | - overlap on image (AP)
6 | - oriented overlap on image (AOS)
7 | - overlap on ground-plane (AP)
8 | - overlap in 3D (AP)
9 |
10 | Compile `evaluate_object_3d_offline.cpp` with Boost and the Linux `dirent.h` header (already available on most Linux systems).
11 |
12 | Run the evaluation with:
13 |
14 |     ./evaluate_object_3d_offline groundtruth_dir result_dir
15 |
16 | Note that you don't have to run detection over all KITTI training data. The evaluator only evaluates samples whose result files exist.
17 |
18 |
19 | ### Updates
20 |
21 | - June, 2017:
22 |   * Fixed the bug of detection box filtering based on min height, according to KITTI's note on 25.04.2017.
23 |
--------------------------------------------------------------------------------
/eval/KITTI/mail.h:
--------------------------------------------------------------------------------
1 | #ifndef MAIL_H
2 | #define MAIL_H
3 |
4 | #include <stdio.h>
5 | #include <string>
6 | #include <stdarg.h>
7 |
8 | class Mail {
9 |
10 | public:
11 |
12 |   Mail (std::string email = "") {
13 |     if (email.compare("")) {
14 |       mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
15 |       fprintf(mail,"To: %s\n", email.c_str());
16 |       fprintf(mail,"From: noreply@cvlibs.net\n");
17 |       fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
18 |       fprintf(mail,"\n\n");
19 |     } else {
20 |       mail = 0;
21 |     }
22 |   }
23 |
24 |   ~Mail() {
25 |     if (mail) {
26 |       pclose(mail);
27 |     }
28 |   }
29 |
30 |   void msg (const char *format, ...) {
31 |     va_list args;
32 |     va_start(args,format);
33 |     if (mail) {
34 |       vfprintf(mail,format,args);
35 |       fprintf(mail,"\n");
36 |     }
37 |     vprintf(format,args);
38 |     printf("\n");
39 |     va_end(args);
40 |   }
41 |
42 | private:
43 |
44 |   FILE *mail;
45 |
46 | };
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/notes/DESCRIPTIONS.md:
--------------------------------------------------------------------------------
1 | # data_object_image_2.zip
2 | Images from the second color camera.
3 |
4 | # data_object_velodyne.zip
5 | Velodyne scanned points. Each file (.bin) corresponds to an image and contains around 12K 3D points.
6 | Each point is stored in the format (x, y, z, r), where r is the reflectance value.
7 |
8 | # data_object_label_2.zip
9 | Label format can be found [here](https://github.com/NVIDIA/DIGITS/blob/v4.0.0-rc.3/digits/extensions/data/objectDetection/README.md#label-format).
10 | The last value is only used for online submission.
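To make the format concrete, here is a parsing sketch with a made-up example line (the numeric values are purely illustrative, not taken from KITTI); training labels carry these 15 fields, and a 16th value (the score) is appended only in result files for submission:

```python
# Illustrative only: the 15 fields of one KITTI training label line.
fields = ['type', 'truncated', 'occluded', 'alpha',
          'bbox_left', 'bbox_top', 'bbox_right', 'bbox_bottom',  # 2D box in image
          'height', 'width', 'length',                           # 3D size (camera coords)
          'x', 'y', 'z',                                         # 3D location (camera coords)
          'rotation_y']                                          # yaw around the camera Y axis
line = 'Car 0.00 0 -1.57 614.24 181.78 727.31 284.77 1.57 1.73 4.15 1.00 1.75 13.22 -1.62'
label = dict(zip(fields, line.split()))
print(label['type'], label['length'], label['rotation_y'])
```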
Original description can be downloaded [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip). 11 | 12 | 13 | 14 | # data_object_calib.zip 15 | Pi: projection matrix after rectification, size of 3x4; i is camera index 16 | 17 | R0_rect: rectifying rotation matrix of the reference camera (camera 0) 18 | 19 | Tr_velo_to_cam: rotation and translation matrix from the Velodyne coordinate to the camera coordinate 20 | 21 | To transform a 3D point x in Velodyne coordinates to a point y in i-th camera image using: **y = Pi * R0_rect * Tr_velo_to_cam * x** -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 huanghao-code 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/notes/REMARKS.md:
--------------------------------------------------------------------------------
1 | # Sparse Tensor Representation
2 | - Use `torch.sparse.FloatTensor` to replace `tf.scatter_nd` in [model/group_pointcloud.py](../model/group_pointcloud.py)
3 | - A toy example for illustration:
4 | ```
5 | size = torch.Size([2, 3, 3, 3, 5])  # similar to '[batch_size, DEPTH, HEIGHT, WIDTH, 128]'
6 | src = torch.Tensor([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]])  # similar to 'voxelwise'
7 | index = torch.Tensor([[0, 0, 1, 2], [0, 1, 1, 1], [1, 2, 1, 0]])  # similar to 'coordinate'
8 | dst = torch.sparse.FloatTensor(index.t(), src, size)  # similar to 'outputs'
9 | ```
10 |
11 | # Deconv2D
12 | - According to the formula of [`ConvTranspose2d`](https://pytorch.org/docs/0.4.1/nn.html?highlight=convtranspose2d#torch.nn.ConvTranspose2d) in PyTorch,
13 | H_out = (H_in − 1) × stride\[0\] − 2 × padding\[0\] + kernel_size\[0\], so if the first Deconv2D is set to (128, 256, 3, 1, 0), the spatial size cannot be kept unchanged.
14 | - In [model/rpn.py](../model/rpn.py), change the first Deconv2D to (128, 256, 3, 1, 1).
15 |
16 | # Gradient Clip
17 | - The TensorFlow implementation needs many lines to clip gradients in the file [model/model.py](https://github.com/qianguih/voxelnet/blob/b74823daa328fc2fa99452bf79793e1f3c32c72a/model/model.py#L98).
18 | The PyTorch implementation ([train.py](../train.py)) only needs a single call to `clip_grad_norm_` (see the short sketch at the end of this file).
19 |
20 | # Non-maximum Suppression
21 | - The TensorFlow implementation uses the TensorFlow API `tf.image.non_max_suppression` to conduct non-maximum suppression [model/model.py](https://github.com/qianguih/voxelnet/blob/b74823daa328fc2fa99452bf79793e1f3c32c72a/model/model.py#L136).
22 | PyTorch does not provide such an API.
23 | - In the PyTorch implementation, a third-party open-source [NMS](https://gist.github.com/mkocabas/a2f565b27331af0da740c11c78699185) implementation is adopted. A potential problem arises when the number of candidate boxes is zero.
24 | - An alternative option is a C-based implementation, which can be found [here](https://github.com/multimodallearning/pytorch-mask-rcnn/tree/master/nms). It needs to be compiled but may be faster.
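# Gradient Clip: Usage Sketch
- A minimal, self-contained illustration of the single `clip_grad_norm_` call mentioned above. The model, optimizer, loss, and max-norm value are placeholders for whatever [train.py](../train.py) actually uses, not taken from it:
```
import torch
import torch.nn as nn

# Placeholder model/optimizer/loss; only the clipping call itself is the point here.
model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss = model(torch.randn(4, 10)).sum()

optimizer.zero_grad()
loss.backward()
# One call replaces the per-variable clipping block in the TensorFlow version.
nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
optimizer.step()
```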
25 | -------------------------------------------------------------------------------- /model/group_pointcloud.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from config import cfg 5 | 6 | import pdb 7 | 8 | 9 | class VFELayer(nn.Module): 10 | def __init__(self, in_channels, out_channels): 11 | super(VFELayer, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.out_channels = out_channels 15 | self.units = int(out_channels / 2) 16 | 17 | self.dense = nn.Sequential(nn.Linear(self.in_channels, self.units), nn.ReLU()) 18 | self.batch_norm = nn.BatchNorm1d(self.units) 19 | 20 | 21 | def forward(self, inputs, mask): 22 | # [ΣK, T, in_ch] -> [ΣK, T, units] -> [ΣK, units, T] 23 | tmp = self.dense(inputs).transpose(1, 2) 24 | # [ΣK, units, T] -> [ΣK, T, units] 25 | pointwise = self.batch_norm(tmp).transpose(1, 2) 26 | 27 | # [ΣK, 1, units] 28 | aggregated, _ = torch.max(pointwise, dim = 1, keepdim = True) 29 | 30 | # [ΣK, T, units] 31 | repeated = aggregated.expand(-1, cfg.VOXEL_POINT_COUNT, -1) 32 | 33 | # [ΣK, T, 2 * units] 34 | concatenated = torch.cat([pointwise, repeated], dim = 2) 35 | 36 | # [ΣK, T, 1] -> [ΣK, T, 2 * units] 37 | mask = mask.expand(-1, -1, 2 * self.units) 38 | 39 | concatenated = concatenated * mask.float() 40 | 41 | return concatenated 42 | 43 | 44 | class FeatureNet(nn.Module): 45 | def __init__(self): 46 | super(FeatureNet, self).__init__() 47 | 48 | self.vfe1 = VFELayer(7, 32) 49 | self.vfe2 = VFELayer(32, 128) 50 | 51 | 52 | def forward(self, feature, number, coordinate): 53 | 54 | batch_size = len(feature) 55 | 56 | feature = torch.cat(feature, dim = 0) # [ΣK, cfg.VOXEL_POINT_COUNT, 7]; cfg.VOXEL_POINT_COUNT = 35/45 57 | coordinate = torch.cat(coordinate, dim = 0) # [ΣK, 4]; each row stores (batch, d, h, w) 58 | 59 | vmax, _ = torch.max(feature, dim = 2, keepdim = True) 60 | mask = (vmax != 0) # [ΣK, T, 1] 61 | 62 | x = self.vfe1(feature, mask) 63 | x = self.vfe2(x, mask) 64 | 65 | # [ΣK, 128] 66 | voxelwise, _ = torch.max(x, dim = 1) 67 | 68 | # Car: [B, 10, 400, 352, 128]; Pedestrain/Cyclist: [B, 10, 200, 240, 128] 69 | outputs = torch.sparse.FloatTensor(coordinate.t(), voxelwise, torch.Size([batch_size, cfg.INPUT_DEPTH, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])) 70 | 71 | outputs = outputs.to_dense() 72 | 73 | return outputs -------------------------------------------------------------------------------- /preproc/split.py: -------------------------------------------------------------------------------- 1 | import os 2 | from shutil import copyfile 3 | 4 | import pdb 5 | 6 | 7 | def rearrange(original_folders, target_folders, split_file): 8 | for _, v in original_folders.items(): 9 | if not os.path.isdir(v): 10 | raise Exception('No such folder: %s' % v) 11 | 12 | for _, v in target_folders.items(): 13 | if not os.path.isdir(v): 14 | os.makedirs(v) 15 | 16 | if not os.path.exists(split_file): 17 | raise Exception('No such split file: %s' % split_file) 18 | else: 19 | with open(split_file) as f: 20 | content = f.readlines() 21 | content = [x.strip() for x in content] 22 | 23 | for file in content: 24 | src_img = os.path.join(original_folders['IMG_ROOT'], file + '.png') 25 | src_pc = os.path.join(original_folders['PC_ROOT'], file + '.bin') 26 | src_label = os.path.join(original_folders['LABEL_ROOT'], file + '.txt') 27 | 28 | if (not os.path.exists(src_img)) or (not os.path.exists(src_pc)) or (not os.path.exists(src_label)): 29 | print('No such file: %s' % file) 30 | 
else: 31 | dst_img = os.path.join(target_folders['IMG_ROOT'], file + '.png') 32 | dst_pc = os.path.join(target_folders['PC_ROOT'], file + '.bin') 33 | dst_label = os.path.join(target_folders['LABEL_ROOT'], file + '.txt') 34 | 35 | copyfile(src_img, dst_img) 36 | copyfile(src_pc, dst_pc) 37 | copyfile(src_label, dst_label) 38 | 39 | 40 | if __name__ == '__main__': 41 | # Original folder 42 | original_folders = dict() 43 | original_folders['IMG_ROOT'] = './data/KITTI/image/training/image_2/' 44 | original_folders['PC_ROOT'] = './data/KITTI/point_cloud/training/velodyne/' 45 | original_folders['LABEL_ROOT'] = './data/KITTI/label/training/label_2/' 46 | 47 | # Modified folder 48 | train_folders = dict() 49 | train_folders['IMG_ROOT'] = './data/MD_KITTI/training/image_2/' 50 | train_folders['PC_ROOT'] = './data/MD_KITTI/training/velodyne/' 51 | train_folders['LABEL_ROOT'] = './data/MD_KITTI/training/label_2/' 52 | 53 | val_folders = dict() 54 | val_folders['IMG_ROOT'] = './data/MD_KITTI/validation/image_2/' 55 | val_folders['PC_ROOT'] = './data/MD_KITTI/validation/velodyne/' 56 | val_folders['LABEL_ROOT'] = './data/MD_KITTI/validation/label_2/' 57 | 58 | # Split file 59 | train_split_file = './data/KITTI/imagesets/ImageSets/train.txt' 60 | val_split_file = './data/KITTI/imagesets/ImageSets/val.txt' 61 | 62 | rearrange(original_folders, train_folders, train_split_file) 63 | rearrange(original_folders, val_folders, val_split_file) -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import pdb 4 | 5 | # Original author: Francisco Massa: 6 | # https://github.com/fmassa/object-detection.torch 7 | # Ported to PyTorch by Max deGroot (02/01/2017) 8 | 9 | def nms(boxes, scores, overlap = 0.5, top_k = 200): 10 | """Apply non-maximum suppression at test time to avoid detecting too many 11 | overlapping bounding boxes for a given object. 12 | Args: 13 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 14 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 15 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 16 | top_k: (int) The Maximum number of box preds to consider. 17 | Return: 18 | The indices of the kept boxes with respect to num_priors. 
19 | """ 20 | 21 | keep = scores.new(scores.size(0)).zero_().long() 22 | count = 0 23 | if boxes.numel() == 0: 24 | return keep, count 25 | x1 = boxes[:, 0] 26 | y1 = boxes[:, 1] 27 | x2 = boxes[:, 2] 28 | y2 = boxes[:, 3] 29 | area = torch.mul(x2 - x1, y2 - y1) 30 | v, idx = scores.sort(0) # sort in ascending order 31 | # I = I[v >= 0.01] 32 | idx = idx[-top_k:] # indices of the top-k largest vals 33 | xx1 = boxes.new() 34 | yy1 = boxes.new() 35 | xx2 = boxes.new() 36 | yy2 = boxes.new() 37 | w = boxes.new() 38 | h = boxes.new() 39 | 40 | # keep = torch.Tensor() 41 | while idx.numel() > 0: 42 | i = idx[-1] # index of current largest val 43 | # keep.append(i) 44 | keep[count] = i 45 | count += 1 46 | if idx.size(0) == 1: 47 | break 48 | idx = idx[:-1] # remove kept element from view 49 | # load bboxes of next highest vals 50 | torch.index_select(x1, 0, idx, out = xx1) 51 | torch.index_select(y1, 0, idx, out = yy1) 52 | torch.index_select(x2, 0, idx, out = xx2) 53 | torch.index_select(y2, 0, idx, out = yy2) 54 | # store element-wise max with next highest score 55 | xx1 = torch.clamp(xx1, min = x1[i]) 56 | yy1 = torch.clamp(yy1, min = y1[i]) 57 | xx2 = torch.clamp(xx2, max = x2[i]) 58 | yy2 = torch.clamp(yy2, max = y2[i]) 59 | w.resize_as_(xx2) 60 | h.resize_as_(yy2) 61 | w = xx2 - xx1 62 | h = yy2 - yy1 63 | # check sizes of xx1 and xx2.. after each iteration 64 | w = torch.clamp(w, min = 0.0) 65 | h = torch.clamp(h, min = 0.0) 66 | inter = w * h 67 | # IoU = i / (area(a) + area(b) - i) 68 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 69 | union = (rem_areas - inter) + area[i] 70 | IoU = inter / union # store result in iou 71 | # keep only elements with an IoU <= overlap 72 | idx = idx[IoU.le(overlap)] 73 | 74 | return keep, count -------------------------------------------------------------------------------- /utils/preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : preprocess.py 5 | # Purpose : 6 | # Creation Date : 10-12-2017 7 | # Last Modified : Thu 18 Jan 2018 05:34:42 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | import numpy as np 11 | 12 | from config import cfg 13 | 14 | data_dir = 'velodyne' 15 | 16 | def process_pointcloud(point_cloud, cls = cfg.DETECT_OBJ): 17 | # Input: 18 | # (N, 4) 19 | # Output: 20 | # voxel_dict 21 | if cls == 'Car': 22 | scene_size = np.array([4, 80, 70.4], dtype = np.float32) 23 | voxel_size = np.array([0.4, 0.2, 0.2], dtype = np.float32) 24 | grid_size = np.array([10, 400, 352], dtype = np.int64) 25 | lidar_coord = np.array([0, 40, 3], dtype = np.float32) 26 | max_point_number = 35 27 | else: 28 | scene_size = np.array([4, 40, 48], dtype = np.float32) 29 | voxel_size = np.array([0.4, 0.2, 0.2], dtype = np.float32) 30 | grid_size = np.array([10, 200, 240], dtype = np.int64) 31 | lidar_coord = np.array([0, 20, 3], dtype = np.float32) 32 | max_point_number = 45 33 | 34 | np.random.shuffle(point_cloud) 35 | 36 | shifted_coord = point_cloud[:, :3] + lidar_coord 37 | # reverse the point cloud coordinate (X, Y, Z) -> (Z, Y, X) 38 | voxel_index = np.floor( 39 | shifted_coord[:, ::-1] / voxel_size).astype(np.int) 40 | 41 | bound_x = np.logical_and( 42 | voxel_index[:, 2] >= 0, voxel_index[:, 2] < grid_size[2]) 43 | bound_y = np.logical_and( 44 | voxel_index[:, 1] >= 0, voxel_index[:, 1] < grid_size[1]) 45 | bound_z = np.logical_and( 46 | voxel_index[:, 0] >= 0, voxel_index[:, 0] < 
grid_size[0]) 47 | 48 | bound_box = np.logical_and(np.logical_and(bound_x, bound_y), bound_z) 49 | 50 | point_cloud = point_cloud[bound_box] 51 | voxel_index = voxel_index[bound_box] 52 | 53 | # [K, 3] coordinate buffer as described in the paper 54 | coordinate_buffer = np.unique(voxel_index, axis = 0) 55 | 56 | K = len(coordinate_buffer) 57 | T = max_point_number 58 | 59 | # [K, 1] store number of points in each voxel grid 60 | number_buffer = np.zeros(shape = (K), dtype = np.int64) 61 | 62 | # [K, T, 7] feature buffer as described in the paper 63 | feature_buffer = np.zeros(shape = (K, T, 7), dtype = np.float32) 64 | 65 | # build a reverse index for coordinate buffer 66 | index_buffer = {} 67 | for i in range(K): 68 | index_buffer[tuple(coordinate_buffer[i])] = i 69 | 70 | for voxel, point in zip(voxel_index, point_cloud): 71 | index = index_buffer[tuple(voxel)] 72 | number = number_buffer[index] 73 | if number < T: 74 | feature_buffer[index, number, :4] = point 75 | number_buffer[index] += 1 76 | 77 | feature_buffer[:, :, -3:] = feature_buffer[:, :, :3] - \ 78 | feature_buffer[:, :, :3].sum(axis = 1, keepdims = True)/number_buffer.reshape(K, 1, 1) 79 | 80 | voxel_dict = {'feature_buffer': feature_buffer, 81 | 'coordinate_buffer': coordinate_buffer, 82 | 'number_buffer': number_buffer} 83 | 84 | return voxel_dict 85 | 86 | -------------------------------------------------------------------------------- /utils/colorize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : colorize.py 5 | # Purpose : 6 | # Creation Date : 21-12-2017 7 | # Last Modified : Thu 21 Dec 2017 09:02:22 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | # ref: https://gist.github.com/jimfleming/c1adfdb0f526465c99409cc143dea97b 11 | 12 | import cv2 13 | cv2.setNumThreads(0) 14 | 15 | import numpy as np 16 | 17 | 18 | def colorize(value, factor = 1, vmin = None, vmax = None): 19 | """ 20 | A utility function for TensorFlow that maps a grayscale image to a matplotlib 21 | colormap for use with TensorBoard image summaries. 22 | 23 | By default it will normalize the input value to the range 0..1 before mapping 24 | to a grayscale colormap. 25 | 26 | Arguments: 27 | - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 28 | [height, width, 1]. 29 | - factor: resize factor, scalar 30 | - vmin: the minimum value of the range used for normalization. 31 | (Default: value minimum) 32 | - vmax: the maximum value of the range used for normalization. 33 | (Default: value maximum) 34 | 35 | Example usage: 36 | 37 | ``` 38 | output = tf.random_uniform(shape=[256, 256, 1]) 39 | output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 40 | tf.summary.image('output', output_color) 41 | ``` 42 | 43 | Returns a 3D tensor of shape [height, width, 3]. 
44 | """ 45 | 46 | # normalize 47 | value = np.sum(value, axis = -1) 48 | vmin = np.min(value) if vmin is None else vmin 49 | vmax = np.max(value) if vmax is None else vmax 50 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 51 | 52 | value = (value * 255).astype(np.uint8) 53 | value = cv2.applyColorMap(value, cv2.COLORMAP_JET) 54 | value = cv2.cvtColor(value, cv2.COLOR_BGR2RGB) 55 | x, y, _ = value.shape 56 | value = cv2.resize(value, (y * factor, x * factor)) 57 | 58 | return value 59 | 60 | 61 | # def tf_colorize(value, factor = 1, vmin = None, vmax = None, cmap = None): 62 | # """ 63 | # A utility function for TensorFlow that maps a grayscale image to a matplotlib 64 | # colormap for use with TensorBoard image summaries. 65 | # 66 | # By default it will normalize the input value to the range 0..1 before mapping 67 | # to a grayscale colormap. 68 | # 69 | # Arguments: 70 | # - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 71 | # [height, width, 1]. 72 | # - factor: resize factor, scalar 73 | # - vmin: the minimum value of the range used for normalization. 74 | # (Default: value minimum) 75 | # - vmax: the maximum value of the range used for normalization. 76 | # (Default: value maximum) 77 | # - cmap: a valid cmap named for use with matplotlib's `get_cmap`. 78 | # (Default: 'gray') 79 | # 80 | # Example usage: 81 | # 82 | # ``` 83 | # output = tf.random_uniform(shape=[256, 256, 1]) 84 | # output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 85 | # tf.summary.image('output', output_color) 86 | # ``` 87 | # 88 | # Returns a 3D tensor of shape [height, width, 3]. 89 | # """ 90 | # 91 | # # normalize 92 | # vmin = tf.reduce_min(value) if vmin is None else vmin 93 | # vmax = tf.reduce_max(value) if vmax is None else vmax 94 | # value = (value - vmin) / (vmax - vmin) # vmin..vmax 95 | # 96 | # # squeeze last dim if it exists 97 | # value = tf.squeeze(value) 98 | # 99 | # # quantize 100 | # indices = tf.to_int32(tf.round(value * 255)) 101 | # 102 | # # gather 103 | # cm = matplotlib.cm.get_cmap(cmap if cmap is not None else 'gray') 104 | # colors = tf.constant(cm.colors, dtype = tf.float32) 105 | # value = tf.gather(colors, indices) 106 | # 107 | # return value 108 | -------------------------------------------------------------------------------- /parse.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | 5 | import os 6 | import sys 7 | from collections import OrderedDict 8 | 9 | 10 | linestyles = OrderedDict( 11 | [('solid', (0, ())), 12 | ('loosely dotted', (0, (1, 10))), 13 | ('dotted', (0, (1, 5))), 14 | ('densely dotted', (0, (1, 1))), 15 | ('loosely dashed', (0, (5, 10))), 16 | ('dashed', (0, (5, 5))), 17 | ('densely dashed', (0, (5, 1))), 18 | ('loosely dashdotted', (0, (3, 10, 1, 10))), 19 | ('dashdotted', (0, (3, 5, 1, 5))), 20 | ('densely dashdotted', (0, (3, 1, 1, 1))), 21 | ('loosely dashdotdotted', (0, (3, 10, 1, 10, 1, 10))), 22 | ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))), 23 | ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1)))]) 24 | 25 | # These are the "Tableau 20" colors as RGB. 
26 | tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), 27 | (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), 28 | (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), 29 | (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), 30 | (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] 31 | 32 | # Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts. 33 | for i in range(len(tableau20)): 34 | r, g, b = tableau20[i] 35 | tableau20[i] = (r / 255., g / 255., b / 255.) 36 | 37 | 38 | 39 | ROOT_DIR = sys.argv[1] 40 | 41 | det_3d = [[] for _ in range(3)] 42 | det_bv = [[] for _ in range(3)] 43 | 44 | for epoch in range(9, 200, 10): 45 | log_file = os.path.join(ROOT_DIR, str(epoch), 'log') 46 | if not os.path.exists(log_file): 47 | break 48 | else: 49 | lines = open(log_file).readlines() 50 | for line in lines: 51 | line = line.split() 52 | if line[0] == 'car_detection_ground': 53 | det_bv[0].append( float(line[-3] )) 54 | det_bv[1].append( float(line[-2] )) 55 | det_bv[2].append( float(line[-1] )) 56 | elif line[0] == 'car_detection_3d': 57 | det_3d[0].append( float(line[-3])) 58 | det_3d[1].append( float(line[-2])) 59 | det_3d[2].append( float(line[-1])) 60 | 61 | RANGE = range(len(det_bv[0])) 62 | 63 | plt.figure(figsize = (10, 7)) 64 | 65 | plt.plot(RANGE, det_3d[0], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[0]) 66 | plt.plot(RANGE, det_3d[1], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[2]) 67 | plt.plot(RANGE, det_3d[2], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[4]) 68 | plt.plot(RANGE, det_bv[0], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[0]) 69 | plt.plot(RANGE, det_bv[1], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[2]) 70 | plt.plot(RANGE, det_bv[2], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[4]) 71 | 72 | 73 | plt.legend(['3d easy', '3d moderate', '3d hard', 'bird view easy', 'bird view moderate', 'bird view hard'], loc = 4) 74 | 75 | plt.xlabel('Epoch', fontsize = 16) 76 | plt.xticks(RANGE, range(9, len(RANGE) * 10, 10)) 77 | plt.xticks(fontsize = 14) 78 | 79 | plt.ylabel('AP', fontsize = 16) 80 | plt.ylim(35, 95) 81 | plt.yticks(range(35, 95, 5)) 82 | plt.yticks(fontsize = 14) 83 | 84 | plt.grid(linestyle = linestyles['dotted']) 85 | 86 | DIR_NAME = ROOT_DIR.split('/')[-1] 87 | 88 | OUTPUT_NAME = DIR_NAME + '.jpg' 89 | plt.savefig(OUTPUT_NAME) 90 | 91 | print('results parsed and saved in: ' + OUTPUT_NAME) 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /preproc/crop.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.misc import imread 3 | 4 | import pdb 5 | 6 | CAM = 2 7 | 8 | def load_velodyne_points(filename): 9 | # Load data points 10 | points = np.fromfile(filename, dtype = np.float32).reshape(-1, 4) # (N, 4); N is around 10k, 4 means [x, y, z, r] 11 | 12 | return points 13 | 14 | 15 | def load_calib(calib_dir): 16 | # P2 * R0_rect * Tr_velo_to_cam * y 17 | lines = open(calib_dir).readlines() 18 | lines = [line.split()[1:] for line in lines][:-1] 19 | 20 | P = np.array(lines[CAM]).reshape(3, 4) 21 | 22 | Tr_velo_to_cam = np.array(lines[5]).reshape(3, 4) 23 | Tr_velo_to_cam = np.concatenate([Tr_velo_to_cam, np.array([0, 0, 0, 1]).reshape(1, 4)], 0) 24 | 25 | 
R_cam_to_rect = np.eye(4) 26 | R_cam_to_rect[:3, :3] = np.array(lines[4][:9]).reshape(3, 3) 27 | 28 | P = P.astype('float32') 29 | Tr_velo_to_cam = Tr_velo_to_cam.astype('float32') 30 | R_cam_to_rect = R_cam_to_rect.astype('float32') 31 | 32 | return P, Tr_velo_to_cam, R_cam_to_rect 33 | 34 | 35 | def prepare_velo_points(pts3d_raw): 36 | # Replaces the reflectance value by 1, and tranposes the array, 37 | # so points can be directly multiplied by the camera projection matrix 38 | pts3d = pts3d_raw 39 | 40 | # Reflectance > 0 41 | indices = pts3d[:, 3] > 0 42 | pts3d = pts3d[indices ,:] 43 | pts3d[:, 3] = 1 44 | 45 | return pts3d.transpose(), indices 46 | 47 | 48 | def project_velo_points_in_img(pts3d, T_cam_velo, Rrect, Prect): 49 | # Project 3D points into 2D image. Expects pts3d as a 4xN numpy array. 50 | # Returns the 2D projection of the points that are in front of the camera only an the corresponding 3D points. 51 | 52 | # 3D points in camera reference frame. 53 | pts3d_cam = Rrect.dot(T_cam_velo.dot(pts3d)) 54 | 55 | # Before projecting, keep only points with z>0 (points that are in front of the camera). 56 | idx = (pts3d_cam[2, :] >= 0) 57 | pts2d_cam = Prect.dot(pts3d_cam[:, idx]) 58 | 59 | return pts3d[:, idx], pts2d_cam / pts2d_cam[2, :], idx 60 | 61 | 62 | def align_img_and_pc(img_dir, pc_dir, calib_dir): 63 | 64 | img = imread(img_dir) 65 | pts = load_velodyne_points(pc_dir) 66 | P, Tr_velo_to_cam, R_cam_to_rect = load_calib(calib_dir) 67 | 68 | pts3d, indices = prepare_velo_points(pts) 69 | 70 | reflectances = pts[indices, 3] 71 | pts3d, pts2d_normed, idx = project_velo_points_in_img(pts3d, Tr_velo_to_cam, R_cam_to_rect, P) 72 | 73 | # Print reflectances.shape, idx.shape 74 | reflectances = reflectances[idx] 75 | 76 | # Print reflectances.shape, pts3d.shape, pts2d_normed.shape 77 | assert reflectances.shape[0] == pts3d.shape[1] == pts2d_normed.shape[1] 78 | 79 | rows, cols = img.shape[:2] 80 | 81 | points = [] 82 | for i in range(pts2d_normed.shape[1]): 83 | c = int(np.round(pts2d_normed[0,i])) 84 | r = int(np.round(pts2d_normed[1,i])) 85 | if c < cols and r < rows and r > 0 and c > 0: 86 | color = img[r, c, :] 87 | point = [pts3d[0, i], pts3d[1, i], pts3d[2, i], reflectances[i], color[0], color[1], color[2], pts2d_normed[0, i], pts2d_normed[1, i]] 88 | points.append(point) 89 | 90 | points = np.array(points) 91 | 92 | return points 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | # Update the following directories 98 | IMG_ROOT = './data/KITTI/image/training/image_2/' 99 | PC_ROOT = './data/KITTI/point_cloud/training/velodyne/' 100 | CALIB_ROOT = './data/KITTI/calib/training/calib/' 101 | 102 | for frame in range(0, 7481): 103 | img_dir = IMG_ROOT + '%06d.png' % frame 104 | pc_dir = PC_ROOT + '%06d.bin' % frame 105 | calib_dir = CALIB_ROOT + '%06d.txt' % frame 106 | 107 | points = align_img_and_pc(img_dir, pc_dir, calib_dir) 108 | 109 | output_name = PC_ROOT + str(frame) + '.bin' 110 | points[:,:4].astype('float32').tofile(output_name) 111 | print(output_name) 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /loader/kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import cv2 6 | cv2.setNumThreads(0) 7 | 8 | import torch 9 | import torch.utils.data as data_utl 10 | 11 | from config import cfg 12 | from utils.data_aug import aug_data 13 | from utils.preprocess import process_pointcloud 14 | 15 | import pdb 
16 | 17 | 18 | class Processor: 19 | def __init__(self, data_tag, f_rgb, f_lidar, f_label, data_dir, aug, is_testset): 20 | self.data_tag = data_tag 21 | self.f_rgb = f_rgb 22 | self.f_lidar = f_lidar 23 | self.f_label = f_label 24 | self.data_dir = data_dir 25 | self.aug = aug 26 | self.is_testset = is_testset 27 | 28 | 29 | def __call__(self, load_index): 30 | if self.aug: 31 | ret = aug_data(self.data_tag[load_index], self.data_dir) 32 | else: 33 | rgb = cv2.resize(cv2.imread(self.f_rgb[load_index]), (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)) 34 | raw_lidar = np.fromfile(self.f_lidar[load_index], dtype = np.float32).reshape((-1, 4)) 35 | if not self.is_testset: 36 | labels = [line for line in open(self.f_label[load_index], 'r').readlines()] 37 | else: 38 | labels = [''] 39 | tag = self.data_tag[load_index] 40 | voxel = process_pointcloud(raw_lidar) 41 | ret = [tag, rgb, raw_lidar, voxel, labels] 42 | 43 | return ret 44 | 45 | 46 | class KITTI(data_utl.Dataset): 47 | def __init__(self, data_dir, shuffle = False, aug = False, is_testset = False): 48 | self.data_dir = data_dir 49 | self.shuffle = shuffle 50 | self.aug = aug 51 | self.is_testset = is_testset 52 | 53 | self.f_rgb = glob.glob(os.path.join(self.data_dir, 'image_2', '*.png')) 54 | self.f_lidar = glob.glob(os.path.join(self.data_dir, 'velodyne', '*.bin')) 55 | self.f_label = glob.glob(os.path.join(self.data_dir, 'label_2', '*.txt')) 56 | 57 | self.f_rgb.sort() 58 | self.f_lidar.sort() 59 | self.f_label.sort() 60 | 61 | self.data_tag = [name.split('/')[-1].split('.')[-2] for name in self.f_rgb] 62 | 63 | assert len(self.data_tag) != 0, 'Dataset folder is not correct!' 64 | assert len(self.data_tag) == len(self.f_rgb) == len(self.f_lidar), 'Dataset folder is not correct!' 65 | 66 | nums = len(self.f_rgb) 67 | self.indices = list(range(nums)) 68 | if self.shuffle: 69 | np.random.shuffle(self.indices) 70 | 71 | # Build a data processor 72 | self.proc = Processor(self.data_tag, self.f_rgb, self.f_lidar, self.f_label, self.data_dir, self.aug, self.is_testset) 73 | 74 | 75 | def __getitem__(self, index): 76 | # A list of [tag, rgb, raw_lidar, voxel, labels] 77 | ret = self.proc(self.indices[index]) 78 | 79 | return ret 80 | 81 | 82 | def __len__(self): 83 | return len(self.indices) 84 | 85 | 86 | def collate_fn(rets): 87 | tag = [ret[0] for ret in rets] 88 | rgb = [ret[1] for ret in rets] 89 | raw_lidar = [ret[2] for ret in rets] 90 | voxel = [ret[3] for ret in rets] 91 | labels = [ret[4] for ret in rets] 92 | 93 | # Only for voxel 94 | _, vox_feature, vox_number, vox_coordinate = build_input(voxel) 95 | 96 | res = ( 97 | np.array(tag), 98 | np.array(labels), 99 | [torch.from_numpy(x) for x in vox_feature], 100 | np.array(vox_number), 101 | [torch.from_numpy(x) for x in vox_coordinate], 102 | np.array(rgb), 103 | np.array(raw_lidar) 104 | ) 105 | 106 | return res 107 | 108 | 109 | def build_input(voxel_dict_list): 110 | batch_size = len(voxel_dict_list) 111 | 112 | feature_list = [] 113 | number_list = [] 114 | coordinate_list = [] 115 | for i, voxel_dict in zip(range(batch_size), voxel_dict_list): 116 | feature_list.append(voxel_dict['feature_buffer']) # (K, T, 7); K is max number of non-empty voxels; T = 35 117 | number_list.append(voxel_dict['number_buffer']) # (K,) 118 | coordinate = voxel_dict['coordinate_buffer'] # (K, 3) 119 | coordinate_list.append(np.pad(coordinate, ((0, 0), (1, 0)), mode = 'constant', constant_values = i)) 120 | 121 | # feature = np.concatenate(feature_list) 122 | # number = np.concatenate(number_list) 123 | # 
coordinate = np.concatenate(coordinate_list) 124 | # 125 | # return batch_size, feature, number, coordinate 126 | 127 | return batch_size, feature_list, number_list, coordinate_list -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import cv2 5 | cv2.setNumThreads(0) 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.utils.data import DataLoader as DataLoader 10 | from loader.kitti import collate_fn 11 | 12 | from config import cfg 13 | from utils.utils import box3d_to_label 14 | from model.model import RPN3D 15 | from loader.kitti import KITTI as Dataset 16 | 17 | 18 | parser = argparse.ArgumentParser(description = 'training') 19 | 20 | parser.add_argument('--tag', type = str, default = 'default', help = 'log tag') 21 | parser.add_argument('--output_path', type = str, default = './preds', help = 'results output dir') 22 | parser.add_argument('--vis', type = bool, default = True, help = 'set to True if dumping visualization') 23 | 24 | parser.add_argument('--batch_size', type = int, default = 2, help = 'batch size') 25 | 26 | parser.add_argument('--resumed_model', type = str, default = '', help = 'if specified, load the specified model') 27 | 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | def run(): 33 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in cfg.GPU_AVAILABLE) 34 | 35 | # Build data loader 36 | val_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'validation'), shuffle = False, aug = False, is_testset = False) 37 | val_dataloader = DataLoader(val_dataset, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn, 38 | num_workers = args.workers, pin_memory = False) 39 | 40 | # Build model 41 | model = RPN3D(cfg.DETECT_OBJ) 42 | 43 | # Resume model 44 | if args.resumed_model: 45 | model_file = os.path.join(save_model_dir, args.resumed_model) 46 | if os.path.isfile(model_file): 47 | checkpoint = torch.load(model_file) 48 | model.load_state_dict(checkpoint['state_dict']) 49 | print(("=> Loaded checkpoint '{}' (epoch {}, global_counter {})".format( 50 | args.resumed_model, checkpoint['epoch'], checkpoint['global_counter']))) 51 | else: 52 | print(("=> No checkpoint found at '{}'".format(args.resumed_model))) 53 | else: 54 | raise Exception('No pre-trained model to test!') 55 | 56 | model = nn.DataParallel(model).cuda() 57 | 58 | model.train(False) # Validation mode 59 | 60 | with torch.no_grad(): 61 | for (i, val_data) in enumerate(val_dataloader): 62 | 63 | # Forward pass for validation and prediction 64 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 65 | 66 | front_images, bird_views, heatmaps = None, None, None 67 | if args.vis: 68 | tags, ret_box3d_scores, front_images, bird_views, heatmaps = \ 69 | model.module.predict(val_data, probs, deltas, summary = False, vis = True) 70 | else: 71 | tags, ret_box3d_scores = model.module.predict(val_data, probs, deltas, summary = False, vis = False) 72 | 73 | # tags: (N) 74 | # ret_box3d_scores: (N, N'); (class, x, y, z, h, w, l, rz, score) 75 | for tag, score in zip(tags, ret_box3d_scores): 76 | output_path = os.path.join(args.output_path, 'data', tag + '.txt') 77 | with open(output_path, 'w+') as f: 78 | labels = box3d_to_label([score[:, 1:8]], [score[:, 0]], [score[:, -1]], coordinate = 'lidar')[0] 79 | for line in labels: 80 | f.write(line) 81 | print('Write out {} objects to 
{}'.format(len(labels), tag)) 82 | 83 | # Dump visualizations 84 | if args.vis: 85 | for tag, front_image, bird_view, heatmap in zip(tags, front_images, bird_views, heatmaps): 86 | front_img_path = os.path.join(args.output_path, 'vis', tag + '_front.jpg') 87 | bird_view_path = os.path.join(args.output_path, 'vis', tag + '_bv.jpg') 88 | heatmap_path = os.path.join(args.output_path, 'vis', tag + '_heatmap.jpg') 89 | cv2.imwrite(front_img_path, front_image) 90 | cv2.imwrite(bird_view_path, bird_view) 91 | cv2.imwrite(heatmap_path, heatmap) 92 | 93 | 94 | if __name__ == '__main__': 95 | dataset_dir = cfg.DATA_DIR 96 | val_dir = os.path.join(cfg.DATA_DIR, 'validation') 97 | save_model_dir = os.path.join('./saved', args.tag) 98 | 99 | # Create output folder 100 | os.makedirs(args.output_path, exist_ok = True) 101 | os.makedirs(os.path.join(args.output_path, 'data'), exist_ok = True) 102 | if args.vis: 103 | os.makedirs(os.path.join(args.output_path, 'vis'), exist_ok = True) 104 | 105 | run() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This is an unofficial inplementation of [VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection](https://arxiv.org/abs/1711.06396) in PyTorch. 3 | This project is based on the work (in TensorFlow) [here](https://github.com/qianguih/voxelnet). 4 | Thanks to [@qianguih](https://github.com/qianguih). 5 | 6 | 7 | # Dependencies 8 | - `python3.5+` 9 | - `Pytorch` (tested on 0.4.1) 10 | - `TensorBoardX` (tested on 1.4) 11 | - `OpenCV` 12 | - `Pillow` (for add_image in TensorBoardX) 13 | - `Boost` (for compiling evaluation code) 14 | 15 | 16 | # Installation 17 | 1. Clone this repository. 18 | 19 | 2. Compile the Cython module: 20 | ```bash 21 | $ python utils/setup.py build_ext --inplace 22 | ``` 23 | 24 | 3. Compile the evaluation code: 25 | ```bash 26 | $ cd eval/KITTI 27 | $ g++ -I path/to/boost/include -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp 28 | ``` 29 | 30 | 4. grant the execution permission to evaluation script: 31 | ```bash 32 | $ cd eval/KITTI 33 | $ chmod +x launch_test.sh 34 | ``` 35 | 36 | 37 | # Data Preparation 38 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). 39 | Description of annotation can be found [here](https://github.com/yanii/kitti-pcl/blob/master/KITTI_README.TXT). Data to download includes: 40 | * Velodyne point clouds (29 GB): unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/point_cloud'` 41 | * Training labels of object data set (5 MB) for input labels of VoxelNet: unzip it and put `'training'` sub-folder into `'data/KITTI/label'` 42 | * Camera calibration matrices of object data set (16 MB) for visualization of predictions: unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/calib'` 43 | * Left color images of object data set (12 GB) for visualization of predictions: unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/image'` 44 | 45 | 2. Crop point cloud data for training and validation. Point clouds outside the image coordinates are removed. Modify data path in `preproc/crop.py` and run it to generate cropped data. Note that cropped point cloud data will overwrite raw point cloud data. 46 | 47 | 3. 
Download the train/val split protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and untar it into `'data/KITTI'`. Modify the data path in `'preproc/split.py'` and run it to generate the train/val folders, which have the following structure:
48 | ```plain
49 | └── data
50 |     ├── KITTI
51 |     └── MD_KITTI
52 |         ├── training   <-- training data
53 |         |   ├── image_2
54 |         |   ├── label_2
55 |         |   └── velodyne
56 |         └── validation <--- evaluation data
57 |             ├── image_2
58 |             ├── label_2
59 |             └── velodyne
60 | ```
61 |
62 | 4. Update the dataset directory in `config.py` and `eval/KITTI/launch_test.sh`
63 |
64 |
65 | # Train
66 | 1. Specify the GPUs to use in `config.py`. Currently, the code only supports a single GPU.
67 | 2. Run `train.sh` with the desired hyper-parameters to start training:
68 | ```bash
69 | $ bash train.sh
70 | ```
71 | Note that more hyper-parameters can be specified in this bash file or in the Python file directly.
72 | Another group of hyper-parameters can be found in the original [repo](https://github.com/qianguih/voxelnet).
73 |
74 | During training, training statistics are recorded in `log/default`, which can be monitored by TensorBoardX using the command `tensorboard --logdir=path/to/log/default`. Models are saved in `saved/default`.
75 |
76 | Intermediate validation results will be dumped into the folder `preds/XXX/data`, with `XXX` being the epoch number. Metrics will be calculated (by calling `eval/KITTI/launch_test.sh`) and saved in `predictions/XXX/log`.
77 |
78 | If the `--vis` flag is set to `True`, visualizations of intermediate results will be dumped into the folder `preds/XXX/vis`.
79 |
80 | 3. When the training is done, run `parse.sh` to generate the learning curve.
81 | ```bash
82 | $ bash parse.sh
83 | ```
84 |
85 | 4. There is a pre-trained model for Car in `save_model/pre_trained_car`.
86 |
87 |
88 | # Evaluate
89 | 1. Run `test.sh` to produce final predictions on the validation set after training is done. Changing the `--tag` flag to `pre_trained_car` will test the pre-trained model (to be done).
90 | ```bash
91 | $ bash test.sh
92 | ```
93 | Note that results will be dumped into `preds/data`. Set the `--vis` flag to `True` to dump visualizations into `preds/vis`.
94 |
95 | 2.
Run the following command to measure the quantitative performance of predictions: 96 | ```bash 97 | $ ./eval/KITTI/evaluate_object_3d_offline ./data/MD_KITTI/validation/label_2 ./preds 98 | ``` 99 | 100 | 101 | # TODO 102 | - [ ] Support multiple GPUs 103 | - [ ] reproduce results for `Car`, `Pedestrian` and `Cyclist` 104 | 105 | 106 | -------------------------------------------------------------------------------- /utils/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | from cython.parallel import prange, parallel 11 | 12 | 13 | DTYPE = np.float32 14 | ctypedef float DTYPE_t 15 | 16 | 17 | def bbox_overlaps( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | 59 | def bbox_intersections( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 62 | """ 63 | For each query box compute the intersection ratio covered by boxes 64 | ---------- 65 | Parameters 66 | ---------- 67 | boxes: (N, 4) ndarray of float 68 | query_boxes: (K, 4) ndarray of float 69 | Returns 70 | ------- 71 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 72 | """ 73 | cdef unsigned int N = boxes.shape[0] 74 | cdef unsigned int K = query_boxes.shape[0] 75 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 76 | cdef DTYPE_t iw, ih, box_area 77 | cdef DTYPE_t ua 78 | cdef unsigned int k, n 79 | for k in range(K): 80 | box_area = ( 81 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 82 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 83 | ) 84 | for n in range(N): 85 | iw = ( 86 | min(boxes[n, 2], query_boxes[k, 2]) - 87 | max(boxes[n, 0], query_boxes[k, 0]) + 1 88 | ) 89 | if iw > 0: 90 | ih = ( 91 | min(boxes[n, 3], query_boxes[k, 3]) - 92 | max(boxes[n, 1], query_boxes[k, 1]) + 1 93 | ) 94 | if ih > 0: 95 | intersec[n, k] = iw * ih / box_area 96 | return intersec 97 | 98 | # Compute bounding box voting 99 | def box_vote( 100 | np.ndarray[float, ndim=2] dets_NMS, 101 | np.ndarray[float, ndim=2] dets_all): 102 | cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], 
dets_NMS.shape[1]), dtype=np.float32) 103 | cdef unsigned int N = dets_NMS.shape[0] 104 | cdef unsigned int M = dets_all.shape[0] 105 | 106 | cdef np.ndarray[float, ndim=1] det 107 | cdef np.ndarray[float, ndim=1] acc_box 108 | cdef float acc_score 109 | 110 | cdef np.ndarray[float, ndim=1] det2 111 | cdef float bi0, bi1, bit2, bi3 112 | cdef float iw, ih, ua 113 | 114 | cdef float thresh=0.5 115 | 116 | for i in range(N): 117 | det = dets_NMS[i, :] 118 | acc_box = np.zeros((4), dtype=np.float32) 119 | acc_score = 0.0 120 | 121 | for m in range(M): 122 | det2 = dets_all[m, :] 123 | 124 | bi0 = max(det[0], det2[0]) 125 | bi1 = max(det[1], det2[1]) 126 | bi2 = min(det[2], det2[2]) 127 | bi3 = min(det[3], det2[3]) 128 | 129 | iw = bi2 - bi0 + 1 130 | ih = bi3 - bi1 + 1 131 | 132 | if not (iw > 0 and ih > 0): 133 | continue 134 | 135 | ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih 136 | ov = iw * ih / ua 137 | 138 | if (ov < thresh): 139 | continue 140 | 141 | acc_box += det2[4] * det2[0:4] 142 | acc_score += det2[4] 143 | 144 | dets_voted[i][0:4] = acc_box / acc_score 145 | dets_voted[i][4] = det[4] # Keep the original score 146 | 147 | return dets_voted 148 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | 5 | """ 6 | VoxelNet config system. 7 | """ 8 | 9 | import os.path as osp 10 | from easydict import EasyDict as edict 11 | import math 12 | 13 | __C = edict() 14 | # Consumers can get config by: import config as cfg 15 | cfg = __C 16 | 17 | # For dataset dir 18 | __C.DATA_DIR = './data/MD_KITTI' 19 | __C.CALIB_DIR = './data/KITTI/calib/training/calib' 20 | 21 | 22 | # For gpu allocation 23 | __C.GPU_AVAILABLE = '0' 24 | __C.GPU_USE_COUNT = len(__C.GPU_AVAILABLE.split(',')) 25 | __C.GPU_MEMORY_FRACTION = 1 26 | 27 | # Selected object 28 | __C.DETECT_OBJ = 'Car' # Pedestrian/Cyclist 29 | if __C.DETECT_OBJ == 'Car': 30 | __C.Z_MIN = -3 31 | __C.Z_MAX = 1 32 | __C.Y_MIN = -40 33 | __C.Y_MAX = 40 34 | __C.X_MIN = 0 35 | __C.X_MAX = 70.4 36 | __C.VOXEL_Z_SIZE = 0.4 37 | __C.VOXEL_Y_SIZE = 0.2 38 | __C.VOXEL_X_SIZE = 0.2 39 | __C.VOXEL_POINT_COUNT = 35 40 | __C.INPUT_DEPTH = int((__C.Z_MAX - __C.Z_MIN) / __C.VOXEL_Z_SIZE) 41 | __C.INPUT_HEIGHT = int((__C.Y_MAX - __C.Y_MIN) / __C.VOXEL_Y_SIZE) 42 | __C.INPUT_WIDTH = int((__C.X_MAX - __C.X_MIN) / __C.VOXEL_X_SIZE) 43 | __C.FEATURE_RATIO = 2 44 | __C.FEATURE_WIDTH = int(__C.INPUT_WIDTH / __C.FEATURE_RATIO) 45 | __C.FEATURE_HEIGHT = int(__C.INPUT_HEIGHT / __C.FEATURE_RATIO) 46 | else: 47 | __C.Z_MIN = -3 48 | __C.Z_MAX = 1 49 | __C.Y_MIN = -20 50 | __C.Y_MAX = 20 51 | __C.X_MIN = 0 52 | __C.X_MAX = 48 53 | __C.VOXEL_Z_SIZE = 0.4 54 | __C.VOXEL_X_SIZE = 0.2 55 | __C.VOXEL_Y_SIZE = 0.2 56 | __C.VOXEL_POINT_COUNT = 45 57 | __C.INPUT_DEPTH = int((__C.Z_MAX - __C.Z_MIN) / __C.VOXEL_Z_SIZE) 58 | __C.INPUT_WIDTH = int((__C.X_MAX - __C.X_MIN) / __C.VOXEL_X_SIZE) 59 | __C.INPUT_HEIGHT = int((__C.Y_MAX - __C.Y_MIN) / __C.VOXEL_Y_SIZE) 60 | __C.FEATURE_RATIO = 2 61 | __C.FEATURE_WIDTH = int(__C.INPUT_WIDTH / __C.FEATURE_RATIO) 62 | __C.FEATURE_HEIGHT = int(__C.INPUT_HEIGHT / __C.FEATURE_RATIO) 63 | 64 | # Set the log image scale factor 65 | __C.BV_LOG_FACTOR = 4 66 | 67 | # for data set type 68 | __C.DATA_SETS_TYPE = 'kitti' 69 | 70 | # Root directory of project 71 | __C.CHECKPOINT_DIR = osp.join('checkpoint') 
72 | __C.LOG_DIR = osp.join('log') 73 | 74 | # for data preprocess 75 | # sensors 76 | __C.VELODYNE_ANGULAR_RESOLUTION = 0.08 / 180 * math.pi 77 | __C.VELODYNE_VERTICAL_RESOLUTION = 0.4 / 180 * math.pi 78 | __C.VELODYNE_HEIGHT = 1.73 79 | 80 | # rgb 81 | if __C.DATA_SETS_TYPE == 'kitti': 82 | __C.IMAGE_WIDTH = 1242 83 | __C.IMAGE_HEIGHT = 375 84 | __C.IMAGE_CHANNEL = 3 85 | 86 | # top 87 | if __C.DATA_SETS_TYPE == 'kitti': 88 | __C.TOP_Y_MIN = -30 89 | __C.TOP_Y_MAX = +30 90 | __C.TOP_X_MIN = 0 91 | __C.TOP_X_MAX = 80 92 | __C.TOP_Z_MIN = -4.2 93 | __C.TOP_Z_MAX = 0.8 94 | 95 | __C.TOP_X_DIVISION = 0.1 96 | __C.TOP_Y_DIVISION = 0.1 97 | __C.TOP_Z_DIVISION = 0.2 98 | 99 | __C.TOP_WIDTH = (__C.TOP_X_MAX - __C.TOP_X_MIN) // __C.TOP_X_DIVISION 100 | __C.TOP_HEIGHT = (__C.TOP_Y_MAX - __C.TOP_Y_MIN) // __C.TOP_Y_DIVISION 101 | __C.TOP_CHANNEL = (__C.TOP_Z_MAX - __C.TOP_Z_MIN) // __C.TOP_Z_DIVISION 102 | 103 | # for 2d proposal to 3d proposal 104 | __C.PROPOSAL3D_Z_MIN = -2.3 # -2.52 105 | __C.PROPOSAL3D_Z_MAX = 1.5 # -1.02 106 | 107 | # for RPN basenet choose 108 | __C.USE_VGG_AS_RPN = 0 109 | __C.USE_RESNET_AS_RPN = 0 110 | __C.USE_RESNEXT_AS_RPN = 0 111 | 112 | # for camera and lidar coordination convert 113 | if __C.DATA_SETS_TYPE == 'kitti': 114 | # cal mean from train set 115 | __C.MATRIX_P2 = ([[719.787081, 0., 608.463003, 44.9538775], 116 | [0., 719.787081, 174.545111, 0.1066855], 117 | [0., 0., 1., 3.0106472e-03], 118 | [0., 0., 0., 0]]) 119 | 120 | # Cal mean from train set 121 | __C.MATRIX_T_VELO_2_CAM = ([ 122 | [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], 123 | [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02], 124 | [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], 125 | [0, 0, 0, 1] 126 | ]) 127 | # Cal mean from train set 128 | __C.MATRIX_R_RECT_0 = ([ 129 | [0.99992475, 0.00975976, -0.00734152, 0], 130 | [-0.0097913, 0.99994262, -0.00430371, 0], 131 | [0.00729911, 0.0043753, 0.99996319, 0], 132 | [0, 0, 0, 1] 133 | ]) 134 | 135 | # Faster-RCNN/SSD Hyper params 136 | if __C.DETECT_OBJ == 'Car': 137 | # Car anchor 138 | __C.ANCHOR_L = 3.9 139 | __C.ANCHOR_W = 1.6 140 | __C.ANCHOR_H = 1.56 141 | __C.ANCHOR_Z = -1.0 - cfg.ANCHOR_H/2 142 | __C.RPN_POS_IOU = 0.6 143 | __C.RPN_NEG_IOU = 0.45 144 | 145 | elif __C.DETECT_OBJ == 'Pedestrian': 146 | # Pedestrian anchor 147 | __C.ANCHOR_L = 0.8 148 | __C.ANCHOR_W = 0.6 149 | __C.ANCHOR_H = 1.73 150 | __C.ANCHOR_Z = -0.6 - cfg.ANCHOR_H/2 151 | __C.RPN_POS_IOU = 0.5 152 | __C.RPN_NEG_IOU = 0.35 153 | 154 | if __C.DETECT_OBJ == 'Cyclist': 155 | # Cyclist anchor 156 | __C.ANCHOR_L = 1.76 157 | __C.ANCHOR_W = 0.6 158 | __C.ANCHOR_H = 1.73 159 | __C.ANCHOR_Z = -0.6 - cfg.ANCHOR_H/2 160 | __C.RPN_POS_IOU = 0.5 161 | __C.RPN_NEG_IOU = 0.35 162 | 163 | # for rpn nms 164 | __C.RPN_NMS_POST_TOPK = 20 165 | __C.RPN_NMS_THRESH = 0.1 166 | __C.RPN_SCORE_THRESH = 0.96 167 | 168 | 169 | # utils 170 | __C.CORNER2CENTER_AVG = True # average version or max version 171 | 172 | if __name__ == '__main__': 173 | print('__C.ROOT_DIR = ' + __C.ROOT_DIR) 174 | print('__C.DATA_SETS_DIR = ' + __C.DATA_SETS_DIR) 175 | -------------------------------------------------------------------------------- /model/rpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from config import cfg 6 | 7 | import pdb 8 | 9 | 10 | class ConvMD(nn.Module): 11 | def __init__(self, M, cin, cout, k, s, p, bn = True, 
activation = True): 12 | super(ConvMD, self).__init__() 13 | 14 | self.M = M # Dimension of input 15 | self.cin = cin 16 | self.cout = cout 17 | self.k = k 18 | self.s = s 19 | self.p = p 20 | self.bn = bn 21 | self.activation = activation 22 | 23 | if self.M == 2: # 2D input 24 | self.conv = nn.Conv2d(self.cin, self.cout, self.k, self.s, self.p) 25 | if self.bn: 26 | self.batch_norm = nn.BatchNorm2d(self.cout) 27 | elif self.M == 3: # 3D input 28 | self.conv = nn.Conv3d(self.cin, self.cout, self.k, self.s, self.p) 29 | if self.bn: 30 | self.batch_norm = nn.BatchNorm3d(self.cout) 31 | else: 32 | raise Exception('No such mode!') 33 | 34 | 35 | def forward(self, inputs): 36 | 37 | out = self.conv(inputs) 38 | 39 | if self.bn: 40 | out = self.batch_norm(out) 41 | 42 | if self.activation: 43 | return F.relu(out) 44 | else: 45 | return out 46 | 47 | 48 | class Deconv2D(nn.Module): 49 | def __init__(self, cin, cout, k, s, p, bn = True): 50 | super(Deconv2D, self).__init__() 51 | 52 | self.cin = cin 53 | self.cout = cout 54 | self.k = k 55 | self.s = s 56 | self.p = p 57 | self.bn = bn 58 | 59 | self.deconv = nn.ConvTranspose2d(self.cin, self.cout, self.k, self.s, self.p) 60 | 61 | if self.bn: 62 | self.batch_norm = nn.BatchNorm2d(self.cout) 63 | 64 | 65 | def forward(self, inputs): 66 | out = self.deconv(inputs) 67 | 68 | if self.bn == True: 69 | out = self.batch_norm(out) 70 | 71 | return F.relu(out) 72 | 73 | 74 | class MiddleAndRPN(nn.Module): 75 | def __init__(self, alpha = 1.5, beta = 1, sigma = 3, training = True, name = ''): 76 | super(MiddleAndRPN, self).__init__() 77 | 78 | self.middle_layer = nn.Sequential(ConvMD(3, 128, 64, 3, (2, 1, 1,), (1, 1, 1)), 79 | ConvMD(3, 64, 64, 3, (1, 1, 1), (0, 1, 1)), 80 | ConvMD(3, 64, 64, 3, (2, 1, 1), (1, 1, 1))) 81 | 82 | 83 | if cfg.DETECT_OBJ == 'Car': 84 | self.block1 = nn.Sequential(ConvMD(2, 128, 128, 3, (2, 2), (1, 1)), 85 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 86 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 87 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 88 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 89 | else: # Pedestrian/Cyclist 90 | self.block1 = nn.Sequential(ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 91 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 92 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 93 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 94 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 95 | 96 | self.deconv1 = Deconv2D(128, 256, 3, (1, 1), (1, 1)) 97 | 98 | self.block2 = nn.Sequential(ConvMD(2, 128, 128, 3, (2, 2), (1, 1)), 99 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 100 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 101 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 102 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 103 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 104 | 105 | self.deconv2 = Deconv2D(128, 256, 2, (2, 2), (0, 0)) 106 | 107 | self.block3 = nn.Sequential(ConvMD(2, 128, 256, 3, (2, 2), (1, 1)), 108 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 109 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 110 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 111 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 112 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1))) 113 | 114 | self.deconv3 = Deconv2D(256, 256, 4, (4, 4), (0, 0)) 115 | 116 | self.prob_conv = ConvMD(2, 768, 2, 1, (1, 1), (0, 0), bn = False, activation = False) 117 | 118 | self.reg_conv = ConvMD(2, 768, 14, 1, (1, 1), (0, 0), bn = False, activation = False) 119 | 120 | self.output_shape = [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH] 121 | 122 | 123 | def forward(self, inputs): 124 | 125 | batch_size, DEPTH, HEIGHT, WIDTH, C = inputs.shape # 
[batch_size, 10, 400/200, 352/240, 128] 126 | 127 | inputs = inputs.permute(0, 4, 1, 2, 3) # (B, D, H, W, C) -> (B, C, D, H, W) 128 | 129 | temp_conv = self.middle_layer(inputs) # [batch, 64, 2, 400, 352] 130 | temp_conv = temp_conv.view(batch_size, -1, HEIGHT, WIDTH) # [batch, 128, 400, 352] 131 | 132 | temp_conv = self.block1(temp_conv) # [batch, 128, 200, 176] 133 | temp_deconv1 = self.deconv1(temp_conv) 134 | 135 | temp_conv = self.block2(temp_conv) # [batch, 128, 100, 88] 136 | temp_deconv2 = self.deconv2(temp_conv) 137 | 138 | temp_conv = self.block3(temp_conv) # [batch, 256, 50, 44] 139 | temp_deconv3 = self.deconv3(temp_conv) 140 | 141 | temp_conv = torch.cat([temp_deconv3, temp_deconv2, temp_deconv1], dim = 1) 142 | 143 | # Probability score map, [batch, 2, 200/100, 176/120] 144 | p_map = self.prob_conv(temp_conv) 145 | 146 | # Regression map, [batch, 14, 200/100, 176/120] 147 | r_map = self.reg_conv(temp_conv) 148 | 149 | return torch.sigmoid(p_map), r_map 150 | -------------------------------------------------------------------------------- /utils/data_aug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : data_aug.py 5 | # Purpose : 6 | # Creation Date : 21-12-2017 7 | # Last Modified : Fri 19 Jan 2018 01:06:35 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | import os 11 | from utils.utils import * 12 | from utils.preprocess import * 13 | 14 | import pdb 15 | 16 | 17 | def aug_data(tag, object_dir): 18 | np.random.seed() 19 | # rgb = cv2.imread(os.path.join(object_dir,'image_2', tag + '.png')) 20 | rgb = cv2.resize(cv2.imread(os.path.join(object_dir, 'image_2', tag + '.png')), (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)) 21 | lidar = np.fromfile(os.path.join(object_dir, 'velodyne', tag + '.bin'), dtype = np.float32).reshape(-1, 4) 22 | label = np.array([line for line in open(os.path.join(object_dir, 'label_2', tag + '.txt'), 'r').readlines()]) # (N') 23 | 24 | cls = np.array([line.split()[0] for line in label]) # (N') 25 | gt_box3d = label_to_gt_box3d(np.array(label)[np.newaxis, :], cls = '', coordinate = 'camera')[0] # (N', 7); 7 means (x, y, z, h, w, l, r) 26 | 27 | choice = np.random.randint(0, 10) 28 | if choice >= 7: 29 | # Disable this augmention here. Current implementation will decrease the performances. 
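# Note on this branch: each ground-truth box is perturbed independently with a small yaw
# t_rz ~ U(-pi/10, pi/10) and Gaussian translations (t_x, t_y, t_z). A candidate perturbation
# is retried (up to 100 times) while its bird's-eye-view box overlaps any earlier box
# (cal_iou2d > 0). An accepted perturbation is applied both to the box parameters and to the
# LiDAR points inside the box's axis-aligned bound, so points and labels stay consistent.
# That bound only approximates the rotated box, which is what the TODO below refers to.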
30 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 31 | lidar_corner_gt_box3d = center_to_corner_box3d(lidar_center_gt_box3d, coordinate = 'lidar') 32 | for idx in range(len(lidar_corner_gt_box3d)): 33 | # TODO: precisely gather the point 34 | is_collision = True 35 | _count = 0 36 | while is_collision and _count < 100: 37 | t_rz = np.random.uniform(-np.pi / 10, np.pi / 10) 38 | t_x = np.random.normal() 39 | t_y = np.random.normal() 40 | t_z = np.random.normal() 41 | # Check collision 42 | tmp = box_transform(lidar_center_gt_box3d[[idx]], t_x, t_y, t_z, t_rz, 'lidar') 43 | is_collision = False 44 | for idy in range(idx): 45 | x1, y1, w1, l1, r1 = tmp[0][[0, 1, 4, 5, 6]] 46 | x2, y2, w2, l2, r2 = lidar_center_gt_box3d[idy][[0, 1, 4, 5, 6]] 47 | iou = cal_iou2d(np.array([x1, y1, w1, l1, r1], dtype = np.float32), 48 | np.array([x2, y2, w2, l2, r2], dtype = np.float32)) 49 | if iou > 0: 50 | is_collision = True 51 | _count += 1 52 | break 53 | if not is_collision: 54 | box_corner = lidar_corner_gt_box3d[idx] 55 | minx = np.min(box_corner[:, 0]) 56 | miny = np.min(box_corner[:, 1]) 57 | minz = np.min(box_corner[:, 2]) 58 | maxx = np.max(box_corner[:, 0]) 59 | maxy = np.max(box_corner[:, 1]) 60 | maxz = np.max(box_corner[:, 2]) 61 | bound_x = np.logical_and(lidar[:, 0] >= minx, lidar[:, 0] <= maxx) 62 | bound_y = np.logical_and(lidar[:, 1] >= miny, lidar[:, 1] <= maxy) 63 | bound_z = np.logical_and(lidar[:, 2] >= minz, lidar[:, 2] <= maxz) 64 | bound_box = np.logical_and(np.logical_and(bound_x, bound_y), bound_z) 65 | lidar[bound_box, 0:3] = point_transform(lidar[bound_box, 0:3], t_x, t_y, t_z, rz = t_rz) 66 | lidar_center_gt_box3d[idx] = box_transform(lidar_center_gt_box3d[[idx]], t_x, t_y, t_z, t_rz, 'lidar') 67 | 68 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 69 | newtag = 'aug_{}_1_{}'.format(tag, np.random.randint(1, 1024)) 70 | elif choice < 7 and choice >= 4: 71 | # Global rotation 72 | angle = np.random.uniform(-np.pi / 4, np.pi / 4) 73 | lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz = angle) 74 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 75 | lidar_center_gt_box3d = box_transform(lidar_center_gt_box3d, 0, 0, 0, r = angle, coordinate = 'lidar') 76 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 77 | newtag = 'aug_{}_2_{:.4f}'.format(tag, angle).replace('.', '_') 78 | else: 79 | # Global scaling 80 | factor = np.random.uniform(0.95, 1.05) 81 | lidar[:, 0:3] = lidar[:, 0:3] * factor 82 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 83 | lidar_center_gt_box3d[:, 0:6] = lidar_center_gt_box3d[:, 0:6] * factor 84 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 85 | newtag = 'aug_{}_3_{:.4f}'.format(tag, factor).replace('.', '_') 86 | 87 | label = box3d_to_label(gt_box3d[np.newaxis, ...], cls[np.newaxis, ...], coordinate = 'camera')[0] # (N') 88 | voxel_dict = process_pointcloud(lidar) # Contains: feature_buffer, number_buffer, coordinate_buffer 89 | 90 | return newtag, rgb, lidar, voxel_dict, label 91 | 92 | 93 | # def worker(tag): 94 | # new_tag, rgb, lidar, voxel_dict, label = aug_data(tag) 95 | # output_path = os.path.join(object_dir, 'training_aug') 96 | # 97 | # cv2.imwrite(os.path.join(output_path, 'image_2', newtag + '.png'), rgb) 98 | # lidar.reshape(-1).tofile(os.path.join(output_path, 99 | # 'velodyne', newtag + '.bin')) 100 | # np.savez_compressed(os.path.join( 101 | # output_path, 'voxel' if cfg.DETECT_OBJ == 'Car' else 'voxel_ped', newtag), **voxel_dict) 102 | # with open(os.path.join(output_path, 'label_2', 
newtag + '.txt'), 'w+') as f: 103 | # for line in label: 104 | # f.write(line) 105 | # print(newtag) 106 | # 107 | # 108 | # def main(): 109 | # object_dir = './data/object' 110 | # fl = glob.glob(os.path.join(object_dir, 'training', 'calib', '*.txt')) 111 | # candidate = [f.split('/')[-1].split('.')[0] for f in fl] 112 | # tags = [] 113 | # for _ in range(args.aug_amount): 114 | # tags.append(candidate[np.random.randint(0, len(candidate))]) 115 | # print('generate {} tags'.format(len(tags))) 116 | # pool = mp.Pool(args.num_workers) 117 | # pool.map(worker, tags) 118 | # 119 | # 120 | # if __name__ == '__main__': 121 | # parser = argparse.ArgumentParser(description='') 122 | # parser.add_argument('-i', '--aug-amount', type=int, nargs='?', default=1000) 123 | # parser.add_argument('-n', '--num-workers', type=int, nargs='?', default=10) 124 | # args = parser.parse_args() 125 | # 126 | # main() 127 | -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from config import cfg 7 | from utils.utils import * 8 | from utils.colorize import colorize 9 | from utils.nms import nms 10 | from model.group_pointcloud import FeatureNet 11 | from model.rpn import MiddleAndRPN 12 | 13 | import pdb 14 | 15 | 16 | small_addon_for_BCE = 1e-6 17 | 18 | 19 | class RPN3D(nn.Module): 20 | def __init__(self, cls = 'Car', alpha = 1.5, beta = 1, sigma = 3): 21 | super(RPN3D, self).__init__() 22 | 23 | self.cls = cls 24 | self.alpha = alpha 25 | self.beta = beta 26 | self.sigma = sigma 27 | 28 | self.feature = FeatureNet() 29 | self.rpn = MiddleAndRPN() 30 | 31 | # Generate anchors 32 | self.anchors = cal_anchors() # [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2, 7]; 2 means two rotations; 7 means (cx, cy, cz, h, w, l, r) 33 | 34 | self.rpn_output_shape = self.rpn.output_shape 35 | 36 | 37 | def forward(self, data): 38 | tag = data[0] 39 | label = data[1] 40 | vox_feature = data[2] 41 | vox_number = data[3] 42 | vox_coordinate = data[4] 43 | 44 | features = self.feature(vox_feature, vox_number, vox_coordinate) 45 | prob_output, delta_output = self.rpn(features) 46 | 47 | # Calculate ground-truth 48 | pos_equal_one, neg_equal_one, targets = cal_rpn_target( 49 | label, self.rpn_output_shape, self.anchors, cls = cfg.DETECT_OBJ, coordinate = 'lidar') 50 | pos_equal_one_for_reg = np.concatenate( 51 | [np.tile(pos_equal_one[..., [0]], 7), np.tile(pos_equal_one[..., [1]], 7)], axis = -1) 52 | pos_equal_one_sum = np.clip(np.sum(pos_equal_one, axis = (1, 2, 3)).reshape(-1, 1, 1, 1), a_min = 1, a_max = None) 53 | neg_equal_one_sum = np.clip(np.sum(neg_equal_one, axis = (1, 2, 3)).reshape(-1, 1, 1, 1), a_min = 1, a_max = None) 54 | 55 | # Move to gpu 56 | device = features.device 57 | pos_equal_one = torch.from_numpy(pos_equal_one).to(device).float() 58 | neg_equal_one = torch.from_numpy(neg_equal_one).to(device).float() 59 | targets = torch.from_numpy(targets).to(device).float() 60 | pos_equal_one_for_reg = torch.from_numpy(pos_equal_one_for_reg).to(device).float() 61 | pos_equal_one_sum = torch.from_numpy(pos_equal_one_sum).to(device).float() 62 | neg_equal_one_sum = torch.from_numpy(neg_equal_one_sum).to(device).float() 63 | 64 | # [batch, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2/14] -> [batch, 2/14, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH] 65 | pos_equal_one = pos_equal_one.permute(0, 3, 1, 2) 66 | neg_equal_one = neg_equal_one.permute(0, 
3, 1, 2) 67 | targets = targets.permute(0, 3, 1, 2) 68 | pos_equal_one_for_reg = pos_equal_one_for_reg.permute(0, 3, 1, 2) 69 | 70 | # Calculate loss 71 | cls_pos_loss = (-pos_equal_one * torch.log(prob_output + small_addon_for_BCE)) / pos_equal_one_sum 72 | cls_neg_loss = (-neg_equal_one * torch.log(1 - prob_output + small_addon_for_BCE)) / neg_equal_one_sum 73 | 74 | cls_loss = torch.sum(self.alpha * cls_pos_loss + self.beta * cls_neg_loss) 75 | cls_pos_loss_rec = torch.sum(cls_pos_loss) 76 | cls_neg_loss_rec = torch.sum(cls_neg_loss) 77 | 78 | reg_loss = smooth_l1(delta_output * pos_equal_one_for_reg, targets * pos_equal_one_for_reg, self.sigma) / pos_equal_one_sum 79 | reg_loss = torch.sum(reg_loss) 80 | 81 | loss = cls_loss + reg_loss 82 | 83 | return prob_output, delta_output, loss, cls_loss, reg_loss, cls_pos_loss_rec, cls_neg_loss_rec 84 | 85 | 86 | def predict(self, data, probs, deltas, summary = False, vis = False): 87 | ''' 88 | probs: (batch, 2, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH) 89 | deltas: (batch, 14, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH) 90 | ''' 91 | tag = data[0] 92 | label = data[1] 93 | vox_feature = data[2] 94 | vox_number = data[3] 95 | vox_coordinate = data[4] 96 | img = data[5] 97 | lidar = data[6] 98 | 99 | batch_size, _, _, _ = probs.shape 100 | device = probs.device 101 | 102 | batch_gt_boxes3d = None 103 | if summary or vis: 104 | batch_gt_boxes3d = label_to_gt_box3d(label, cls = self.cls, coordinate = 'lidar') 105 | 106 | # Move to cpu and convert to numpy array 107 | probs = probs.cpu().detach().numpy() 108 | deltas = deltas.cpu().detach().numpy() 109 | 110 | # BOTTLENECK 111 | batch_boxes3d = delta_to_boxes3d(deltas, self.anchors, coordinate = 'lidar') 112 | batch_boxes2d = batch_boxes3d[:, :, [0, 1, 4, 5, 6]] 113 | batch_probs = probs.reshape((batch_size, -1)) 114 | 115 | # NMS 116 | ret_box3d = [] 117 | ret_score = [] 118 | for batch_id in range(batch_size): 119 | # Remove box with low score 120 | ind = np.where(batch_probs[batch_id, :] >= cfg.RPN_SCORE_THRESH)[0] 121 | tmp_boxes3d = batch_boxes3d[batch_id, ind, ...] 122 | tmp_boxes2d = batch_boxes2d[batch_id, ind, ...] 123 | tmp_scores = batch_probs[batch_id, ind] 124 | 125 | # TODO: if possible, use rotate NMS 126 | boxes2d = corner_to_standup_box2d(center_to_corner_box2d(tmp_boxes2d, coordinate = 'lidar')) 127 | 128 | # 2D box index after nms 129 | ind, cnt = nms(torch.from_numpy(boxes2d).to(device), torch.from_numpy(tmp_scores).to(device), 130 | cfg.RPN_NMS_THRESH, cfg.RPN_NMS_POST_TOPK) 131 | ind = ind[:cnt].cpu().detach().numpy() 132 | 133 | tmp_boxes3d = tmp_boxes3d[ind, ...] 
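            # Note: `ind` holds the detection indices kept by NMS, which here runs on
            # axis-aligned ("standup") boxes derived from the rotated bird's-eye-view
            # corners; this approximates rotated NMS (see the TODO above). The same
            # indices are applied to the scores below so boxes and scores stay aligned.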
134 | tmp_scores = tmp_scores[ind] 135 | ret_box3d.append(tmp_boxes3d) 136 | ret_score.append(tmp_scores) 137 | 138 | ret_box3d_score = [] 139 | for boxes3d, scores in zip(ret_box3d, ret_score): 140 | ret_box3d_score.append(np.concatenate([np.tile(self.cls, len(boxes3d))[:, np.newaxis], 141 | boxes3d, scores[:, np.newaxis]], axis = -1)) 142 | 143 | if summary: 144 | # Only summry the first one in a batch 145 | cur_tag = tag[0] 146 | P, Tr, R = load_calib(os.path.join(cfg.CALIB_DIR, cur_tag + '.txt')) 147 | 148 | front_image = draw_lidar_box3d_on_image(img[0], ret_box3d[0], ret_score[0], 149 | batch_gt_boxes3d[0], P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 150 | 151 | bird_view = lidar_to_bird_view_img(lidar[0], factor = cfg.BV_LOG_FACTOR) 152 | 153 | bird_view = draw_lidar_box3d_on_birdview(bird_view, ret_box3d[0], ret_score[0], batch_gt_boxes3d[0], 154 | factor = cfg.BV_LOG_FACTOR, P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 155 | 156 | heatmap = colorize(probs[0, ...], cfg.BV_LOG_FACTOR) 157 | 158 | ret_summary = [['predict/front_view_rgb', front_image[np.newaxis, ...]], # [None, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3] 159 | # [None, cfg.BV_LOG_FACTOR * cfg.INPUT_HEIGHT, cfg.BV_LOG_FACTOR * cfg.INPUT_WIDTH, 3] 160 | ['predict/bird_view_lidar', bird_view[np.newaxis, ...]], 161 | # [None, cfg.BV_LOG_FACTOR * cfg.FEATURE_HEIGHT, cfg.BV_LOG_FACTOR * cfg.FEATURE_WIDTH, 3] 162 | ['predict/bird_view_heatmap', heatmap[np.newaxis, ...]]] 163 | 164 | return tag, ret_box3d_score, ret_summary 165 | 166 | if vis: 167 | front_images, bird_views, heatmaps = [], [], [] 168 | for i in range(len(img)): 169 | cur_tag = tag[i] 170 | P, Tr, R = load_calib(os.path.join(cfg.CALIB_DIR, cur_tag + '.txt')) 171 | 172 | front_image = draw_lidar_box3d_on_image(img[i], ret_box3d[i], ret_score[i], 173 | batch_gt_boxes3d[i], P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 174 | 175 | bird_view = lidar_to_bird_view_img(lidar[i], factor = cfg.BV_LOG_FACTOR) 176 | 177 | bird_view = draw_lidar_box3d_on_birdview(bird_view, ret_box3d[i], ret_score[i], batch_gt_boxes3d[i], 178 | factor = cfg.BV_LOG_FACTOR, P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 179 | 180 | heatmap = colorize(probs[i, ...], cfg.BV_LOG_FACTOR) 181 | 182 | front_images.append(front_image) 183 | bird_views.append(bird_view) 184 | heatmaps.append(heatmap) 185 | 186 | return tag, ret_box3d_score, front_images, bird_views, heatmaps 187 | 188 | return tag, ret_box3d_score 189 | 190 | 191 | def smooth_l1(deltas, targets, sigma = 3.0): 192 | # Reference: https://mohitjainweb.files.wordpress.com/2018/03/smoothl1loss.pdf 193 | sigma2 = sigma * sigma 194 | diffs = deltas - targets 195 | smooth_l1_signs = torch.lt(torch.abs(diffs), 1.0 / sigma2).float() 196 | 197 | smooth_l1_option1 = torch.mul(diffs, diffs) * 0.5 * sigma2 198 | smooth_l1_option2 = torch.abs(diffs) - 0.5 / sigma2 199 | smooth_l1_add = torch.mul(smooth_l1_option1, smooth_l1_signs) + torch.mul(smooth_l1_option2, 1 - smooth_l1_signs) 200 | smooth_l1 = smooth_l1_add 201 | 202 | return smooth_l1 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import shutil 5 | import argparse 6 | 7 | import cv2 8 | cv2.setNumThreads(0) 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from torch.utils.data import DataLoader as DataLoader 14 | from torch.nn.utils import clip_grad_norm_ 15 | 16 | from tensorboardX import SummaryWriter 
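# Note: tensorboardX provides the SummaryWriter used for summaries below; on recent
# PyTorch versions, torch.utils.tensorboard.SummaryWriter should work as a drop-in
# replacement.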
17 | 18 | from config import cfg 19 | from utils.utils import box3d_to_label 20 | from model.model import RPN3D 21 | from loader.kitti import KITTI as Dataset 22 | from loader.kitti import collate_fn 23 | 24 | import pdb 25 | 26 | 27 | parser = argparse.ArgumentParser(description = 'training') 28 | 29 | parser.add_argument('--lr', type = float, default = 0.01, help = 'initial learning rate') 30 | parser.add_argument('--alpha', type = float, default = 1.5, help = 'alpha in loss function') 31 | parser.add_argument('--beta', type = float, default = 1, help = 'beta in loss function') 32 | 33 | parser.add_argument('--max_epoch', type = int, default = 1, help = 'max epoch') 34 | parser.add_argument('--batch_size', type = int, default = 1, help = 'batch size') 35 | parser.add_argument('--workers', type = int, default = 4) 36 | 37 | parser.add_argument('--summary_interval', type = int, default = 100, help = 'iter interval for training summary') 38 | parser.add_argument('--summary_val_interval', type = int, default = 200, help = 'iter interval for val summary') 39 | parser.add_argument('--val_epoch', type = int, default = 10, help = 'epoch interval for dump val data') 40 | 41 | parser.add_argument('--log_root', type = str, default = 'log') 42 | parser.add_argument('--log_name', type = str, default = 'train.txt') 43 | parser.add_argument('--tag', type = str, default = 'default', help = 'log tag') 44 | 45 | parser.add_argument('--print_freq', default = 20, type = int, help = 'print frequency') 46 | 47 | parser.add_argument('--resumed_model', type = str, default = '', help = 'if specified, load the specified model') 48 | parser.add_argument('--saved_model', type = str, default = 'kitti_{}.pth.tar') 49 | 50 | # For test data 51 | parser.add_argument('--output_path', type = str, default = './preds', help = 'results output dir') 52 | parser.add_argument('--vis', type = bool, default = True, help = 'set to True if dumping visualization') 53 | 54 | args = parser.parse_args() 55 | 56 | 57 | def run(): 58 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in cfg.GPU_AVAILABLE) 59 | 60 | start_epoch = 0 61 | global_counter = 0 62 | min_loss = sys.float_info.max 63 | 64 | # Build data loader 65 | train_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'training'), shuffle = True, aug = True, is_testset = False) 66 | train_dataloader = DataLoader(train_dataset, batch_size = args.batch_size, shuffle = True, collate_fn = collate_fn, 67 | num_workers = args.workers, pin_memory = False) 68 | 69 | val_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'validation'), shuffle = False, aug = False, is_testset = False) 70 | val_dataloader = DataLoader(val_dataset, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn, 71 | num_workers = args.workers, pin_memory = False) 72 | val_dataloader_iter = iter(val_dataloader) 73 | 74 | # Build model 75 | model = RPN3D(cfg.DETECT_OBJ, args.alpha, args.beta) 76 | 77 | # Resume model if necessary 78 | if args.resumed_model: 79 | model_file = os.path.join(save_model_dir, args.resumed_model) 80 | if os.path.isfile(model_file): 81 | checkpoint = torch.load(model_file) 82 | start_epoch = checkpoint['epoch'] 83 | global_counter = checkpoint['global_counter'] 84 | min_loss = checkpoint['min_loss'] 85 | model.load_state_dict(checkpoint['state_dict']) 86 | print(("=> Loaded checkpoint '{}' (epoch {}, global_counter {})".format( 87 | args.resumed_model, checkpoint['epoch'], checkpoint['global_counter']))) 88 | else: 89 | print(("=> No checkpoint found at 
'{}'".format(args.resumed_model))) 90 | 91 | model = nn.DataParallel(model).cuda() 92 | 93 | # Optimization scheme 94 | optimizer = optim.Adam(model.parameters(), lr = args.lr) 95 | 96 | lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [150]) 97 | 98 | # Init file log 99 | log = open(os.path.join(args.log_root, args.log_name), 'a') 100 | 101 | # Init TensorBoardX writer 102 | summary_writer = SummaryWriter(log_dir) 103 | 104 | # train and validate 105 | tot_epoch = start_epoch 106 | for epoch in range(start_epoch, args.max_epoch): 107 | # Learning rate scheme 108 | lr_sched.step() 109 | 110 | counter = 0 111 | batch_time = time.time() 112 | 113 | tot_val_loss = 0 114 | tot_val_times = 0 115 | 116 | for (i, data) in enumerate(train_dataloader): 117 | 118 | model.train(True) # Training mode 119 | 120 | counter += 1 121 | global_counter += 1 122 | 123 | start_time = time.time() 124 | 125 | # Forward pass for training 126 | _, _, loss, cls_loss, reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(data) 127 | 128 | forward_time = time.time() - start_time 129 | 130 | loss.backward() 131 | 132 | # Clip gradient 133 | clip_grad_norm_(model.parameters(), 5) 134 | 135 | optimizer.step() 136 | optimizer.zero_grad() 137 | 138 | batch_time = time.time() - batch_time 139 | 140 | if counter % args.print_freq == 0: 141 | # Print training info 142 | info = 'Train: {} @ epoch:{}/{} loss: {:.4f} reg_loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} ' \ 143 | 'cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'.format( 144 | counter, epoch + 1, args.max_epoch, loss.item(), reg_loss.item(), cls_loss.item(), cls_pos_loss_rec.item(), 145 | cls_neg_loss_rec.item(), forward_time, batch_time) 146 | info = '{}\t'.format(time.asctime(time.localtime())) + info 147 | print(info) 148 | 149 | # Write training info to log 150 | log.write(info + '\n') 151 | log.flush() 152 | 153 | # Summarize training info 154 | if counter % args.summary_interval == 0: 155 | print("summary_interval now") 156 | summary_writer.add_scalars(str(epoch + 1), {'train/loss' : loss.item(), 157 | 'train/reg_loss' : reg_loss.item(), 158 | 'train/cls_loss' : cls_loss.item(), 159 | 'train/cls_pos_loss' : cls_pos_loss_rec.item(), 160 | 'train/cls_neg_loss' : cls_neg_loss_rec.item()}, global_counter) 161 | 162 | # Summarize validation info 163 | if counter % args.summary_val_interval == 0: 164 | print('summary_val_interval now') 165 | 166 | with torch.no_grad(): 167 | model.train(False) # Validation mode 168 | 169 | val_data = next(val_dataloader_iter) # Sample one batch 170 | 171 | # Forward pass for validation and prediction 172 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 173 | 174 | summary_writer.add_scalars(str(epoch + 1), {'validate/loss': loss.item(), 175 | 'validate/reg_loss': reg_loss.item(), 176 | 'validate/cls_loss': cls_loss.item(), 177 | 'validate/cls_pos_loss': cls_pos_loss_rec.item(), 178 | 'validate/cls_neg_loss': cls_neg_loss_rec.item()}, global_counter) 179 | 180 | try: 181 | # Prediction 182 | tags, ret_box3d_scores, ret_summary = model.module.predict(val_data, probs, deltas, summary = True) 183 | 184 | for (tag, img) in ret_summary: 185 | img = img[0].transpose(2, 0, 1) 186 | summary_writer.add_image(tag, img, global_counter) 187 | except: 188 | raise Exception('Prediction skipped due to an error!') 189 | 190 | # Add sampled data loss 191 | tot_val_loss += val_loss.item() 192 | tot_val_times += 1 193 | 194 | batch_time = time.time() 195 | 196 | # Save the 
best model 197 | avg_val_loss = tot_val_loss / float(tot_val_times) 198 | is_best = avg_val_loss < min_loss 199 | min_loss = min(avg_val_loss, min_loss) 200 | save_checkpoint({'epoch': epoch + 1, 'global_counter': global_counter, 'state_dict': model.module.state_dict(), 'min_loss': min_loss}, 201 | is_best, args.saved_model.format(cfg.DETECT_OBJ)) 202 | 203 | # Dump test data every 10 epochs 204 | if (epoch + 1) % args.val_epoch == 0: # Time consuming 205 | # Create output folder 206 | os.makedirs(os.path.join(args.output_path, str(epoch + 1)), exist_ok = True) 207 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'data'), exist_ok = True) 208 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'log'), exist_ok=True) 209 | 210 | if args.vis: 211 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'vis'), exist_ok = True) 212 | 213 | model.train(False) # Validation mode 214 | 215 | with torch.no_grad(): 216 | for (i, val_data) in enumerate(val_dataloader): 217 | 218 | # Forward pass for validation and prediction 219 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 220 | 221 | front_images, bird_views, heatmaps = None, None, None 222 | if args.vis: 223 | tags, ret_box3d_scores, front_images, bird_views, heatmaps = \ 224 | model.module.predict(val_data, probs, deltas, summary = False, vis = True) 225 | else: 226 | tags, ret_box3d_scores = model.module.predict(val_data, probs, deltas, summary = False, vis = False) 227 | 228 | # tags: (N) 229 | # ret_box3d_scores: (N, N'); (class, x, y, z, h, w, l, rz, score) 230 | for tag, score in zip(tags, ret_box3d_scores): 231 | output_path = os.path.join(args.output_path, str(epoch + 1), 'data', tag + '.txt') 232 | with open(output_path, 'w+') as f: 233 | labels = box3d_to_label([score[:, 1:8]], [score[:, 0]], [score[:, -1]], coordinate = 'lidar')[0] 234 | for line in labels: 235 | f.write(line) 236 | print('Write out {} objects to {}'.format(len(labels), tag)) 237 | 238 | # Dump visualizations 239 | if args.vis: 240 | for tag, front_image, bird_view, heatmap in zip(tags, front_images, bird_views, heatmaps): 241 | front_img_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_front.jpg') 242 | bird_view_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_bv.jpg') 243 | heatmap_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_heatmap.jpg') 244 | cv2.imwrite(front_img_path, front_image) 245 | cv2.imwrite(bird_view_path, bird_view) 246 | cv2.imwrite(heatmap_path, heatmap) 247 | 248 | # Run evaluation code 249 | cmd_1 = './eval/KITTI/launch_test.sh' 250 | cmd_2 = os.path.join(args.output_path, str(epoch + 1)) 251 | cmd_3 = os.path.join(args.output_path, str(epoch + 1), 'log') 252 | os.system(' '.join([cmd_1, cmd_2, cmd_3])) 253 | 254 | tot_epoch = epoch + 1 255 | 256 | print('Train done with total epoch:{}, iter:{}'.format(tot_epoch, global_counter)) 257 | 258 | # Close TensorBoardX writer 259 | summary_writer.close() 260 | 261 | 262 | def save_checkpoint(state, is_best, filename = 'to_be_determined.pth.tar'): 263 | torch.save(state, '%s/%s' % (save_model_dir, filename)) 264 | if is_best: 265 | best_filename = filename.replace('.pth.tar', '_best.pth.tar') 266 | shutil.copyfile('%s/%s' % (save_model_dir, filename), '%s/%s' % (save_model_dir, best_filename)) 267 | 268 | 269 | if __name__ == '__main__': 270 | dataset_dir = cfg.DATA_DIR 271 | train_dir = os.path.join(cfg.DATA_DIR, 'training') 272 | val_dir = 
os.path.join(cfg.DATA_DIR, 'validation') 273 | log_dir = os.path.join('./log', args.tag) 274 | save_model_dir = os.path.join('./saved', args.tag) 275 | os.makedirs(log_dir, exist_ok = True) 276 | os.makedirs(save_model_dir, exist_ok = True) 277 | 278 | run() 279 | 280 | 281 | 282 | -------------------------------------------------------------------------------- /eval/KITTI/evaluate_object_3d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "mail.h" 21 | 22 | BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian) 23 | 24 | typedef boost::geometry::model::polygon > Polygon; 25 | 26 | 27 | using namespace std; 28 | 29 | /*======================================================================= 30 | STATIC EVALUATION PARAMETERS 31 | =======================================================================*/ 32 | 33 | // holds the number of test images on the server 34 | const int32_t N_TESTIMAGES = 7518; 35 | 36 | // easy, moderate and hard evaluation level 37 | enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2}; 38 | 39 | // evaluation metrics: image, ground or 3D 40 | enum METRIC{IMAGE=0, GROUND=1, BOX3D=2}; 41 | 42 | // evaluation parameter 43 | const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections 44 | const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation 45 | const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation 46 | 47 | // evaluated object classes 48 | enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2}; 49 | const int NUM_CLASS = 3; 50 | 51 | // parameters varying per class 52 | vector CLASS_NAMES; 53 | // the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation 54 | const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}}; 55 | 56 | // no. of recall steps that should be evaluated (discretized) 57 | const double N_SAMPLE_PTS = 41; 58 | 59 | 60 | // initialize class names 61 | void initGlobals () { 62 | CLASS_NAMES.push_back("car"); 63 | CLASS_NAMES.push_back("pedestrian"); 64 | CLASS_NAMES.push_back("cyclist"); 65 | } 66 | 67 | /*======================================================================= 68 | DATA TYPES FOR EVALUATION 69 | =======================================================================*/ 70 | 71 | // holding data needed for precision-recall and precision-aos 72 | struct tPrData { 73 | vector v; // detection score for computing score thresholds 74 | double similarity; // orientation similarity 75 | int32_t tp; // true positives 76 | int32_t fp; // false positives 77 | int32_t fn; // false negatives 78 | tPrData () : 79 | similarity(0), tp(0), fp(0), fn(0) {} 80 | }; 81 | 82 | // holding bounding boxes for ground truth and detections 83 | struct tBox { 84 | string type; // object type as car, pedestrian or cyclist,... 
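  // (x1, y1, x2, y2) below are 2D image-plane pixel coordinates and alpha is the
  // observation angle, matching the KITTI label format; the 3D fields (h, w, l,
  // t1..t3, ry) live in tGroundtruth / tDetection further down.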
85 | double x1; // left corner 86 | double y1; // top corner 87 | double x2; // right corner 88 | double y2; // bottom corner 89 | double alpha; // image orientation 90 | tBox (string type, double x1,double y1,double x2,double y2,double alpha) : 91 | type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {} 92 | }; 93 | 94 | // holding ground truth data 95 | struct tGroundtruth { 96 | tBox box; // object type, box, orientation 97 | double truncation; // truncation 0..1 98 | int32_t occlusion; // occlusion 0,1,2 (non, partly, fully) 99 | double ry; 100 | double t1, t2, t3; 101 | double h, w, l; 102 | tGroundtruth () : 103 | box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {} 104 | tGroundtruth (tBox box,double truncation,int32_t occlusion) : 105 | box(box),truncation(truncation),occlusion(occlusion) {} 106 | tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) : 107 | box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {} 108 | }; 109 | 110 | // holding detection data 111 | struct tDetection { 112 | tBox box; // object type, box, orientation 113 | double thresh; // detection score 114 | double ry; 115 | double t1, t2, t3; 116 | double h, w, l; 117 | tDetection (): 118 | box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {} 119 | tDetection (tBox box,double thresh) : 120 | box(box),thresh(thresh) {} 121 | tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) : 122 | box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {} 123 | }; 124 | 125 | 126 | /*======================================================================= 127 | FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS 128 | =======================================================================*/ 129 | vector indices; 130 | 131 | vector loadDetections(string file_name, bool &compute_aos, 132 | vector &eval_image, vector &eval_ground, 133 | vector &eval_3d, bool &success) { 134 | 135 | // holds all detections (ignored detections are indicated by an index vector 136 | vector detections; 137 | FILE *fp = fopen(file_name.c_str(),"r"); 138 | if (!fp) { 139 | success = false; 140 | return detections; 141 | } 142 | while (!feof(fp)) { 143 | tDetection d; 144 | double trash; 145 | char str[255]; 146 | if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 147 | str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1, 148 | &d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3, 149 | &d.ry, &d.thresh)==16) { 150 | 151 | // d.thresh = 1; 152 | d.box.type = str; 153 | detections.push_back(d); 154 | 155 | // orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid 156 | if(d.box.alpha == -10) 157 | compute_aos = false; 158 | 159 | // a class is only evaluated if it is detected at least once 160 | for (int c = 0; c < NUM_CLASS; c++) { 161 | if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) { 162 | if (!eval_image[c] && d.box.x1 >= 0) 163 | eval_image[c] = true; 164 | if (!eval_ground[c] && d.t1 != -1000) 165 | eval_ground[c] = true; 166 | if (!eval_3d[c] && d.t2 != -1000) 167 | eval_3d[c] = true; 168 | break; 169 | } 170 | } 171 | } 172 | } 173 | fclose(fp); 174 | success = true; 175 | return detections; 176 | } 177 | 178 | vector loadGroundtruth(string file_name,bool &success) { 179 | 180 | // holds all ground truth (ignored ground truth is indicated by an index vector 181 | vector groundtruth; 182 | FILE *fp = 
fopen(file_name.c_str(),"r"); 183 | if (!fp) { 184 | success = false; 185 | return groundtruth; 186 | } 187 | while (!feof(fp)) { 188 | tGroundtruth g; 189 | char str[255]; 190 | if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 191 | str, &g.truncation, &g.occlusion, &g.box.alpha, 192 | &g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2, 193 | &g.h, &g.w, &g.l, &g.t1, 194 | &g.t2, &g.t3, &g.ry )==15) { 195 | g.box.type = str; 196 | groundtruth.push_back(g); 197 | } 198 | } 199 | fclose(fp); 200 | success = true; 201 | return groundtruth; 202 | } 203 | 204 | void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) { 205 | 206 | // save precision to file 207 | if(precision.empty()) 208 | return; 209 | for (int32_t i=0; i 269 | Polygon toPolygon(const T& g) { 270 | using namespace boost::numeric::ublas; 271 | using namespace boost::geometry; 272 | matrix mref(2, 2); 273 | mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry); 274 | mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry); 275 | 276 | static int count = 0; 277 | matrix corners(2, 4); 278 | double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2, 279 | g.w / 2, -g.w / 2, -g.w / 2, g.w / 2}; 280 | std::copy(data, data + 8, corners.data().begin()); 281 | matrix gc = prod(mref, corners); 282 | for (int i = 0; i < 4; ++i) { 283 | gc(0, i) += g.t1; 284 | gc(1, i) += g.t3; 285 | } 286 | 287 | double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}}; 288 | Polygon poly; 289 | append(poly, points); 290 | return poly; 291 | } 292 | 293 | // measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz) 294 | inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 295 | using namespace boost::geometry; 296 | Polygon gp = toPolygon(g); 297 | Polygon dp = toPolygon(d); 298 | 299 | std::vector in, un; 300 | intersection(gp, dp, in); 301 | union_(gp, dp, un); 302 | 303 | double inter_area = in.empty() ? 0 : area(in.front()); 304 | double union_area = area(un.front()); 305 | double o; 306 | if(criterion==-1) // union 307 | o = inter_area / union_area; 308 | else if(criterion==0) // bbox_a 309 | o = inter_area / area(dp); 310 | else if(criterion==1) // bbox_b 311 | o = inter_area / area(gp); 312 | 313 | return o; 314 | } 315 | 316 | // measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz) 317 | inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 318 | using namespace boost::geometry; 319 | Polygon gp = toPolygon(g); 320 | Polygon dp = toPolygon(d); 321 | 322 | std::vector in, un; 323 | intersection(gp, dp, in); 324 | union_(gp, dp, un); 325 | 326 | double ymax = min(d.t2, g.t2); 327 | double ymin = max(d.t2 - d.h, g.t2 - g.h); 328 | 329 | double inter_area = in.empty() ? 
0 : area(in.front()); 330 | double inter_vol = inter_area * max(0.0, ymax - ymin); 331 | 332 | double det_vol = d.h * d.l * d.w; 333 | double gt_vol = g.h * g.l * g.w; 334 | 335 | double o; 336 | if(criterion==-1) // union 337 | o = inter_vol / (det_vol + gt_vol - inter_vol); 338 | else if(criterion==0) // bbox_a 339 | o = inter_vol / det_vol; 340 | else if(criterion==1) // bbox_b 341 | o = inter_vol / gt_vol; 342 | 343 | return o; 344 | } 345 | 346 | vector getThresholds(vector &v, double n_groundtruth){ 347 | 348 | // holds scores needed to compute N_SAMPLE_PTS recall values 349 | vector t; 350 | 351 | // sort scores in descending order 352 | // (highest score is assumed to give best/most confident detections) 353 | sort(v.begin(), v.end(), greater()); 354 | 355 | // get scores for linearly spaced recall 356 | double current_recall = 0; 357 | for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){ 382 | 383 | // extract ground truth bounding boxes for current evaluation class 384 | for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >, 456 | const vector &det, const vector &dc, 457 | const vector &ignored_gt, const vector &ignored_det, 458 | bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 459 | METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){ 460 | 461 | tPrData stat = tPrData(); 462 | const double NO_DETECTION = -10000000; 463 | vector delta; // holds angular difference for TPs (needed for AOS evaluation) 464 | vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth 465 | assigned_detection.assign(det.size(), false); 466 | vector ignored_threshold; 467 | ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed 468 | 469 | // detections with a low score are ignored for computing precision (needs FP) 470 | if(compute_fp) 471 | for(int32_t i=0; i 0.5) (logical len(det)) 484 | =======================================================================*/ 485 | int32_t det_idx = -1; 486 | double valid_detection = NO_DETECTION; 487 | double max_overlap = 0; 488 | 489 | // search for a possible detection 490 | bool assigned_ignored_det = false; 491 | for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){ 506 | det_idx = j; 507 | valid_detection = det[j].thresh; 508 | } 509 | 510 | // for computing pr curve values, the candidate with the greatest overlap is considered 511 | // if the greatest overlap is an ignored detection (min_height), the overlapping detection is used 512 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){ 513 | max_overlap = overlap; 514 | det_idx = j; 515 | valid_detection = 1; 516 | assigned_ignored_det = false; 517 | } 518 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){ 519 | det_idx = j; 520 | valid_detection = 1; 521 | assigned_ignored_det = true; 522 | } 523 | } 524 | 525 | /*======================================================================= 526 | compute TP, FP and FN 527 | =======================================================================*/ 528 | 529 | // nothing was assigned to this valid ground truth 530 | if(valid_detection==NO_DETECTION && ignored_gt[i]==0) { 531 | stat.fn++; 532 | } 
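    // Note: only ground truths with ignored_gt == 0 (valid for the current class and
    // difficulty) can produce a false negative; boxes marked 1 (neighboring class or a
    // harder difficulty) or -1 (other classes) are matched if possible but never penalized.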
533 | 534 | // only evaluate valid ground truth <=> detection assignments (considering difficulty level) 535 | else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1)) 536 | assigned_detection[det_idx] = true; 537 | 538 | // found a valid true positive 539 | else if(valid_detection!=NO_DETECTION){ 540 | 541 | // write highest score to threshold vector 542 | stat.tp++; 543 | stat.v.push_back(det[det_idx].thresh); 544 | 545 | // compute angular difference of detection and ground truth if valid detection orientation was provided 546 | if(compute_aos) 547 | delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha); 548 | 549 | // clean up 550 | assigned_detection[det_idx] = true; 551 | } 552 | } 553 | 554 | // if FP are requested, consider stuff area 555 | if(compute_fp){ 556 | 557 | // count fp 558 | for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){ 581 | assigned_detection[j] = true; 582 | nstuff++; 583 | } 584 | } 585 | } 586 | 587 | // FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas 588 | stat.fp -= nstuff; 589 | 590 | // if all orientation values are valid, the AOS is computed 591 | if(compute_aos){ 592 | vector tmp; 593 | 594 | // FP have a similarity of 0, for all TP compute AOS 595 | tmp.assign(stat.fp, 0); 596 | for(int32_t i=0; i0 || stat.fp>0) 605 | stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0); 606 | 607 | // there was neither a FP nor a TP, so the similarity is ignored in the evaluation 608 | else 609 | stat.similarity = -1; 610 | } 611 | } 612 | return stat; 613 | } 614 | 615 | /*======================================================================= 616 | EVALUATE CLASS-WISE 617 | =======================================================================*/ 618 | 619 | bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class, 620 | const vector< vector > &groundtruth, 621 | const vector< vector > &detections, bool compute_aos, 622 | double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 623 | vector &precision, vector &aos, 624 | DIFFICULTY difficulty, METRIC metric) { 625 | assert(groundtruth.size() == detections.size()); 626 | 627 | // init 628 | int32_t n_gt=0; // total no. 
of gt (denominator of recall) 629 | vector v, thresholds; // detection scores, evaluated for recall discretization 630 | vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty 631 | vector< vector > dontcare; // index of dontcare areas, included in ground truth 632 | 633 | // for all test images do 634 | for (int32_t i=0; i i_gt, i_det; 638 | vector dc; 639 | 640 | // only evaluate objects of current class and ignore occluded, truncated objects 641 | cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty); 642 | ignored_gt.push_back(i_gt); 643 | ignored_det.push_back(i_det); 644 | dontcare.push_back(dc); 645 | 646 | // compute statistics to get recall values 647 | tPrData pr_tmp = tPrData(); 648 | pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric); 649 | 650 | // add detection scores to vector over all images 651 | for(int32_t j=0; j pr; 660 | pr.assign(thresholds.size(),tPrData()); 661 | for (int32_t i=0; i recall; 681 | precision.assign(N_SAMPLE_PTS, 0); 682 | if(compute_aos) 683 | aos.assign(N_SAMPLE_PTS, 0); 684 | double r=0; 685 | for (int32_t i=0; i vals[],bool is_aos){ 706 | 707 | char command[1024]; 708 | 709 | // save plot data to file 710 | FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w"); 711 | printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str()); 712 | for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++) 713 | fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]); 714 | fclose(fp); 715 | 716 | // create png + eps 717 | for (int32_t j=0; j<2; j++) { 718 | 719 | // open file 720 | FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w"); 721 | 722 | // save gnuplot instructions 723 | if (j==0) { 724 | fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n"); 725 | fprintf(fp,"set output \"%s.png\"\n",file_name.c_str()); 726 | } else { 727 | fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n"); 728 | fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str()); 729 | } 730 | 731 | // set labels and ranges 732 | fprintf(fp,"set size ratio 0.7\n"); 733 | fprintf(fp,"set xrange [0:1]\n"); 734 | fprintf(fp,"set yrange [0:1]\n"); 735 | fprintf(fp,"set xlabel \"Recall\"\n"); 736 | if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n"); 737 | else fprintf(fp,"set ylabel \"Orientation Similarity\"\n"); 738 | obj_type[0] = toupper(obj_type[0]); 739 | fprintf(fp,"set title \"%s\"\n",obj_type.c_str()); 740 | 741 | // line width 742 | int32_t lw = 5; 743 | if (j==0) lw = 3; 744 | 745 | // plot error curve 746 | fprintf(fp,"plot "); 747 | fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw); 748 | fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw); 749 | fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw); 750 | 751 | // close file 752 | fclose(fp); 753 | 754 | // run gnuplot => create png + eps 755 | sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str()); 756 | system(command); 757 | } 758 | 759 | // create pdf and crop 760 | sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 761 | system(command); 762 | sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 763 | system(command); 764 | 
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str()); 765 | system(command); 766 | } 767 | 768 | bool eval(string result_sha,Mail* mail){ 769 | 770 | // set some global parameters 771 | initGlobals(); 772 | 773 | // ground truth and result directories 774 | string gt_dir = "data/object/label_2"; 775 | string result_dir = "results/" + result_sha; 776 | string plot_dir = result_dir + "/plot"; 777 | 778 | // create output directories 779 | system(("mkdir " + plot_dir).c_str()); 780 | 781 | // hold detections and ground truth in memory 782 | vector< vector > groundtruth; 783 | vector< vector > detections; 784 | 785 | // holds wether orientation similarity shall be computed (might be set to false while loading detections) 786 | // and which labels where provided by this submission 787 | bool compute_aos=true; 788 | vector eval_image(NUM_CLASS, false); 789 | vector eval_ground(NUM_CLASS, false); 790 | vector eval_3d(NUM_CLASS, false); 791 | 792 | // for all images read groundtruth and detections 793 | mail->msg("Loading detections..."); 794 | for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success); 803 | vector det = loadDetections(result_dir + "/data/" + file_name, 804 | compute_aos, eval_image, eval_ground, eval_3d, det_success); 805 | groundtruth.push_back(gt); 806 | detections.push_back(det); 807 | 808 | // check for errors 809 | if (!gt_success) { 810 | mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name); 811 | return false; 812 | } 813 | if (!det_success) { 814 | mail->msg("ERROR: Couldn't read: %s", file_name); 815 | return false; 816 | } 817 | } 818 | mail->msg(" done."); 819 | 820 | // holds pointers for result files 821 | FILE *fp_det=0, *fp_ori=0; 822 | 823 | // eval image 2D bounding boxes 824 | for (int c = 0; c < NUM_CLASS; c++) { 825 | CLASSES cls = (CLASSES)c; 826 | if (eval_image[c]) { 827 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w"); 828 | if(compute_aos) 829 | fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w"); 830 | vector precision[3], aos[3]; 831 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE) 832 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE) 833 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) { 834 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 835 | return false; 836 | } 837 | fclose(fp_det); 838 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0); 839 | if(compute_aos){ 840 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1); 841 | fclose(fp_ori); 842 | } 843 | } 844 | } 845 | 846 | // don't evaluate AOS for birdview boxes and 3D boxes 847 | compute_aos = false; 848 | 849 | // eval bird's eye view bounding boxes 850 | for (int c = 0; c < NUM_CLASS; c++) { 851 | CLASSES cls = (CLASSES)c; 852 | if (eval_ground[c]) { 853 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w"); 854 | vector precision[3], aos[3]; 855 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND) 856 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, 
groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND) 857 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) { 858 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 859 | return false; 860 | } 861 | fclose(fp_det); 862 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0); 863 | } 864 | } 865 | 866 | // eval 3D bounding boxes 867 | for (int c = 0; c < NUM_CLASS; c++) { 868 | CLASSES cls = (CLASSES)c; 869 | if (eval_3d[c]) { 870 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w"); 871 | vector precision[3], aos[3]; 872 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D) 873 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D) 874 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) { 875 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 876 | return false; 877 | } 878 | fclose(fp_det); 879 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0); 880 | } 881 | } 882 | 883 | // success 884 | return true; 885 | } 886 | 887 | int32_t main (int32_t argc,char *argv[]) { 888 | 889 | // we need 2 or 4 arguments! 890 | if (argc!=2 && argc!=4) { 891 | cout << "Usage: ./eval_detection result_sha [user_sha email]" << endl; 892 | return 1; 893 | } 894 | 895 | // read arguments 896 | string result_sha = argv[1]; 897 | 898 | // init notification mail 899 | Mail *mail; 900 | if (argc==4) mail = new Mail(argv[3]); 901 | else mail = new Mail(); 902 | mail->msg("Thank you for participating in our evaluation!"); 903 | 904 | // run evaluation 905 | if (eval(result_sha,mail)) { 906 | mail->msg("Your evaluation results are available at:"); 907 | mail->msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=object&user=%s&result=%s",argv[2], result_sha.c_str()); 908 | } else { 909 | system(("rm -r results/" + result_sha).c_str()); 910 | mail->msg("An error occured while processing your results."); 911 | mail->msg("Please make sure that the data in your zip archive has the right format!"); 912 | } 913 | 914 | // send mail and exit 915 | delete mail; 916 | 917 | return 0; 918 | } 919 | 920 | 921 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- cooing:UTF-8 -*- 3 | 4 | import cv2 5 | cv2.setNumThreads(0) 6 | 7 | import math 8 | 9 | from config import cfg 10 | from utils.box_overlaps import * 11 | 12 | import pdb 13 | 14 | 15 | #Util function to load calib matrices 16 | CAM = 2 17 | def load_calib(calib_dir): 18 | # P2 * R0_rect * Tr_velo_to_cam * y 19 | lines = open(calib_dir).readlines() 20 | lines = [line.split()[1:] for line in lines][:-1] 21 | 22 | P = np.array(lines[CAM]).reshape(3, 4) 23 | P = np.concatenate((P, np.array([[0, 0, 0, 0]])), 0) 24 | 25 | Tr_velo_to_cam = np.array(lines[5]).reshape(3, 4) 26 | Tr_velo_to_cam = np.concatenate([Tr_velo_to_cam, np.array([0, 0, 0, 1]).reshape(1, 4)], 0) 27 | 28 | R_cam_to_rect = np.eye(4) 29 | R_cam_to_rect[:3, :3] = np.array(lines[4][:9]).reshape(3, 3) 30 | 31 | P = P.astype('float32') 32 | Tr_velo_to_cam = Tr_velo_to_cam.astype('float32') 33 | 
R_cam_to_rect = R_cam_to_rect.astype('float32') 34 | 35 | return P, Tr_velo_to_cam, R_cam_to_rect 36 | 37 | 38 | def lidar_to_bird_view(x, y, factor = 1): 39 | # using the cfg.INPUT_XXX 40 | a = (x - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor 41 | b = (y - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor 42 | a = np.clip(a, a_max = (cfg.X_MAX - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor, a_min = 0) 43 | b = np.clip(b, a_max = (cfg.Y_MAX - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor, a_min = 0) 44 | 45 | return a, b 46 | 47 | def batch_lidar_to_bird_view(points, factor = 1): 48 | # Input: 49 | # points (N, 2) 50 | # Outputs: 51 | # points (N, 2) 52 | # using the cfg.INPUT_XXX 53 | a = (points[:, 0] - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor 54 | b = (points[:, 1] - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor 55 | a = np.clip(a, a_max = (cfg.X_MAX - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor, a_min = 0) 56 | b = np.clip(b, a_max = (cfg.Y_MAX - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor, a_min = 0) 57 | 58 | return np.concatenate([a[:, np.newaxis], b[:, np.newaxis]], axis = -1) 59 | 60 | 61 | def angle_in_limit(angle): 62 | # To limit the angle in -pi/2 - pi/2 63 | limit_degree = 5 64 | while angle >= np.pi / 2: 65 | angle -= np.pi 66 | while angle < -np.pi / 2: 67 | angle += np.pi 68 | if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi: 69 | angle = np.pi / 2 70 | 71 | return angle 72 | 73 | 74 | def camera_to_lidar(x, y, z, T_VELO_2_CAM = None, R_RECT_0 = None): 75 | if type(T_VELO_2_CAM) == type(None): 76 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 77 | 78 | if type(R_RECT_0) == type(None): 79 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 80 | 81 | p = np.array([x, y, z, 1]) 82 | p = np.matmul(np.linalg.inv(R_RECT_0), p) 83 | p = np.matmul(np.linalg.inv(T_VELO_2_CAM), p) 84 | p = p[0 : 3] 85 | 86 | return tuple(p) 87 | 88 | 89 | def lidar_to_camera(x, y, z, T_VELO_2_CAM = None, R_RECT_0 = None): 90 | if type(T_VELO_2_CAM) == type(None): 91 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 92 | 93 | if type(R_RECT_0) == type(None): 94 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 95 | 96 | p = np.array([x, y, z, 1]) 97 | p = np.matmul(T_VELO_2_CAM, p) 98 | p = np.matmul(R_RECT_0, p) 99 | p = p[0:3] 100 | 101 | return tuple(p) 102 | 103 | 104 | def camera_to_lidar_point(points, T_VELO_2_CAM = None, R_RECT_0 = None): 105 | # (N, 3) -> (N, 3) 106 | N = points.shape[0] 107 | points = np.hstack([points, np.ones((N, 1))]).T # (N,4) -> (4,N) 108 | 109 | if type(T_VELO_2_CAM) == type(None): 110 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 111 | 112 | if type(R_RECT_0) == type(None): 113 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 114 | 115 | points = np.matmul(np.linalg.inv(R_RECT_0), points) 116 | points = np.matmul(np.linalg.inv(T_VELO_2_CAM), points).T # (4, N) -> (N, 4) 117 | points = points[:, 0:3] 118 | 119 | return points.reshape(-1, 3) 120 | 121 | 122 | def lidar_to_camera_point(points, T_VELO_2_CAM = None, R_RECT_0 = None): 123 | # (N, 3) -> (N, 3) 124 | N = points.shape[0] 125 | points = np.hstack([points, np.ones((N, 1))]).T 126 | 127 | 128 | if type(T_VELO_2_CAM) == type(None): 129 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 130 | 131 | if type(R_RECT_0) == type(None): 132 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 133 | 134 | points = np.matmul(T_VELO_2_CAM, points) 135 | points = np.matmul(R_RECT_0, points).T 136 | points = points[:, 0:3] 137 | 138 | return points.reshape(-1, 3) 139 | 140 | 141 | def camera_to_lidar_box(boxes, T_VELO_2_CAM = None, R_RECT_0 = None): 142 | # (N, 7) -> (N, 7) x,y,z,h,w,l,r 
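    # Note on conventions (KITTI): the camera frame has x right, y down, z forward, and the
    # heading ry is a rotation about the camera y-axis; the velodyne/LiDAR frame has x
    # forward, y left, z up, with heading rz about the z-axis. camera_to_lidar() maps the
    # translation through inv(R_RECT_0) and inv(Tr_velo_to_cam), while the heading is
    # remapped as rz = -ry - pi/2 and wrapped by angle_in_limit(); lidar_to_camera_box()
    # below applies the inverse mapping (ry = -rz - pi/2).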
143 | ret = [] 144 | for box in boxes: 145 | x, y, z, h, w, l, ry = box 146 | (x, y, z), h, w, l, rz = camera_to_lidar(x, y, z, T_VELO_2_CAM, R_RECT_0), h, w, l, -ry - np.pi / 2 147 | rz = angle_in_limit(rz) 148 | ret.append([x, y, z, h, w, l, rz]) 149 | 150 | return np.array(ret).reshape(-1, 7) 151 | 152 | 153 | def lidar_to_camera_box(boxes, T_VELO_2_CAM = None, R_RECT_0 = None): 154 | # (N, 7) -> (N, 7) x,y,z,h,w,l,r 155 | ret = [] 156 | for box in boxes: 157 | x, y, z, h, w, l, rz = box 158 | (x, y, z), h, w, l, ry = lidar_to_camera( 159 | x, y, z, T_VELO_2_CAM, R_RECT_0), h, w, l, -rz - np.pi / 2 160 | ry = angle_in_limit(ry) 161 | ret.append([x, y, z, h, w, l, ry]) 162 | 163 | return np.array(ret).reshape(-1, 7) 164 | 165 | 166 | def center_to_corner_box2d(boxes_center, coordinate = 'lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 167 | # (N, 5) -> (N, 4, 2) 168 | N = boxes_center.shape[0] 169 | boxes3d_center = np.zeros((N, 7)) 170 | boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center 171 | boxes3d_corner = center_to_corner_box3d( 172 | boxes3d_center, coordinate = coordinate, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 173 | 174 | return boxes3d_corner[:, 0:4, 0:2] 175 | 176 | 177 | def center_to_corner_box3d(boxes_center, coordinate = 'lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 178 | # (N, 7) -> (N, 8, 3) 179 | N = boxes_center.shape[0] 180 | ret = np.zeros((N, 8, 3), dtype = np.float32) 181 | 182 | if coordinate == 'camera': 183 | boxes_center = camera_to_lidar_box(boxes_center, T_VELO_2_CAM, R_RECT_0) 184 | 185 | for i in range(N): 186 | box = boxes_center[i] 187 | translation = box[0:3] 188 | size = box[3:6] 189 | rotation = [0, 0, box[-1]] 190 | 191 | h, w, l = size[0], size[1], size[2] 192 | trackletBox = np.array([ # in velodyne coordinates around zero point and without orientation yet 193 | [-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], \ 194 | [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], \ 195 | [0, 0, 0, 0, h, h, h, h]]) 196 | 197 | # re-create 3D bounding box in velodyne coordinate system 198 | yaw = rotation[2] 199 | rotMat = np.array([ 200 | [np.cos(yaw), -np.sin(yaw), 0.0], 201 | [np.sin(yaw), np.cos(yaw), 0.0], 202 | [0.0, 0.0, 1.0]]) 203 | cornerPosInVelo = np.dot(rotMat, trackletBox) + \ 204 | np.tile(translation, (8, 1)).T 205 | box3d = cornerPosInVelo.transpose() 206 | ret[i] = box3d 207 | 208 | if coordinate == 'camera': 209 | for idx in range(len(ret)): 210 | ret[idx] = lidar_to_camera_point(ret[idx], T_VELO_2_CAM, R_RECT_0) 211 | 212 | return ret 213 | 214 | 215 | def corner_to_center_box2d(boxes_corner, coordinate='lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 216 | # (N, 4, 2) -> (N, 5); x,y,w,l,r 217 | N = boxes_corner.shape[0] 218 | boxes3d_corner = np.zeros((N, 8, 3)) 219 | boxes3d_corner[:, 0:4, 0:2] = boxes_corner 220 | boxes3d_corner[:, 4:8, 0:2] = boxes_corner 221 | boxes3d_center = corner_to_center_box3d( 222 | boxes3d_corner, coordinate = coordinate, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 223 | 224 | return boxes3d_center[:, [0, 1, 4, 5, 6]] 225 | 226 | 227 | def corner_to_standup_box2d(boxes_corner): 228 | # (N, 4, 2) -> (N, 4); x1, y1, x2, y2 229 | N = boxes_corner.shape[0] 230 | standup_boxes2d = np.zeros((N, 4)) 231 | standup_boxes2d[:, 0] = np.min(boxes_corner[:, :, 0], axis = 1) 232 | standup_boxes2d[:, 1] = np.min(boxes_corner[:, :, 1], axis = 1) 233 | standup_boxes2d[:, 2] = np.max(boxes_corner[:, :, 0], axis = 1) 234 | standup_boxes2d[:, 3] = np.max(boxes_corner[:, :, 1], axis = 1) 235 | 
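# Taking the min/max over each box's four rotated corners yields its axis-aligned
# enclosing box (x1, y1, x2, y2).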
236 | return standup_boxes2d 237 | 238 | 239 | # TODO: 0/90 may be not correct 240 | def anchor_to_standup_box2d(anchors): 241 | # (N, 4) -> (N, 4); x,y,w,l -> x1,y1,x2,y2 242 | anchor_standup = np.zeros_like(anchors) 243 | # r == 0 244 | anchor_standup[::2, 0] = anchors[::2, 0] - anchors[::2, 3] / 2 245 | anchor_standup[::2, 1] = anchors[::2, 1] - anchors[::2, 2] / 2 246 | anchor_standup[::2, 2] = anchors[::2, 0] + anchors[::2, 3] / 2 247 | anchor_standup[::2, 3] = anchors[::2, 1] + anchors[::2, 2] / 2 248 | # r == pi/2 249 | anchor_standup[1::2, 0] = anchors[1::2, 0] - anchors[1::2, 2] / 2 250 | anchor_standup[1::2, 1] = anchors[1::2, 1] - anchors[1::2, 3] / 2 251 | anchor_standup[1::2, 2] = anchors[1::2, 0] + anchors[1::2, 2] / 2 252 | anchor_standup[1::2, 3] = anchors[1::2, 1] + anchors[1::2, 3] / 2 253 | 254 | return anchor_standup 255 | 256 | 257 | def corner_to_center_box3d(boxes_corner, coordinate='camera', T_VELO_2_CAM = None, R_RECT_0 = None): 258 | # (N, 8, 3) -> (N, 7); x,y,z,h,w,l,ry/z 259 | if coordinate == 'lidar': 260 | for idx in range(len(boxes_corner)): 261 | boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx], T_VELO_2_CAM, R_RECT_0) 262 | ret = [] 263 | for roi in boxes_corner: 264 | if cfg.CORNER2CENTER_AVG: # average version 265 | roi = np.array(roi) 266 | h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4) 267 | w = np.sum( 268 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]])**2)) + 269 | np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]])**2)) + 270 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]])**2)) + 271 | np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]])**2)) 272 | ) / 4 273 | l = np.sum( 274 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]])**2)) + 275 | np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]])**2)) + 276 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]])**2)) + 277 | np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]])**2)) 278 | ) / 4 279 | x = np.sum(roi[:, 0], axis = 0)/ 8 280 | y = np.sum(roi[0:4, 1], axis = 0)/ 4 281 | z = np.sum(roi[:, 2], axis = 0)/ 8 282 | ry = np.sum( 283 | math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + 284 | math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + 285 | math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + 286 | math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + 287 | math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + 288 | math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + 289 | math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + 290 | math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) 291 | ) / 8 292 | if w > l: 293 | w, l = l, w 294 | ry = angle_in_limit(ry + np.pi / 2) 295 | else: # max version 296 | h = max(abs(roi[:4, 1] - roi[4:, 1])) 297 | w = np.max( 298 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]])**2)) + 299 | np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]])**2)) + 300 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]])**2)) + 301 | np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]])**2)) 302 | ) 303 | l = np.max( 304 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]])**2)) + 305 | np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]])**2)) + 306 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]])**2)) + 307 | np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]])**2)) 308 | ) 309 | x = np.sum(roi[:, 0], axis = 0)/ 8 310 | y = np.sum(roi[0:4, 1], axis = 0)/ 4 311 | z = np.sum(roi[:, 2], axis = 0)/ 8 312 | ry = np.sum( 313 | math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + 314 | math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + 315 | 
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + 316 | math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + 317 | math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + 318 | math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + 319 | math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + 320 | math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) 321 | ) / 8 322 | if w > l: 323 | w, l = l, w 324 | ry = angle_in_limit(ry + np.pi / 2) 325 | ret.append([x, y, z, h, w, l, ry]) 326 | if coordinate == 'lidar': 327 | ret = camera_to_lidar_box(np.array(ret), T_VELO_2_CAM, R_RECT_0) 328 | 329 | return np.array(ret) 330 | 331 | 332 | # This just for visulize and testing 333 | def lidar_box3d_to_camera_box(boxes3d, cal_projection = False, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 334 | # (N, 7) -> (N, 4)/(N, 8, 2); x,y,z,h,w,l,rz -> x1,y1,x2,y2/8*(x, y) 335 | num = len(boxes3d) 336 | boxes2d = np.zeros((num, 4), dtype = np.int32) 337 | projections = np.zeros((num, 8, 2), dtype = np.float32) 338 | 339 | lidar_boxes3d_corner = center_to_corner_box3d(boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 340 | if type(P2) == type(None): 341 | P2 = np.array(cfg.MATRIX_P2) 342 | 343 | for n in range(num): 344 | box3d = lidar_boxes3d_corner[n] 345 | box3d = lidar_to_camera_point(box3d, T_VELO_2_CAM, R_RECT_0) 346 | points = np.hstack((box3d, np.ones((8, 1)))).T # (8, 4) -> (4, 8) 347 | points = np.matmul(P2, points).T 348 | 349 | points = np.nan_to_num(points) 350 | 351 | points[:, 0] /= points[:, 2] 352 | points[:, 1] /= points[:, 2] 353 | 354 | projections[n] = points[:, 0:2] 355 | minx = 0 if np.isnan(np.min(points[:, 0])) else int(np.min(points[:, 0])) 356 | maxx = 0 if np.isnan(np.max(points[:, 0])) else int(np.max(points[:, 0])) 357 | miny = 0 if np.isnan(np.min(points[:, 1])) else int(np.min(points[:, 1])) 358 | maxy = 0 if np.isnan(np.max(points[:, 1])) else int(np.max(points[:, 1])) 359 | 360 | boxes2d[n, :] = minx, miny, maxx, maxy 361 | 362 | return projections if cal_projection else boxes2d 363 | 364 | 365 | def lidar_to_bird_view_img(lidar, factor = 1): 366 | # Input: 367 | # lidar: (N', 4) 368 | # Output: 369 | # birdview: (w, l, 3) 370 | birdview = np.zeros( 371 | (cfg.INPUT_HEIGHT * factor, cfg.INPUT_WIDTH * factor, 1)) 372 | for point in lidar: 373 | x, y = point[0:2] 374 | if cfg.X_MIN < x < cfg.X_MAX and cfg.Y_MIN < y < cfg.Y_MAX: 375 | x, y = int((x - cfg.X_MIN) / cfg.VOXEL_X_SIZE * 376 | factor), int((y - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor) 377 | birdview[y, x] += 1 378 | birdview = birdview - np.min(birdview) 379 | divisor = np.max(birdview) - np.min(birdview) 380 | # TODO: adjust this factor 381 | birdview = np.clip((birdview / divisor * 255) * 382 | 5 * factor, a_min = 0, a_max = 255) 383 | birdview = np.tile(birdview, 3).astype(np.uint8) 384 | 385 | return birdview 386 | 387 | 388 | def draw_lidar_box3d_on_image(img, boxes3d, scores, gt_boxes3d = np.array([]), color = (0, 255, 255), 389 | gt_color = (255, 0, 255), thickness = 1, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 390 | # Input: 391 | # img: (h, w, 3) 392 | # boxes3d (N, 7) [x, y, z, h, w, l, r] 393 | # scores 394 | # gt_boxes3d (N, 7) [x, y, z, h, w, l, r] 395 | img = img.copy() 396 | projections = lidar_box3d_to_camera_box(boxes3d, cal_projection = True, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 397 | gt_projections = lidar_box3d_to_camera_box(gt_boxes3d, cal_projection = True, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = 
R_RECT_0) 398 | 399 | # Draw projections 400 | for qs in projections: 401 | for k in range(0, 4): 402 | i, j = k, (k + 1) % 4 403 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 404 | qs[j, 1]), color, thickness, cv2.LINE_AA) 405 | 406 | i, j = k + 4, (k + 1) % 4 + 4 407 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 408 | qs[j, 1]), color, thickness, cv2.LINE_AA) 409 | 410 | i, j = k, k + 4 411 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 412 | qs[j, 1]), color, thickness, cv2.LINE_AA) 413 | # Draw gt projections 414 | for qs in gt_projections: 415 | for k in range(0, 4): 416 | i, j = k, (k + 1) % 4 417 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 418 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 419 | 420 | i, j = k + 4, (k + 1) % 4 + 4 421 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 422 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 423 | 424 | i, j = k, k + 4 425 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 426 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 427 | 428 | return cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 429 | 430 | 431 | 432 | def draw_lidar_box3d_on_birdview(birdview, boxes3d, scores, gt_boxes3d = np.array([]), 433 | color = (0, 255, 255), gt_color = (255, 0, 255), thickness = 1, factor = 1, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 434 | # Input: 435 | # birdview: (h, w, 3) 436 | # boxes3d (N, 7) [x, y, z, h, w, l, r] 437 | # scores 438 | # gt_boxes3d (N, 7) [x, y, z, h, w, l, r] 439 | img = birdview.copy() 440 | corner_boxes3d = center_to_corner_box3d(boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 441 | corner_gt_boxes3d = center_to_corner_box3d(gt_boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 442 | # draw gt 443 | for box in corner_gt_boxes3d: 444 | x0, y0 = lidar_to_bird_view(*box[0, 0:2], factor = factor) 445 | x1, y1 = lidar_to_bird_view(*box[1, 0:2], factor = factor) 446 | x2, y2 = lidar_to_bird_view(*box[2, 0:2], factor = factor) 447 | x3, y3 = lidar_to_bird_view(*box[3, 0:2], factor = factor) 448 | 449 | cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), 450 | gt_color, thickness, cv2.LINE_AA) 451 | cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), 452 | gt_color, thickness, cv2.LINE_AA) 453 | cv2.line(img, (int(x2), int(y2)), (int(x3), int(y3)), 454 | gt_color, thickness, cv2.LINE_AA) 455 | cv2.line(img, (int(x3), int(y3)), (int(x0), int(y0)), 456 | gt_color, thickness, cv2.LINE_AA) 457 | 458 | # draw detections 459 | for box in corner_boxes3d: 460 | x0, y0 = lidar_to_bird_view(*box[0, 0:2], factor = factor) 461 | x1, y1 = lidar_to_bird_view(*box[1, 0:2], factor = factor) 462 | x2, y2 = lidar_to_bird_view(*box[2, 0:2], factor = factor) 463 | x3, y3 = lidar_to_bird_view(*box[3, 0:2], factor = factor) 464 | 465 | cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), 466 | color, thickness, cv2.LINE_AA) 467 | cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), 468 | color, thickness, cv2.LINE_AA) 469 | cv2.line(img, (int(x2), int(y2)), (int(x3), int(y3)), 470 | color, thickness, cv2.LINE_AA) 471 | cv2.line(img, (int(x3), int(y3)), (int(x0), int(y0)), 472 | color, thickness, cv2.LINE_AA) 473 | 474 | return cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 475 | 476 | 477 | def label_to_gt_box3d(labels, cls = 'Car', coordinate = 'camera', T_VELO_2_CAM = None, R_RECT_0 = None): 478 | # Input: 479 | # label: (N, N') 480 | # cls: 'Car' or 'Pedestrain' or 'Cyclist' 481 | # coordinate: 'camera' or 'lidar' 482 | # Output: 483 | # (N, N', 7) 484 | 
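# Each KITTI label line is filtered by class ('Car' ground truth also accepts 'Van'; an empty
# acc_cls keeps every class); the last 7 fields (h, w, l, x, y, z, ry) are reordered to
# (x, y, z, h, w, l, r) and, if requested, converted from camera to lidar coordinates.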
boxes3d = [] 485 | if cls == 'Car': 486 | acc_cls = ['Car', 'Van'] 487 | elif cls == 'Pedestrian': 488 | acc_cls = ['Pedestrian'] 489 | elif cls == 'Cyclist': 490 | acc_cls = ['Cyclist'] 491 | else: # all 492 | acc_cls = [] 493 | 494 | for label in labels: 495 | boxes3d_a_label = [] 496 | for line in label: 497 | ret = line.split() 498 | if ret[0] in acc_cls or acc_cls == []: 499 | h, w, l, x, y, z, r = [float(i) for i in ret[-7:]] 500 | box3d = np.array([x, y, z, h, w, l, r]) 501 | boxes3d_a_label.append(box3d) 502 | if coordinate == 'lidar': 503 | boxes3d_a_label = camera_to_lidar_box(np.array(boxes3d_a_label), T_VELO_2_CAM, R_RECT_0) 504 | 505 | boxes3d.append(np.array(boxes3d_a_label).reshape(-1, 7)) 506 | 507 | return boxes3d 508 | 509 | 510 | def box3d_to_label(batch_box3d, batch_cls, batch_score = [], coordinate='camera', P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 511 | # Input: 512 | # (N, N', 7) x y z h w l r 513 | # (N, N') 514 | # cls: (N, N') 'Car' or 'Pedestrain' or 'Cyclist' 515 | # coordinate(input): 'camera' or 'lidar' 516 | # Output: 517 | # label: (N, N') N batches and N lines 518 | batch_label = [] 519 | if batch_score: 520 | template = '{} ' + ' '.join(['{:.4f}' for i in range(15)]) + '\n' 521 | for boxes, scores, clses in zip(batch_box3d, batch_score, batch_cls): 522 | label = [] 523 | for box, score, cls in zip(boxes, scores, clses): 524 | if coordinate == 'camera': 525 | box3d = box 526 | box2d = lidar_box3d_to_camera_box( 527 | camera_to_lidar_box(box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0), cal_projection = False, 528 | P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 529 | else: 530 | box3d = lidar_to_camera_box( 531 | box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0)[0] 532 | box2d = lidar_box3d_to_camera_box( 533 | box[np.newaxis, :].astype(np.float32), cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 534 | x, y, z, h, w, l, r = box3d 535 | box3d = [h, w, l, x, y, z, r] 536 | label.append(template.format( 537 | cls, 0, 0, 0, *box2d, *box3d, float(score))) 538 | batch_label.append(label) 539 | else: 540 | template = '{} ' + ' '.join(['{:.4f}' for i in range(14)]) + '\n' 541 | for boxes, clses in zip(batch_box3d, batch_cls): 542 | label = [] 543 | for box, cls in zip(boxes, clses): 544 | if coordinate == 'camera': 545 | box3d = box 546 | box2d = lidar_box3d_to_camera_box( 547 | camera_to_lidar_box(box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0), 548 | cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 549 | else: 550 | box3d = lidar_to_camera_box( 551 | box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0)[0] 552 | box2d = lidar_box3d_to_camera_box( 553 | box[np.newaxis, :].astype(np.float32), cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 554 | x, y, z, h, w, l, r = box3d 555 | box3d = [h, w, l, x, y, z, r] 556 | label.append(template.format(cls, 0, 0, 0, *box2d, *box3d)) 557 | batch_label.append(label) 558 | 559 | return np.array(batch_label) 560 | 561 | 562 | def cal_anchors(): 563 | # Output: 564 | # Anchors: (w, l, 2, 7) x y z h w l r 565 | x = np.linspace(cfg.X_MIN, cfg.X_MAX, cfg.FEATURE_WIDTH) 566 | y = np.linspace(cfg.Y_MIN, cfg.Y_MAX, cfg.FEATURE_HEIGHT) 567 | cx, cy = np.meshgrid(x, y) 568 | # All are (w, l, 2) 569 | cx = np.tile(cx[..., np.newaxis], 2) 570 | cy = np.tile(cy[..., np.newaxis], 2) 571 | cz = np.ones_like(cx) * cfg.ANCHOR_Z 572 | w = np.ones_like(cx) * 
cfg.ANCHOR_W 573 | l = np.ones_like(cx) * cfg.ANCHOR_L 574 | h = np.ones_like(cx) * cfg.ANCHOR_H 575 | r = np.ones_like(cx) 576 | r[..., 0] = 0 # 0 577 | r[..., 1] = 90 / 180 * np.pi # 90 578 | 579 | # 7 * (w, l, 2) -> (w, l, 2, 7) 580 | anchors = np.stack([cx, cy, cz, h, w, l, r], axis = -1) 581 | 582 | return anchors 583 | 584 | 585 | def cal_rpn_target(labels, feature_map_shape, anchors, cls = 'Car', coordinate = 'lidar'): 586 | # Input: 587 | # labels: (N, N') 588 | # feature_map_shape: (w, l) 589 | # anchors: (w, l, 2, 7) 590 | # Output: 591 | # pos_equal_one (N, w, l, 2) 592 | # neg_equal_one (N, w, l, 2) 593 | # targets (N, w, l, 14) 594 | # Attention: cal IoU on birdview 595 | 596 | batch_size = labels.shape[0] 597 | batch_gt_boxes3d = label_to_gt_box3d(labels, cls = cls, coordinate = coordinate) 598 | # Defined in eq(1) in 2.2 599 | anchors_reshaped = anchors.reshape(-1, 7) 600 | anchors_d = np.sqrt(anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) 601 | pos_equal_one = np.zeros((batch_size, *feature_map_shape, 2)) 602 | neg_equal_one = np.zeros((batch_size, *feature_map_shape, 2)) 603 | targets = np.zeros((batch_size, *feature_map_shape, 14)) 604 | 605 | for batch_id in range(batch_size): 606 | # BOTTLENECK; from (x,y,w,l) to (x1,y1,x2,y2) 607 | anchors_standup_2d = anchor_to_standup_box2d(anchors_reshaped[:, [0, 1, 4, 5]]) 608 | # BOTTLENECK 609 | gt_standup_2d = corner_to_standup_box2d(center_to_corner_box2d( 610 | batch_gt_boxes3d[batch_id][:, [0, 1, 4, 5, 6]], coordinate = coordinate)) 611 | 612 | iou = bbox_overlaps( 613 | np.ascontiguousarray(anchors_standup_2d).astype(np.float32), 614 | np.ascontiguousarray(gt_standup_2d).astype(np.float32), 615 | ) 616 | # iou = cal_box3d_iou(anchors_reshaped, batch_gt_boxes3d[batch_id]) 617 | 618 | # Find anchor with highest iou (iou should also > 0) 619 | id_highest = np.argmax(iou.T, axis = 1) 620 | id_highest_gt = np.arange(iou.T.shape[0]) 621 | mask = iou.T[id_highest_gt, id_highest] > 0 622 | id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask] 623 | 624 | # Find anchor iou > cfg.XXX_POS_IOU 625 | id_pos, id_pos_gt = np.where(iou > cfg.RPN_POS_IOU) 626 | 627 | # Find anchor iou < cfg.XXX_NEG_IOU 628 | id_neg = np.where(np.sum(iou < cfg.RPN_NEG_IOU, axis = 1) == iou.shape[1])[0] 629 | 630 | id_pos = np.concatenate([id_pos, id_highest]) 631 | id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt]) 632 | 633 | # TODO: uniquify the array in a more scientific way 634 | id_pos, index = np.unique(id_pos, return_index = True) 635 | id_pos_gt = id_pos_gt[index] 636 | id_neg.sort() 637 | 638 | # Cal the target and set the equal one 639 | index_x, index_y, index_z = np.unravel_index(id_pos, (*feature_map_shape, 2)) 640 | pos_equal_one[batch_id, index_x, index_y, index_z] = 1 641 | 642 | # ATTENTION: index_z should be np.array 643 | targets[batch_id, index_x, index_y, np.array(index_z) * 7] = ( 644 | batch_gt_boxes3d[batch_id][id_pos_gt, 0] - anchors_reshaped[id_pos, 0]) / anchors_d[id_pos] 645 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 1] = ( 646 | batch_gt_boxes3d[batch_id][id_pos_gt, 1] - anchors_reshaped[id_pos, 1]) / anchors_d[id_pos] 647 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 2] = ( 648 | batch_gt_boxes3d[batch_id][id_pos_gt, 2] - anchors_reshaped[id_pos, 2]) / cfg.ANCHOR_H 649 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 3] = np.log( 650 | batch_gt_boxes3d[batch_id][id_pos_gt, 3] / anchors_reshaped[id_pos, 3]) 651 | targets[batch_id, index_x, index_y, 
np.array(index_z) * 7 + 4] = np.log( 652 | batch_gt_boxes3d[batch_id][id_pos_gt, 4] / anchors_reshaped[id_pos, 4]) 653 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 5] = np.log( 654 | batch_gt_boxes3d[batch_id][id_pos_gt, 5] / anchors_reshaped[id_pos, 5]) 655 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 6] = ( 656 | batch_gt_boxes3d[batch_id][id_pos_gt, 6] - anchors_reshaped[id_pos, 6]) 657 | 658 | index_x, index_y, index_z = np.unravel_index(id_neg, (*feature_map_shape, 2)) 659 | neg_equal_one[batch_id, index_x, index_y, index_z] = 1 660 | # To avoid a box be pos/neg in the same time 661 | index_x, index_y, index_z = np.unravel_index(id_highest, (*feature_map_shape, 2)) 662 | neg_equal_one[batch_id, index_x, index_y, index_z] = 0 663 | 664 | return pos_equal_one, neg_equal_one, targets 665 | 666 | 667 | # BOTTLENECK 668 | def delta_to_boxes3d(deltas, anchors, coordinate = 'lidar'): 669 | # Input: 670 | # deltas: (N, w, l, 14) 671 | # feature_map_shape: (w, l) 672 | # anchors: (w, l, 2, 7) 673 | 674 | # Ouput: 675 | # boxes3d: (N, w*l*2, 7) 676 | anchors_reshaped = anchors.reshape(-1, 7) 677 | deltas = deltas.reshape(deltas.shape[0], -1, 7) 678 | anchors_d = np.sqrt(anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) 679 | boxes3d = np.zeros_like(deltas) 680 | boxes3d[..., [0, 1]] = deltas[..., [0, 1]] * \ 681 | anchors_d[:, np.newaxis] + anchors_reshaped[..., [0, 1]] 682 | boxes3d[..., [2]] = deltas[..., [2]] * \ 683 | cfg.ANCHOR_H + anchors_reshaped[..., [2]] 684 | boxes3d[..., [3, 4, 5]] = np.exp( 685 | deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]] 686 | boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6] 687 | 688 | return boxes3d 689 | 690 | 691 | def point_transform(points, tx, ty, tz, rx = 0, ry = 0, rz = 0): 692 | # Input: 693 | # points: (N, 3) 694 | # rx/y/z: in radians 695 | # Output: 696 | # points: (N, 3) 697 | N = points.shape[0] 698 | points = np.hstack([points, np.ones((N, 1))]) 699 | 700 | mat1 = np.eye(4) 701 | mat1[3, 0:3] = tx, ty, tz 702 | points = np.matmul(points, mat1) 703 | 704 | if rx != 0: 705 | mat = np.zeros((4, 4)) 706 | mat[0, 0] = 1 707 | mat[3, 3] = 1 708 | mat[1, 1] = np.cos(rx) 709 | mat[1, 2] = -np.sin(rx) 710 | mat[2, 1] = np.sin(rx) 711 | mat[2, 2] = np.cos(rx) 712 | points = np.matmul(points, mat) 713 | 714 | if ry != 0: 715 | mat = np.zeros((4, 4)) 716 | mat[1, 1] = 1 717 | mat[3, 3] = 1 718 | mat[0, 0] = np.cos(ry) 719 | mat[0, 2] = np.sin(ry) 720 | mat[2, 0] = -np.sin(ry) 721 | mat[2, 2] = np.cos(ry) 722 | points = np.matmul(points, mat) 723 | 724 | if rz != 0: 725 | mat = np.zeros((4, 4)) 726 | mat[2, 2] = 1 727 | mat[3, 3] = 1 728 | mat[0, 0] = np.cos(rz) 729 | mat[0, 1] = -np.sin(rz) 730 | mat[1, 0] = np.sin(rz) 731 | mat[1, 1] = np.cos(rz) 732 | points = np.matmul(points, mat) 733 | 734 | return points[:, 0:3] 735 | 736 | 737 | def box_transform(boxes, tx, ty, tz, r = 0, coordinate = 'lidar'): 738 | # Input: 739 | # boxes: (N, 7) x y z h w l rz/y 740 | # Output: 741 | # boxes: (N, 7) x y z h w l rz/y 742 | boxes_corner = center_to_corner_box3d( 743 | boxes, coordinate = coordinate) # (N, 8, 3) 744 | for idx in range(len(boxes_corner)): 745 | if coordinate == 'lidar': 746 | boxes_corner[idx] = point_transform( 747 | boxes_corner[idx], tx, ty, tz, rz = r) 748 | else: 749 | boxes_corner[idx] = point_transform( 750 | boxes_corner[idx], tx, ty, tz, ry = r) 751 | 752 | return corner_to_center_box3d(boxes_corner, coordinate = coordinate) 753 | 754 | 755 | def cal_iou2d(box1, box2, 
T_VELO_2_CAM = None, R_RECT_0 = None): 756 | # Input: 757 | # box1/2: x, y, w, l, r 758 | # Output : 759 | # iou 760 | buf1 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 761 | buf2 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 762 | tmp = center_to_corner_box2d(np.array([box1, box2]), coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 763 | box1_corner = batch_lidar_to_bird_view(tmp[0]).astype(np.int32) 764 | box2_corner = batch_lidar_to_bird_view(tmp[1]).astype(np.int32) 765 | buf1 = cv2.fillConvexPoly(buf1, box1_corner, color = (1, 1, 1))[..., 0] 766 | buf2 = cv2.fillConvexPoly(buf2, box2_corner, color = (1, 1, 1))[..., 0] 767 | indiv = np.sum(np.absolute(buf1-buf2)) 768 | share = np.sum((buf1 + buf2) == 2) 769 | if indiv == 0: 770 | return 0.0 # when target is out of bound 771 | 772 | return share / (indiv + share) 773 | 774 | 775 | def cal_z_intersect(cz1, h1, cz2, h2): 776 | b1z1, b1z2 = cz1 - h1 / 2, cz1 + h1 / 2 777 | b2z1, b2z2 = cz2 - h2 / 2, cz2 + h2 / 2 778 | if b1z1 > b2z2 or b2z1 > b1z2: 779 | return 0 780 | elif b2z1 <= b1z1 <= b2z2: 781 | if b1z2 <= b2z2: 782 | return h1 / h2 783 | else: 784 | return (b2z2 - b1z1) / (b1z2 - b2z1) 785 | elif b1z1 < b2z1 < b1z2: 786 | if b2z2 <= b1z2: 787 | return h2 / h1 788 | else: 789 | return (b1z2 - b2z1) / (b2z2 - b1z1) 790 | 791 | 792 | def cal_iou3d(box1, box2, T_VELO_2_CAM = None, R_RECT_0 = None): 793 | # Input: 794 | # box1/2: x, y, z, h, w, l, r 795 | # Output: 796 | # iou 797 | buf1 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 798 | buf2 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 799 | tmp = center_to_corner_box2d(np.array([box1[[0, 1, 4, 5, 6]], box2[[0, 1, 4, 5, 6]]]), coordinate = 'lidar', 800 | T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 801 | box1_corner = batch_lidar_to_bird_view(tmp[0]).astype(np.int32) 802 | box2_corner = batch_lidar_to_bird_view(tmp[1]).astype(np.int32) 803 | buf1 = cv2.fillConvexPoly(buf1, box1_corner, color = (1, 1, 1))[..., 0] 804 | buf2 = cv2.fillConvexPoly(buf2, box2_corner, color = (1, 1, 1))[..., 0] 805 | share = np.sum((buf1 + buf2) == 2) 806 | area1 = np.sum(buf1) 807 | area2 = np.sum(buf2) 808 | 809 | z1, h1, z2, h2 = box1[2], box1[3], box2[2], box2[3] 810 | z_intersect = cal_z_intersect(z1, h1, z2, h2) 811 | 812 | return share * z_intersect / (area1 * h1 + area2 * h2 - share * z_intersect) 813 | 814 | 815 | def cal_box3d_iou(boxes3d, gt_boxes3d, cal_3d = 0, T_VELO_2_CAM = None, R_RECT_0 = None): 816 | # Inputs: 817 | # boxes3d: (N1, 7) x,y,z,h,w,l,r 818 | # gt_boxed3d: (N2, 7) x,y,z,h,w,l,r 819 | # Outputs: 820 | # iou: (N1, N2) 821 | N1 = len(boxes3d) 822 | N2 = len(gt_boxes3d) 823 | output = np.zeros((N1, N2), dtype = np.float32) 824 | 825 | for idx in range(N1): 826 | for idy in range(N2): 827 | if cal_3d: 828 | output[idx, idy] = float(cal_iou3d(boxes3d[idx], gt_boxes3d[idy], T_VELO_2_CAM, R_RECT_0)) 829 | else: 830 | output[idx, idy] = float( 831 | cal_iou2d(boxes3d[idx, [0, 1, 4, 5, 6]], gt_boxes3d[idy, [0, 1, 4, 5, 6]], T_VELO_2_CAM, R_RECT_0)) 832 | 833 | return output 834 | 835 | 836 | def cal_box2d_iou(boxes2d, gt_boxes2d, T_VELO_2_CAM = None, R_RECT_0 = None): 837 | # Inputs: 838 | # boxes2d: (N1, 5) x,y,w,l,r 839 | # gt_boxes2d: (N2, 5) x,y,w,l,r 840 | # Outputs: 841 | # iou: (N1, N2) 842 | N1 = len(boxes2d) 843 | N2 = len(gt_boxes2d) 844 | output = np.zeros((N1, N2), dtype = np.float32) 845 | for idx in range(N1): 846 | for idy in range(N2): 847 | output[idx, idy] = cal_iou2d(boxes2d[idx], gt_boxes2d[idy], T_VELO_2_CAM, R_RECT_0) 
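# output[idx, idy] is the rasterized bird's-eye-view IoU between boxes2d[idx] and gt_boxes2d[idy]
# (computed by cal_iou2d via cv2.fillConvexPoly on the (INPUT_HEIGHT, INPUT_WIDTH) canvas).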
848 | 849 | return output 850 | 851 | 852 | if __name__ == '__main__': 853 | pass 854 | -------------------------------------------------------------------------------- /eval/KITTI/evaluate_object_3d_offline.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "mail.h" 21 | 22 | BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian) 23 | 24 | typedef boost::geometry::model::polygon > Polygon; 25 | 26 | 27 | using namespace std; 28 | 29 | /*======================================================================= 30 | STATIC EVALUATION PARAMETERS 31 | =======================================================================*/ 32 | 33 | // holds the number of test images on the server 34 | const int32_t N_TESTIMAGES = 7518; 35 | 36 | // easy, moderate and hard evaluation level 37 | enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2}; 38 | 39 | // evaluation metrics: image, ground or 3D 40 | enum METRIC{IMAGE=0, GROUND=1, BOX3D=2}; 41 | 42 | // evaluation parameter 43 | const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections 44 | const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation 45 | const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation 46 | 47 | // evaluated object classes 48 | enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2}; 49 | const int NUM_CLASS = 3; 50 | 51 | // parameters varying per class 52 | vector CLASS_NAMES; 53 | // the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation 54 | const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}}; 55 | 56 | // no. of recall steps that should be evaluated (discretized) 57 | const double N_SAMPLE_PTS = 41; 58 | 59 | 60 | // initialize class names 61 | void initGlobals () { 62 | CLASS_NAMES.push_back("car"); 63 | CLASS_NAMES.push_back("pedestrian"); 64 | CLASS_NAMES.push_back("cyclist"); 65 | } 66 | 67 | /*======================================================================= 68 | DATA TYPES FOR EVALUATION 69 | =======================================================================*/ 70 | 71 | // holding data needed for precision-recall and precision-aos 72 | struct tPrData { 73 | vector v; // detection score for computing score thresholds 74 | double similarity; // orientation similarity 75 | int32_t tp; // true positives 76 | int32_t fp; // false positives 77 | int32_t fn; // false negatives 78 | tPrData () : 79 | similarity(0), tp(0), fp(0), fn(0) {} 80 | }; 81 | 82 | // holding bounding boxes for ground truth and detections 83 | struct tBox { 84 | string type; // object type as car, pedestrian or cyclist,... 
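// the remaining members give the 2D image-plane box in pixels plus the observation angle alpha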
85 | double x1; // left corner 86 | double y1; // top corner 87 | double x2; // right corner 88 | double y2; // bottom corner 89 | double alpha; // image orientation 90 | tBox (string type, double x1,double y1,double x2,double y2,double alpha) : 91 | type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {} 92 | }; 93 | 94 | // holding ground truth data 95 | struct tGroundtruth { 96 | tBox box; // object type, box, orientation 97 | double truncation; // truncation 0..1 98 | int32_t occlusion; // occlusion 0,1,2 (non, partly, fully) 99 | double ry; 100 | double t1, t2, t3; 101 | double h, w, l; 102 | tGroundtruth () : 103 | box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {} 104 | tGroundtruth (tBox box,double truncation,int32_t occlusion) : 105 | box(box),truncation(truncation),occlusion(occlusion) {} 106 | tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) : 107 | box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {} 108 | }; 109 | 110 | // holding detection data 111 | struct tDetection { 112 | tBox box; // object type, box, orientation 113 | double thresh; // detection score 114 | double ry; 115 | double t1, t2, t3; 116 | double h, w, l; 117 | tDetection (): 118 | box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {} 119 | tDetection (tBox box,double thresh) : 120 | box(box),thresh(thresh) {} 121 | tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) : 122 | box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {} 123 | }; 124 | 125 | 126 | /*======================================================================= 127 | FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS 128 | =======================================================================*/ 129 | vector indices; 130 | 131 | vector loadDetections(string file_name, bool &compute_aos, 132 | vector &eval_image, vector &eval_ground, 133 | vector &eval_3d, bool &success) { 134 | 135 | // holds all detections (ignored detections are indicated by an index vector 136 | vector detections; 137 | FILE *fp = fopen(file_name.c_str(),"r"); 138 | if (!fp) { 139 | success = false; 140 | return detections; 141 | } 142 | while (!feof(fp)) { 143 | tDetection d; 144 | double trash; 145 | char str[255]; 146 | if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 147 | str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1, 148 | &d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3, 149 | &d.ry, &d.thresh)==16) { 150 | 151 | // d.thresh = 1; 152 | d.box.type = str; 153 | detections.push_back(d); 154 | 155 | // orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid 156 | if(d.box.alpha == -10) 157 | compute_aos = false; 158 | 159 | // a class is only evaluated if it is detected at least once 160 | for (int c = 0; c < NUM_CLASS; c++) { 161 | if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) { 162 | if (!eval_image[c] && d.box.x1 >= 0) 163 | eval_image[c] = true; 164 | if (!eval_ground[c] && d.t1 != -1000) 165 | eval_ground[c] = true; 166 | if (!eval_3d[c] && d.t2 != -1000) 167 | eval_3d[c] = true; 168 | break; 169 | } 170 | } 171 | } 172 | } 173 | fclose(fp); 174 | success = true; 175 | return detections; 176 | } 177 | 178 | vector loadGroundtruth(string file_name,bool &success) { 179 | 180 | // holds all ground truth (ignored ground truth is indicated by an index vector 181 | vector groundtruth; 182 | FILE *fp = 
fopen(file_name.c_str(),"r"); 183 | if (!fp) { 184 | success = false; 185 | return groundtruth; 186 | } 187 | while (!feof(fp)) { 188 | tGroundtruth g; 189 | char str[255]; 190 | if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 191 | str, &g.truncation, &g.occlusion, &g.box.alpha, 192 | &g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2, 193 | &g.h, &g.w, &g.l, &g.t1, 194 | &g.t2, &g.t3, &g.ry )==15) { 195 | g.box.type = str; 196 | groundtruth.push_back(g); 197 | } 198 | } 199 | fclose(fp); 200 | success = true; 201 | return groundtruth; 202 | } 203 | 204 | void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) { 205 | 206 | // save precision to file 207 | if(precision.empty()) 208 | return; 209 | for (int32_t i=0; i 269 | Polygon toPolygon(const T& g) { 270 | using namespace boost::numeric::ublas; 271 | using namespace boost::geometry; 272 | matrix mref(2, 2); 273 | mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry); 274 | mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry); 275 | 276 | static int count = 0; 277 | matrix corners(2, 4); 278 | double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2, 279 | g.w / 2, -g.w / 2, -g.w / 2, g.w / 2}; 280 | std::copy(data, data + 8, corners.data().begin()); 281 | matrix gc = prod(mref, corners); 282 | for (int i = 0; i < 4; ++i) { 283 | gc(0, i) += g.t1; 284 | gc(1, i) += g.t3; 285 | } 286 | 287 | double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}}; 288 | Polygon poly; 289 | append(poly, points); 290 | return poly; 291 | } 292 | 293 | // measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz) 294 | inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 295 | using namespace boost::geometry; 296 | Polygon gp = toPolygon(g); 297 | Polygon dp = toPolygon(d); 298 | 299 | std::vector in, un; 300 | intersection(gp, dp, in); 301 | union_(gp, dp, un); 302 | 303 | double inter_area = in.empty() ? 0 : area(in.front()); 304 | double union_area = area(un.front()); 305 | double o; 306 | if(criterion==-1) // union 307 | o = inter_area / union_area; 308 | else if(criterion==0) // bbox_a 309 | o = inter_area / area(dp); 310 | else if(criterion==1) // bbox_b 311 | o = inter_area / area(gp); 312 | 313 | return o; 314 | } 315 | 316 | // measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz) 317 | inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 318 | using namespace boost::geometry; 319 | Polygon gp = toPolygon(g); 320 | Polygon dp = toPolygon(d); 321 | 322 | std::vector in, un; 323 | intersection(gp, dp, in); 324 | union_(gp, dp, un); 325 | 326 | double ymax = min(d.t2, g.t2); 327 | double ymin = max(d.t2 - d.h, g.t2 - g.h); 328 | 329 | double inter_area = in.empty() ? 
0 : area(in.front()); 330 | double inter_vol = inter_area * max(0.0, ymax - ymin); 331 | 332 | double det_vol = d.h * d.l * d.w; 333 | double gt_vol = g.h * g.l * g.w; 334 | 335 | double o; 336 | if(criterion==-1) // union 337 | o = inter_vol / (det_vol + gt_vol - inter_vol); 338 | else if(criterion==0) // bbox_a 339 | o = inter_vol / det_vol; 340 | else if(criterion==1) // bbox_b 341 | o = inter_vol / gt_vol; 342 | 343 | return o; 344 | } 345 | 346 | vector getThresholds(vector &v, double n_groundtruth){ 347 | 348 | // holds scores needed to compute N_SAMPLE_PTS recall values 349 | vector t; 350 | 351 | // sort scores in descending order 352 | // (highest score is assumed to give best/most confident detections) 353 | sort(v.begin(), v.end(), greater()); 354 | 355 | // get scores for linearly spaced recall 356 | double current_recall = 0; 357 | for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){ 382 | 383 | // extract ground truth bounding boxes for current evaluation class 384 | for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >, 457 | const vector &det, const vector &dc, 458 | const vector &ignored_gt, const vector &ignored_det, 459 | bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 460 | METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){ 461 | 462 | tPrData stat = tPrData(); 463 | const double NO_DETECTION = -10000000; 464 | vector delta; // holds angular difference for TPs (needed for AOS evaluation) 465 | vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth 466 | assigned_detection.assign(det.size(), false); 467 | vector ignored_threshold; 468 | ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed 469 | 470 | // detections with a low score are ignored for computing precision (needs FP) 471 | if(compute_fp) 472 | for(int32_t i=0; i 0.5) (logical len(det)) 485 | =======================================================================*/ 486 | int32_t det_idx = -1; 487 | double valid_detection = NO_DETECTION; 488 | double max_overlap = 0; 489 | 490 | // search for a possible detection 491 | bool assigned_ignored_det = false; 492 | for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){ 507 | det_idx = j; 508 | valid_detection = det[j].thresh; 509 | } 510 | 511 | // for computing pr curve values, the candidate with the greatest overlap is considered 512 | // if the greatest overlap is an ignored detection (min_height), the overlapping detection is used 513 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){ 514 | max_overlap = overlap; 515 | det_idx = j; 516 | valid_detection = 1; 517 | assigned_ignored_det = false; 518 | } 519 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){ 520 | det_idx = j; 521 | valid_detection = 1; 522 | assigned_ignored_det = true; 523 | } 524 | } 525 | 526 | /*======================================================================= 527 | compute TP, FP and FN 528 | =======================================================================*/ 529 | 530 | // nothing was assigned to this valid ground truth 531 | if(valid_detection==NO_DETECTION && ignored_gt[i]==0) { 532 | stat.fn++; 533 | } 
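// (a valid ground truth left without any assigned detection has just been counted as a false negative)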
534 | 535 | // only evaluate valid ground truth <=> detection assignments (considering difficulty level) 536 | else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1)) 537 | assigned_detection[det_idx] = true; 538 | 539 | // found a valid true positive 540 | else if(valid_detection!=NO_DETECTION){ 541 | 542 | // write highest score to threshold vector 543 | stat.tp++; 544 | stat.v.push_back(det[det_idx].thresh); 545 | 546 | // compute angular difference of detection and ground truth if valid detection orientation was provided 547 | if(compute_aos) 548 | delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha); 549 | 550 | // clean up 551 | assigned_detection[det_idx] = true; 552 | } 553 | } 554 | 555 | // if FP are requested, consider stuff area 556 | if(compute_fp){ 557 | 558 | // count fp 559 | for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){ 582 | assigned_detection[j] = true; 583 | nstuff++; 584 | } 585 | } 586 | } 587 | 588 | // FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas 589 | stat.fp -= nstuff; 590 | 591 | // if all orientation values are valid, the AOS is computed 592 | if(compute_aos){ 593 | vector tmp; 594 | 595 | // FP have a similarity of 0, for all TP compute AOS 596 | tmp.assign(stat.fp, 0); 597 | for(int32_t i=0; i0 || stat.fp>0) 606 | stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0); 607 | 608 | // there was neither a FP nor a TP, so the similarity is ignored in the evaluation 609 | else 610 | stat.similarity = -1; 611 | } 612 | } 613 | return stat; 614 | } 615 | 616 | /*======================================================================= 617 | EVALUATE CLASS-WISE 618 | =======================================================================*/ 619 | 620 | bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class, 621 | const vector< vector > &groundtruth, 622 | const vector< vector > &detections, bool compute_aos, 623 | double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 624 | vector &precision, vector &aos, 625 | DIFFICULTY difficulty, METRIC metric) { 626 | assert(groundtruth.size() == detections.size()); 627 | 628 | // init 629 | int32_t n_gt=0; // total no. 
of gt (denominator of recall) 630 | vector v, thresholds; // detection scores, evaluated for recall discretization 631 | vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty 632 | vector< vector > dontcare; // index of dontcare areas, included in ground truth 633 | 634 | // for all test images do 635 | for (int32_t i=0; i i_gt, i_det; 639 | vector dc; 640 | 641 | // only evaluate objects of current class and ignore occluded, truncated objects 642 | cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty); 643 | ignored_gt.push_back(i_gt); 644 | ignored_det.push_back(i_det); 645 | dontcare.push_back(dc); 646 | 647 | // compute statistics to get recall values 648 | tPrData pr_tmp = tPrData(); 649 | pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric); 650 | 651 | // add detection scores to vector over all images 652 | for(int32_t j=0; j pr; 661 | pr.assign(thresholds.size(),tPrData()); 662 | for (int32_t i=0; i recall; 682 | precision.assign(N_SAMPLE_PTS, 0); 683 | if(compute_aos) 684 | aos.assign(N_SAMPLE_PTS, 0); 685 | double r=0; 686 | for (int32_t i=0; i vals[],bool is_aos){ 707 | 708 | char command[1024]; 709 | 710 | // save plot data to file 711 | FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w"); 712 | printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str()); 713 | for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++) 714 | fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]); 715 | fclose(fp); 716 | 717 | float sum[3] = {0, 0, 0}; 718 | for (int v = 0; v < 3; ++v) 719 | for (int i = 0; i < vals[v].size(); i = i + 4) 720 | sum[v] += vals[v][i]; 721 | printf("%s AP: %f %f %f\n", file_name.c_str(), sum[0] / 11 * 100, sum[1] / 11 * 100, sum[2] / 11 * 100); 722 | 723 | 724 | // create png + eps 725 | for (int32_t j=0; j<2; j++) { 726 | 727 | // open file 728 | FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w"); 729 | 730 | // save gnuplot instructions 731 | if (j==0) { 732 | fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n"); 733 | fprintf(fp,"set output \"%s.png\"\n",file_name.c_str()); 734 | } else { 735 | fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n"); 736 | fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str()); 737 | } 738 | 739 | // set labels and ranges 740 | fprintf(fp,"set size ratio 0.7\n"); 741 | fprintf(fp,"set xrange [0:1]\n"); 742 | fprintf(fp,"set yrange [0:1]\n"); 743 | fprintf(fp,"set xlabel \"Recall\"\n"); 744 | if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n"); 745 | else fprintf(fp,"set ylabel \"Orientation Similarity\"\n"); 746 | obj_type[0] = toupper(obj_type[0]); 747 | fprintf(fp,"set title \"%s\"\n",obj_type.c_str()); 748 | 749 | // line width 750 | int32_t lw = 5; 751 | if (j==0) lw = 3; 752 | 753 | // plot error curve 754 | fprintf(fp,"plot "); 755 | fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw); 756 | fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw); 757 | fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw); 758 | 759 | // close file 760 | fclose(fp); 761 | 762 | // run gnuplot => create png + eps 763 | sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str()); 764 | system(command); 765 | } 766 | 767 | // create pdf and crop 768 | 
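// (relies on ps2pdf and pdfcrop being available on PATH; the precision/AOS .txt files were already written above)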
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 769 | system(command); 770 | sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 771 | system(command); 772 | sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str()); 773 | system(command); 774 | } 775 | 776 | vector getEvalIndices(const string& result_dir) { 777 | 778 | DIR* dir; 779 | dirent* entity; 780 | dir = opendir(result_dir.c_str()); 781 | if (dir) { 782 | while (entity = readdir(dir)) { 783 | string path(entity->d_name); 784 | int32_t len = path.size(); 785 | if (len < 10) continue; 786 | int32_t index = atoi(path.substr(len - 10, 10).c_str()); 787 | indices.push_back(index); 788 | } 789 | } 790 | return indices; 791 | } 792 | 793 | bool eval(string gt_dir, string result_dir, Mail* mail){ 794 | 795 | // set some global parameters 796 | initGlobals(); 797 | 798 | // ground truth and result directories 799 | // string gt_dir = "data/object/label_2"; 800 | // string result_dir = "results/" + result_sha; 801 | string plot_dir = result_dir + "/plot"; 802 | 803 | // create output directories 804 | system(("mkdir " + plot_dir).c_str()); 805 | 806 | // hold detections and ground truth in memory 807 | vector< vector > groundtruth; 808 | vector< vector > detections; 809 | 810 | // holds wether orientation similarity shall be computed (might be set to false while loading detections) 811 | // and which labels where provided by this submission 812 | bool compute_aos=true; 813 | vector eval_image(NUM_CLASS, false); 814 | vector eval_ground(NUM_CLASS, false); 815 | vector eval_3d(NUM_CLASS, false); 816 | 817 | // for all images read groundtruth and detections 818 | mail->msg("Loading detections..."); 819 | std::vector indices = getEvalIndices(result_dir + "/data/"); 820 | printf("number of files for evaluation: %d\n", (int)indices.size()); 821 | 822 | for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success); 831 | vector det = loadDetections(result_dir + "/data/" + file_name, 832 | compute_aos, eval_image, eval_ground, eval_3d, det_success); 833 | groundtruth.push_back(gt); 834 | detections.push_back(det); 835 | 836 | // check for errors 837 | if (!gt_success) { 838 | mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name); 839 | return false; 840 | } 841 | if (!det_success) { 842 | mail->msg("ERROR: Couldn't read: %s", file_name); 843 | return false; 844 | } 845 | } 846 | mail->msg(" done."); 847 | 848 | // holds pointers for result files 849 | FILE *fp_det=0, *fp_ori=0; 850 | 851 | // eval image 2D bounding boxes 852 | printf("start to eval image 2D bounding boxes... 
\n"); 853 | for (int c = 0; c < NUM_CLASS; c++) { 854 | CLASSES cls = (CLASSES)c; 855 | if (eval_image[c]) { 856 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w"); 857 | if(compute_aos) 858 | fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w"); 859 | vector precision[3], aos[3]; 860 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE) 861 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE) 862 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) { 863 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 864 | return false; 865 | } 866 | fclose(fp_det); 867 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0); 868 | printf("2D bounding boxes detection saved ... \n"); 869 | if(compute_aos){ 870 | printf(" start to compute aos ... \n"); 871 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1); 872 | fclose(fp_ori); 873 | printf("2D bounding boxes aos saved ... \n"); 874 | } 875 | } 876 | } 877 | 878 | // don't evaluate AOS for birdview boxes and 3D boxes 879 | compute_aos = false; 880 | 881 | // eval bird's eye view bounding boxes 882 | printf("start to eval bird's eye view bounding boxes... \n"); 883 | for (int c = 0; c < NUM_CLASS; c++) { 884 | CLASSES cls = (CLASSES)c; 885 | if (eval_ground[c]) { 886 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w"); 887 | vector precision[3], aos[3]; 888 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND) 889 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND) 890 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) { 891 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 892 | return false; 893 | } 894 | fclose(fp_det); 895 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0); 896 | } 897 | } 898 | 899 | // eval 3D bounding boxes 900 | printf("start to eval 3D bounding boxes... \n"); 901 | for (int c = 0; c < NUM_CLASS; c++) { 902 | CLASSES cls = (CLASSES)c; 903 | if (eval_3d[c]) { 904 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w"); 905 | vector precision[3], aos[3]; 906 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D) 907 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D) 908 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) { 909 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 910 | return false; 911 | } 912 | fclose(fp_det); 913 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0); 914 | } 915 | } 916 | 917 | // success 918 | return true; 919 | } 920 | 921 | int32_t main (int32_t argc,char *argv[]) { 922 | 923 | // we need 2 or 4 arguments! 
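// (this offline variant actually takes exactly two arguments: the ground-truth directory and the result directory)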
924 | if (argc!=3) { 925 | cout << "Usage: ./eval_detection_3d_offline gt_dir result_dir" << endl; 926 | return 1; 927 | } 928 | 929 | // read arguments 930 | string gt_dir = argv[1]; 931 | string result_dir = argv[2]; 932 | 933 | // init notification mail 934 | Mail *mail; 935 | mail = new Mail(); 936 | mail->msg("Thank you for participating in our evaluation!"); 937 | 938 | // run evaluation 939 | if (eval(gt_dir, result_dir, mail)) { 940 | mail->msg("Your evaluation results are available at:"); 941 | mail->msg(result_dir.c_str()); 942 | } else { 943 | system(("rm -r " + result_dir + "/plot").c_str()); 944 | mail->msg("An error occurred while processing your results."); 945 | } 946 | 947 | // send mail and exit 948 | delete mail; 949 | 950 | return 0; 951 | } 952 | 953 | 954 | 955 | 956 | 957 | --------------------------------------------------------------------------------