├── model
│   ├── __init__.py
│   ├── group_pointcloud.py
│   ├── rpn.py
│   └── model.py
├── utils
│   ├── __init__.py
│   ├── setup.py
│   ├── nms.py
│   ├── preprocess.py
│   ├── colorize.py
│   ├── box_overlaps.pyx
│   ├── data_aug.py
│   └── utils.py
├── parse.sh
├── train.sh
├── test.sh
├── eval
│   └── KITTI
│       ├── launch_test.sh
│       ├── README.md
│       ├── mail.h
│       ├── evaluate_object_3d.cpp
│       └── evaluate_object_3d_offline.cpp
├── notes
│   ├── QUESTIONS.md
│   ├── DESCRIPTIONS.md
│   └── REMARKS.md
├── LICENSE
├── .gitignore
├── preproc
│   ├── split.py
│   └── crop.py
├── parse.py
├── loader
│   └── kitti.py
├── test.py
├── README.md
├── config.py
└── train.py

/model/__init__.py:
--------------------------------------------------------------------------------
1 | from model.rpn import *
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from utils.box_overlaps import *
--------------------------------------------------------------------------------
/parse.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python parse.py preds
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python train.py --alpha 1.5 --beta 1
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | python test.py --tag default --resumed_model kitti_Car_best.pth.tar
--------------------------------------------------------------------------------
/eval/KITTI/launch_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Cropped ground-truth dir
3 | GT_DIR=./data/MD_KITTI/validation/label_2
4 | # Prediction dir
5 | PRED_DIR=$1
6 |
7 | # Output log
8 | OUTPUT=$2
9 | # Start test
10 | nohup `pwd`/eval/KITTI/evaluate_object_3d_offline $GT_DIR $PRED_DIR > $OUTPUT 2>&1 &
11 |
--------------------------------------------------------------------------------
/notes/QUESTIONS.md:
--------------------------------------------------------------------------------
1 | - Some details of the functions in [utils/utils.py](../utils/utils.py) are not fully understood, especially the ones that pre-process the dataset.
2 | They involve multiple coordinate projections and transformations.
3 |
4 | - The maximum number of non-empty voxels (K) differs across samples, so it is not straightforward to use multiple GPUs in PyTorch,
5 | because it is hard to pack a batch of samples into a single tensor. (TensorFlow can assign certain samples to a certain GPU.)
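One possible (untested) workaround, sketched below rather than implemented in this repo, is to pad every sample's voxel buffers to the largest K in the batch so they stack into dense tensors that `nn.DataParallel` could split along the batch dimension. The function name `pad_and_stack` and the padding scheme are illustrative assumptions, not part of the current code; the buffer shapes follow `process_pointcloud` and `build_input`:

```python
import numpy as np
import torch

def pad_and_stack(feature_list, coordinate_list, t=35):
    # feature_list[i]:    (K_i, t, 7) array from process_pointcloud
    # coordinate_list[i]: (K_i, 3) array of (d, h, w) voxel indices
    # Pad each sample to the largest K so the batch stacks into dense tensors;
    # padded voxels stay all-zero and are flagged by the returned mask.
    k_max = max(f.shape[0] for f in feature_list)
    batch = len(feature_list)

    features = np.zeros((batch, k_max, t, 7), dtype=np.float32)
    coords = np.zeros((batch, k_max, 4), dtype=np.int64)
    mask = np.zeros((batch, k_max), dtype=np.uint8)

    for i, (feat, coord) in enumerate(zip(feature_list, coordinate_list)):
        k = feat.shape[0]
        features[i, :k] = feat
        coords[i, :k, 0] = i          # batch index, as in build_input
        coords[i, :k, 1:] = coord
        mask[i, :k] = 1

    return (torch.from_numpy(features),
            torch.from_numpy(coords),
            torch.from_numpy(mask))
```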
--------------------------------------------------------------------------------
/utils/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding:UTF-8 -*-
3 |
4 | # File Name : setup.py
5 | # Purpose :
6 | # Creation Date : 11-12-2017
7 | # Last Modified : Sat 23 Dec 2017 03:19:46 PM CST
8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com]
9 |
10 |
11 | from distutils.core import setup
12 | from Cython.Build import cythonize
13 | import numpy
14 |
15 | setup(
16 |     name = 'box overlaps',
17 |     ext_modules = cythonize('./utils/box_overlaps.pyx'),
18 |     include_dirs = [numpy.get_include()]
19 | )
20 |
--------------------------------------------------------------------------------
/eval/KITTI/README.md:
--------------------------------------------------------------------------------
1 | # KITTI Evaluation
2 |
3 | `evaluate_object_3d_offline.cpp` evaluates your KITTI detections locally, using validation data selected from the KITTI training set, with the following metrics:
4 |
5 | - overlap on image (AP)
6 | - oriented overlap on image (AOS)
7 | - overlap on ground-plane (AP)
8 | - overlap in 3D (AP)
9 |
10 | Compile `evaluate_object_3d_offline.cpp` with Boost and the Linux `dirent.h` header (already available on most Linux systems).
11 |
12 | Run the evaluation with:
13 |
14 |     ./evaluate_object_3d_offline groundtruth_dir result_dir
15 |
16 | Note that you don't have to run detection over all KITTI training data. The evaluator only evaluates samples whose result files exist.
17 |
18 |
19 | ### Updates
20 |
21 | - June, 2017:
22 |   * Fixed the bug of detection box filtering based on min height, according to KITTI's note on 25.04.2017.
23 |
--------------------------------------------------------------------------------
/eval/KITTI/mail.h:
--------------------------------------------------------------------------------
1 | #ifndef MAIL_H
2 | #define MAIL_H
3 |
4 | #include <stdio.h>
5 | #include <string>
6 | #include <stdarg.h>
7 |
8 | class Mail {
9 |
10 | public:
11 |
12 |   Mail (std::string email = "") {
13 |     if (email.compare("")) {
14 |       mail = popen("/usr/lib/sendmail -t -f noreply@cvlibs.net","w");
15 |       fprintf(mail,"To: %s\n", email.c_str());
16 |       fprintf(mail,"From: noreply@cvlibs.net\n");
17 |       fprintf(mail,"Subject: KITTI Evaluation Benchmark\n");
18 |       fprintf(mail,"\n\n");
19 |     } else {
20 |       mail = 0;
21 |     }
22 |   }
23 |
24 |   ~Mail() {
25 |     if (mail) {
26 |       pclose(mail);
27 |     }
28 |   }
29 |
30 |   void msg (const char *format, ...) {
31 |     va_list args;
32 |     va_start(args,format);
33 |     if (mail) {
34 |       vfprintf(mail,format,args);
35 |       fprintf(mail,"\n");
36 |     }
37 |     vprintf(format,args);
38 |     printf("\n");
39 |     va_end(args);
40 |   }
41 |
42 | private:
43 |
44 |   FILE *mail;
45 |
46 | };
47 |
48 | #endif
49 |
--------------------------------------------------------------------------------
/notes/DESCRIPTIONS.md:
--------------------------------------------------------------------------------
1 | # data_object_image_2.zip
2 | Images from the second color camera.
3 |
4 | # data_object_velodyne.zip
5 | Velodyne scanned points. Each file (.bin) corresponds to an image and contains around 12K 3D points.
6 | Each point is stored in the format (x, y, z, r), where r is the reflectance value.
7 |
8 | # data_object_label_2.zip
9 | Label format can be found [here](https://github.com/NVIDIA/DIGITS/blob/v4.0.0-rc.3/digits/extensions/data/objectDetection/README.md#label-format).
10 | The last value is only used for online submission.
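To make the format concrete, here is a parsing sketch with a made-up example line (the numeric values are purely illustrative, not taken from KITTI); training labels carry these 15 fields, and a 16th value (the score) is appended only in result files for submission:

```python
# Illustrative only: the 15 fields of one KITTI training label line.
fields = ['type', 'truncated', 'occluded', 'alpha',
          'bbox_left', 'bbox_top', 'bbox_right', 'bbox_bottom',  # 2D box in image
          'height', 'width', 'length',                           # 3D size (camera coords)
          'x', 'y', 'z',                                         # 3D location (camera coords)
          'rotation_y']                                          # yaw around the camera Y axis
line = 'Car 0.00 0 -1.57 614.24 181.78 727.31 284.77 1.57 1.73 4.15 1.00 1.75 13.22 -1.62'
label = dict(zip(fields, line.split()))
print(label['type'], label['length'], label['rotation_y'])
```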
Original description can be downloaded [here](https://s3.eu-central-1.amazonaws.com/avg-kitti/devkit_object.zip). 11 | 12 | 13 | 14 | # data_object_calib.zip 15 | Pi: projection matrix after rectification, size of 3x4; i is camera index 16 | 17 | R0_rect: rectifying rotation matrix of the reference camera (camera 0) 18 | 19 | Tr_velo_to_cam: rotation and translation matrix from the Velodyne coordinate to the camera coordinate 20 | 21 | To transform a 3D point x in Velodyne coordinates to a point y in i-th camera image using: **y = Pi * R0_rect * Tr_velo_to_cam * x** -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 huanghao-code 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 |
69 | # PyBuilder
70 | target/
71 |
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 |
75 | # pyenv
76 | .python-version
77 |
78 | # celery beat schedule file
79 | celerybeat-schedule
80 |
81 | # SageMath parsed files
82 | *.sage.py
83 |
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/notes/REMARKS.md:
--------------------------------------------------------------------------------
1 | # Sparse Tensor Representation
2 | - Use `torch.sparse.FloatTensor` to replace `tf.scatter_nd` in [model/group_pointcloud.py](../model/group_pointcloud.py)
3 | - A toy example for illustration:
4 | ```
5 | size = torch.Size([2, 3, 3, 3, 5])  # similar to '[batch_size, DEPTH, HEIGHT, WIDTH, 128]'
6 | src = torch.Tensor([[1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3]])  # similar to 'voxelwise'
7 | index = torch.Tensor([[0, 0, 1, 2], [0, 1, 1, 1], [1, 2, 1, 0]])  # similar to 'coordinate'
8 | dst = torch.sparse.FloatTensor(index.t(), src, size)  # similar to 'outputs'
9 | ```
10 |
11 | # Deconv2D
12 | - According to the formula of [`ConvTranspose2d`](https://pytorch.org/docs/0.4.1/nn.html?highlight=convtranspose2d#torch.nn.ConvTranspose2d) in PyTorch,
13 | H_out = (H_in − 1) × stride\[0\] − 2 × padding\[0\] + kernel_size\[0\], so if the first Deconv2D is set to (128, 256, 3, 1, 0), the spatial size cannot be kept unchanged.
14 | - In [model/rpn.py](../model/rpn.py), change the first Deconv2D to (128, 256, 3, 1, 1).
15 |
16 | # Gradient Clip
17 | - The TensorFlow implementation needs many lines to clip gradients in the file [model/model.py](https://github.com/qianguih/voxelnet/blob/b74823daa328fc2fa99452bf79793e1f3c32c72a/model/model.py#L98).
18 | The PyTorch implementation ([train.py](../train.py)) only needs a single call to `clip_grad_norm_` (see the short sketch at the end of this file).
19 |
20 | # Non-maximum Suppression
21 | - The TensorFlow implementation uses the TensorFlow API `tf.image.non_max_suppression` to conduct non-maximum suppression [model/model.py](https://github.com/qianguih/voxelnet/blob/b74823daa328fc2fa99452bf79793e1f3c32c72a/model/model.py#L136).
22 | PyTorch does not provide such an API.
23 | - In the PyTorch implementation, a third-party open-source [NMS](https://gist.github.com/mkocabas/a2f565b27331af0da740c11c78699185) implementation is adopted. A potential problem arises when the number of candidate boxes is zero.
24 | - An alternative option is a C-based implementation, which can be found [here](https://github.com/multimodallearning/pytorch-mask-rcnn/tree/master/nms). It needs to be compiled but may be faster.
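# Gradient Clip: Usage Sketch
- A minimal, self-contained illustration of the single `clip_grad_norm_` call mentioned above. The model, optimizer, loss, and max-norm value are placeholders for whatever [train.py](../train.py) actually uses, not taken from it:
```
import torch
import torch.nn as nn

# Placeholder model/optimizer/loss; only the clipping call itself is the point here.
model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
loss = model(torch.randn(4, 10)).sum()

optimizer.zero_grad()
loss.backward()
# One call replaces the per-variable clipping block in the TensorFlow version.
nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)
optimizer.step()
```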
25 | -------------------------------------------------------------------------------- /model/group_pointcloud.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from config import cfg 5 | 6 | import pdb 7 | 8 | 9 | class VFELayer(nn.Module): 10 | def __init__(self, in_channels, out_channels): 11 | super(VFELayer, self).__init__() 12 | 13 | self.in_channels = in_channels 14 | self.out_channels = out_channels 15 | self.units = int(out_channels / 2) 16 | 17 | self.dense = nn.Sequential(nn.Linear(self.in_channels, self.units), nn.ReLU()) 18 | self.batch_norm = nn.BatchNorm1d(self.units) 19 | 20 | 21 | def forward(self, inputs, mask): 22 | # [ΣK, T, in_ch] -> [ΣK, T, units] -> [ΣK, units, T] 23 | tmp = self.dense(inputs).transpose(1, 2) 24 | # [ΣK, units, T] -> [ΣK, T, units] 25 | pointwise = self.batch_norm(tmp).transpose(1, 2) 26 | 27 | # [ΣK, 1, units] 28 | aggregated, _ = torch.max(pointwise, dim = 1, keepdim = True) 29 | 30 | # [ΣK, T, units] 31 | repeated = aggregated.expand(-1, cfg.VOXEL_POINT_COUNT, -1) 32 | 33 | # [ΣK, T, 2 * units] 34 | concatenated = torch.cat([pointwise, repeated], dim = 2) 35 | 36 | # [ΣK, T, 1] -> [ΣK, T, 2 * units] 37 | mask = mask.expand(-1, -1, 2 * self.units) 38 | 39 | concatenated = concatenated * mask.float() 40 | 41 | return concatenated 42 | 43 | 44 | class FeatureNet(nn.Module): 45 | def __init__(self): 46 | super(FeatureNet, self).__init__() 47 | 48 | self.vfe1 = VFELayer(7, 32) 49 | self.vfe2 = VFELayer(32, 128) 50 | 51 | 52 | def forward(self, feature, number, coordinate): 53 | 54 | batch_size = len(feature) 55 | 56 | feature = torch.cat(feature, dim = 0) # [ΣK, cfg.VOXEL_POINT_COUNT, 7]; cfg.VOXEL_POINT_COUNT = 35/45 57 | coordinate = torch.cat(coordinate, dim = 0) # [ΣK, 4]; each row stores (batch, d, h, w) 58 | 59 | vmax, _ = torch.max(feature, dim = 2, keepdim = True) 60 | mask = (vmax != 0) # [ΣK, T, 1] 61 | 62 | x = self.vfe1(feature, mask) 63 | x = self.vfe2(x, mask) 64 | 65 | # [ΣK, 128] 66 | voxelwise, _ = torch.max(x, dim = 1) 67 | 68 | # Car: [B, 10, 400, 352, 128]; Pedestrain/Cyclist: [B, 10, 200, 240, 128] 69 | outputs = torch.sparse.FloatTensor(coordinate.t(), voxelwise, torch.Size([batch_size, cfg.INPUT_DEPTH, cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 128])) 70 | 71 | outputs = outputs.to_dense() 72 | 73 | return outputs -------------------------------------------------------------------------------- /preproc/split.py: -------------------------------------------------------------------------------- 1 | import os 2 | from shutil import copyfile 3 | 4 | import pdb 5 | 6 | 7 | def rearrange(original_folders, target_folders, split_file): 8 | for _, v in original_folders.items(): 9 | if not os.path.isdir(v): 10 | raise Exception('No such folder: %s' % v) 11 | 12 | for _, v in target_folders.items(): 13 | if not os.path.isdir(v): 14 | os.makedirs(v) 15 | 16 | if not os.path.exists(split_file): 17 | raise Exception('No such split file: %s' % split_file) 18 | else: 19 | with open(split_file) as f: 20 | content = f.readlines() 21 | content = [x.strip() for x in content] 22 | 23 | for file in content: 24 | src_img = os.path.join(original_folders['IMG_ROOT'], file + '.png') 25 | src_pc = os.path.join(original_folders['PC_ROOT'], file + '.bin') 26 | src_label = os.path.join(original_folders['LABEL_ROOT'], file + '.txt') 27 | 28 | if (not os.path.exists(src_img)) or (not os.path.exists(src_pc)) or (not os.path.exists(src_label)): 29 | print('No such file: %s' % file) 30 | 
else: 31 | dst_img = os.path.join(target_folders['IMG_ROOT'], file + '.png') 32 | dst_pc = os.path.join(target_folders['PC_ROOT'], file + '.bin') 33 | dst_label = os.path.join(target_folders['LABEL_ROOT'], file + '.txt') 34 | 35 | copyfile(src_img, dst_img) 36 | copyfile(src_pc, dst_pc) 37 | copyfile(src_label, dst_label) 38 | 39 | 40 | if __name__ == '__main__': 41 | # Original folder 42 | original_folders = dict() 43 | original_folders['IMG_ROOT'] = './data/KITTI/image/training/image_2/' 44 | original_folders['PC_ROOT'] = './data/KITTI/point_cloud/training/velodyne/' 45 | original_folders['LABEL_ROOT'] = './data/KITTI/label/training/label_2/' 46 | 47 | # Modified folder 48 | train_folders = dict() 49 | train_folders['IMG_ROOT'] = './data/MD_KITTI/training/image_2/' 50 | train_folders['PC_ROOT'] = './data/MD_KITTI/training/velodyne/' 51 | train_folders['LABEL_ROOT'] = './data/MD_KITTI/training/label_2/' 52 | 53 | val_folders = dict() 54 | val_folders['IMG_ROOT'] = './data/MD_KITTI/validation/image_2/' 55 | val_folders['PC_ROOT'] = './data/MD_KITTI/validation/velodyne/' 56 | val_folders['LABEL_ROOT'] = './data/MD_KITTI/validation/label_2/' 57 | 58 | # Split file 59 | train_split_file = './data/KITTI/imagesets/ImageSets/train.txt' 60 | val_split_file = './data/KITTI/imagesets/ImageSets/val.txt' 61 | 62 | rearrange(original_folders, train_folders, train_split_file) 63 | rearrange(original_folders, val_folders, val_split_file) -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import pdb 4 | 5 | # Original author: Francisco Massa: 6 | # https://github.com/fmassa/object-detection.torch 7 | # Ported to PyTorch by Max deGroot (02/01/2017) 8 | 9 | def nms(boxes, scores, overlap = 0.5, top_k = 200): 10 | """Apply non-maximum suppression at test time to avoid detecting too many 11 | overlapping bounding boxes for a given object. 12 | Args: 13 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 14 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 15 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 16 | top_k: (int) The Maximum number of box preds to consider. 17 | Return: 18 | The indices of the kept boxes with respect to num_priors. 
19 | """ 20 | 21 | keep = scores.new(scores.size(0)).zero_().long() 22 | count = 0 23 | if boxes.numel() == 0: 24 | return keep, count 25 | x1 = boxes[:, 0] 26 | y1 = boxes[:, 1] 27 | x2 = boxes[:, 2] 28 | y2 = boxes[:, 3] 29 | area = torch.mul(x2 - x1, y2 - y1) 30 | v, idx = scores.sort(0) # sort in ascending order 31 | # I = I[v >= 0.01] 32 | idx = idx[-top_k:] # indices of the top-k largest vals 33 | xx1 = boxes.new() 34 | yy1 = boxes.new() 35 | xx2 = boxes.new() 36 | yy2 = boxes.new() 37 | w = boxes.new() 38 | h = boxes.new() 39 | 40 | # keep = torch.Tensor() 41 | while idx.numel() > 0: 42 | i = idx[-1] # index of current largest val 43 | # keep.append(i) 44 | keep[count] = i 45 | count += 1 46 | if idx.size(0) == 1: 47 | break 48 | idx = idx[:-1] # remove kept element from view 49 | # load bboxes of next highest vals 50 | torch.index_select(x1, 0, idx, out = xx1) 51 | torch.index_select(y1, 0, idx, out = yy1) 52 | torch.index_select(x2, 0, idx, out = xx2) 53 | torch.index_select(y2, 0, idx, out = yy2) 54 | # store element-wise max with next highest score 55 | xx1 = torch.clamp(xx1, min = x1[i]) 56 | yy1 = torch.clamp(yy1, min = y1[i]) 57 | xx2 = torch.clamp(xx2, max = x2[i]) 58 | yy2 = torch.clamp(yy2, max = y2[i]) 59 | w.resize_as_(xx2) 60 | h.resize_as_(yy2) 61 | w = xx2 - xx1 62 | h = yy2 - yy1 63 | # check sizes of xx1 and xx2.. after each iteration 64 | w = torch.clamp(w, min = 0.0) 65 | h = torch.clamp(h, min = 0.0) 66 | inter = w * h 67 | # IoU = i / (area(a) + area(b) - i) 68 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 69 | union = (rem_areas - inter) + area[i] 70 | IoU = inter / union # store result in iou 71 | # keep only elements with an IoU <= overlap 72 | idx = idx[IoU.le(overlap)] 73 | 74 | return keep, count -------------------------------------------------------------------------------- /utils/preprocess.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : preprocess.py 5 | # Purpose : 6 | # Creation Date : 10-12-2017 7 | # Last Modified : Thu 18 Jan 2018 05:34:42 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | import numpy as np 11 | 12 | from config import cfg 13 | 14 | data_dir = 'velodyne' 15 | 16 | def process_pointcloud(point_cloud, cls = cfg.DETECT_OBJ): 17 | # Input: 18 | # (N, 4) 19 | # Output: 20 | # voxel_dict 21 | if cls == 'Car': 22 | scene_size = np.array([4, 80, 70.4], dtype = np.float32) 23 | voxel_size = np.array([0.4, 0.2, 0.2], dtype = np.float32) 24 | grid_size = np.array([10, 400, 352], dtype = np.int64) 25 | lidar_coord = np.array([0, 40, 3], dtype = np.float32) 26 | max_point_number = 35 27 | else: 28 | scene_size = np.array([4, 40, 48], dtype = np.float32) 29 | voxel_size = np.array([0.4, 0.2, 0.2], dtype = np.float32) 30 | grid_size = np.array([10, 200, 240], dtype = np.int64) 31 | lidar_coord = np.array([0, 20, 3], dtype = np.float32) 32 | max_point_number = 45 33 | 34 | np.random.shuffle(point_cloud) 35 | 36 | shifted_coord = point_cloud[:, :3] + lidar_coord 37 | # reverse the point cloud coordinate (X, Y, Z) -> (Z, Y, X) 38 | voxel_index = np.floor( 39 | shifted_coord[:, ::-1] / voxel_size).astype(np.int) 40 | 41 | bound_x = np.logical_and( 42 | voxel_index[:, 2] >= 0, voxel_index[:, 2] < grid_size[2]) 43 | bound_y = np.logical_and( 44 | voxel_index[:, 1] >= 0, voxel_index[:, 1] < grid_size[1]) 45 | bound_z = np.logical_and( 46 | voxel_index[:, 0] >= 0, voxel_index[:, 0] < 
grid_size[0]) 47 | 48 | bound_box = np.logical_and(np.logical_and(bound_x, bound_y), bound_z) 49 | 50 | point_cloud = point_cloud[bound_box] 51 | voxel_index = voxel_index[bound_box] 52 | 53 | # [K, 3] coordinate buffer as described in the paper 54 | coordinate_buffer = np.unique(voxel_index, axis = 0) 55 | 56 | K = len(coordinate_buffer) 57 | T = max_point_number 58 | 59 | # [K, 1] store number of points in each voxel grid 60 | number_buffer = np.zeros(shape = (K), dtype = np.int64) 61 | 62 | # [K, T, 7] feature buffer as described in the paper 63 | feature_buffer = np.zeros(shape = (K, T, 7), dtype = np.float32) 64 | 65 | # build a reverse index for coordinate buffer 66 | index_buffer = {} 67 | for i in range(K): 68 | index_buffer[tuple(coordinate_buffer[i])] = i 69 | 70 | for voxel, point in zip(voxel_index, point_cloud): 71 | index = index_buffer[tuple(voxel)] 72 | number = number_buffer[index] 73 | if number < T: 74 | feature_buffer[index, number, :4] = point 75 | number_buffer[index] += 1 76 | 77 | feature_buffer[:, :, -3:] = feature_buffer[:, :, :3] - \ 78 | feature_buffer[:, :, :3].sum(axis = 1, keepdims = True)/number_buffer.reshape(K, 1, 1) 79 | 80 | voxel_dict = {'feature_buffer': feature_buffer, 81 | 'coordinate_buffer': coordinate_buffer, 82 | 'number_buffer': number_buffer} 83 | 84 | return voxel_dict 85 | 86 | -------------------------------------------------------------------------------- /utils/colorize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : colorize.py 5 | # Purpose : 6 | # Creation Date : 21-12-2017 7 | # Last Modified : Thu 21 Dec 2017 09:02:22 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | # ref: https://gist.github.com/jimfleming/c1adfdb0f526465c99409cc143dea97b 11 | 12 | import cv2 13 | cv2.setNumThreads(0) 14 | 15 | import numpy as np 16 | 17 | 18 | def colorize(value, factor = 1, vmin = None, vmax = None): 19 | """ 20 | A utility function for TensorFlow that maps a grayscale image to a matplotlib 21 | colormap for use with TensorBoard image summaries. 22 | 23 | By default it will normalize the input value to the range 0..1 before mapping 24 | to a grayscale colormap. 25 | 26 | Arguments: 27 | - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 28 | [height, width, 1]. 29 | - factor: resize factor, scalar 30 | - vmin: the minimum value of the range used for normalization. 31 | (Default: value minimum) 32 | - vmax: the maximum value of the range used for normalization. 33 | (Default: value maximum) 34 | 35 | Example usage: 36 | 37 | ``` 38 | output = tf.random_uniform(shape=[256, 256, 1]) 39 | output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 40 | tf.summary.image('output', output_color) 41 | ``` 42 | 43 | Returns a 3D tensor of shape [height, width, 3]. 
44 | """ 45 | 46 | # normalize 47 | value = np.sum(value, axis = -1) 48 | vmin = np.min(value) if vmin is None else vmin 49 | vmax = np.max(value) if vmax is None else vmax 50 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 51 | 52 | value = (value * 255).astype(np.uint8) 53 | value = cv2.applyColorMap(value, cv2.COLORMAP_JET) 54 | value = cv2.cvtColor(value, cv2.COLOR_BGR2RGB) 55 | x, y, _ = value.shape 56 | value = cv2.resize(value, (y * factor, x * factor)) 57 | 58 | return value 59 | 60 | 61 | # def tf_colorize(value, factor = 1, vmin = None, vmax = None, cmap = None): 62 | # """ 63 | # A utility function for TensorFlow that maps a grayscale image to a matplotlib 64 | # colormap for use with TensorBoard image summaries. 65 | # 66 | # By default it will normalize the input value to the range 0..1 before mapping 67 | # to a grayscale colormap. 68 | # 69 | # Arguments: 70 | # - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 71 | # [height, width, 1]. 72 | # - factor: resize factor, scalar 73 | # - vmin: the minimum value of the range used for normalization. 74 | # (Default: value minimum) 75 | # - vmax: the maximum value of the range used for normalization. 76 | # (Default: value maximum) 77 | # - cmap: a valid cmap named for use with matplotlib's `get_cmap`. 78 | # (Default: 'gray') 79 | # 80 | # Example usage: 81 | # 82 | # ``` 83 | # output = tf.random_uniform(shape=[256, 256, 1]) 84 | # output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 85 | # tf.summary.image('output', output_color) 86 | # ``` 87 | # 88 | # Returns a 3D tensor of shape [height, width, 3]. 89 | # """ 90 | # 91 | # # normalize 92 | # vmin = tf.reduce_min(value) if vmin is None else vmin 93 | # vmax = tf.reduce_max(value) if vmax is None else vmax 94 | # value = (value - vmin) / (vmax - vmin) # vmin..vmax 95 | # 96 | # # squeeze last dim if it exists 97 | # value = tf.squeeze(value) 98 | # 99 | # # quantize 100 | # indices = tf.to_int32(tf.round(value * 255)) 101 | # 102 | # # gather 103 | # cm = matplotlib.cm.get_cmap(cmap if cmap is not None else 'gray') 104 | # colors = tf.constant(cm.colors, dtype = tf.float32) 105 | # value = tf.gather(colors, indices) 106 | # 107 | # return value 108 | -------------------------------------------------------------------------------- /parse.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | import matplotlib.pyplot as plt 4 | 5 | import os 6 | import sys 7 | from collections import OrderedDict 8 | 9 | 10 | linestyles = OrderedDict( 11 | [('solid', (0, ())), 12 | ('loosely dotted', (0, (1, 10))), 13 | ('dotted', (0, (1, 5))), 14 | ('densely dotted', (0, (1, 1))), 15 | ('loosely dashed', (0, (5, 10))), 16 | ('dashed', (0, (5, 5))), 17 | ('densely dashed', (0, (5, 1))), 18 | ('loosely dashdotted', (0, (3, 10, 1, 10))), 19 | ('dashdotted', (0, (3, 5, 1, 5))), 20 | ('densely dashdotted', (0, (3, 1, 1, 1))), 21 | ('loosely dashdotdotted', (0, (3, 10, 1, 10, 1, 10))), 22 | ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))), 23 | ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1)))]) 24 | 25 | # These are the "Tableau 20" colors as RGB. 
26 | tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120), 27 | (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150), 28 | (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148), 29 | (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199), 30 | (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)] 31 | 32 | # Scale the RGB values to the [0, 1] range, which is the format matplotlib accepts. 33 | for i in range(len(tableau20)): 34 | r, g, b = tableau20[i] 35 | tableau20[i] = (r / 255., g / 255., b / 255.) 36 | 37 | 38 | 39 | ROOT_DIR = sys.argv[1] 40 | 41 | det_3d = [[] for _ in range(3)] 42 | det_bv = [[] for _ in range(3)] 43 | 44 | for epoch in range(9, 200, 10): 45 | log_file = os.path.join(ROOT_DIR, str(epoch), 'log') 46 | if not os.path.exists(log_file): 47 | break 48 | else: 49 | lines = open(log_file).readlines() 50 | for line in lines: 51 | line = line.split() 52 | if line[0] == 'car_detection_ground': 53 | det_bv[0].append( float(line[-3] )) 54 | det_bv[1].append( float(line[-2] )) 55 | det_bv[2].append( float(line[-1] )) 56 | elif line[0] == 'car_detection_3d': 57 | det_3d[0].append( float(line[-3])) 58 | det_3d[1].append( float(line[-2])) 59 | det_3d[2].append( float(line[-1])) 60 | 61 | RANGE = range(len(det_bv[0])) 62 | 63 | plt.figure(figsize = (10, 7)) 64 | 65 | plt.plot(RANGE, det_3d[0], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[0]) 66 | plt.plot(RANGE, det_3d[1], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[2]) 67 | plt.plot(RANGE, det_3d[2], linestyle = linestyles['solid'], linewidth = 1.5, color = tableau20[4]) 68 | plt.plot(RANGE, det_bv[0], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[0]) 69 | plt.plot(RANGE, det_bv[1], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[2]) 70 | plt.plot(RANGE, det_bv[2], linestyle = linestyles['densely dotted'], linewidth = 1.5, color = tableau20[4]) 71 | 72 | 73 | plt.legend(['3d easy', '3d moderate', '3d hard', 'bird view easy', 'bird view moderate', 'bird view hard'], loc = 4) 74 | 75 | plt.xlabel('Epoch', fontsize = 16) 76 | plt.xticks(RANGE, range(9, len(RANGE) * 10, 10)) 77 | plt.xticks(fontsize = 14) 78 | 79 | plt.ylabel('AP', fontsize = 16) 80 | plt.ylim(35, 95) 81 | plt.yticks(range(35, 95, 5)) 82 | plt.yticks(fontsize = 14) 83 | 84 | plt.grid(linestyle = linestyles['dotted']) 85 | 86 | DIR_NAME = ROOT_DIR.split('/')[-1] 87 | 88 | OUTPUT_NAME = DIR_NAME + '.jpg' 89 | plt.savefig(OUTPUT_NAME) 90 | 91 | print('results parsed and saved in: ' + OUTPUT_NAME) 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /preproc/crop.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.misc import imread 3 | 4 | import pdb 5 | 6 | CAM = 2 7 | 8 | def load_velodyne_points(filename): 9 | # Load data points 10 | points = np.fromfile(filename, dtype = np.float32).reshape(-1, 4) # (N, 4); N is around 10k, 4 means [x, y, z, r] 11 | 12 | return points 13 | 14 | 15 | def load_calib(calib_dir): 16 | # P2 * R0_rect * Tr_velo_to_cam * y 17 | lines = open(calib_dir).readlines() 18 | lines = [line.split()[1:] for line in lines][:-1] 19 | 20 | P = np.array(lines[CAM]).reshape(3, 4) 21 | 22 | Tr_velo_to_cam = np.array(lines[5]).reshape(3, 4) 23 | Tr_velo_to_cam = np.concatenate([Tr_velo_to_cam, np.array([0, 0, 0, 1]).reshape(1, 4)], 0) 24 | 25 | 
R_cam_to_rect = np.eye(4) 26 | R_cam_to_rect[:3, :3] = np.array(lines[4][:9]).reshape(3, 3) 27 | 28 | P = P.astype('float32') 29 | Tr_velo_to_cam = Tr_velo_to_cam.astype('float32') 30 | R_cam_to_rect = R_cam_to_rect.astype('float32') 31 | 32 | return P, Tr_velo_to_cam, R_cam_to_rect 33 | 34 | 35 | def prepare_velo_points(pts3d_raw): 36 | # Replaces the reflectance value by 1, and tranposes the array, 37 | # so points can be directly multiplied by the camera projection matrix 38 | pts3d = pts3d_raw 39 | 40 | # Reflectance > 0 41 | indices = pts3d[:, 3] > 0 42 | pts3d = pts3d[indices ,:] 43 | pts3d[:, 3] = 1 44 | 45 | return pts3d.transpose(), indices 46 | 47 | 48 | def project_velo_points_in_img(pts3d, T_cam_velo, Rrect, Prect): 49 | # Project 3D points into 2D image. Expects pts3d as a 4xN numpy array. 50 | # Returns the 2D projection of the points that are in front of the camera only an the corresponding 3D points. 51 | 52 | # 3D points in camera reference frame. 53 | pts3d_cam = Rrect.dot(T_cam_velo.dot(pts3d)) 54 | 55 | # Before projecting, keep only points with z>0 (points that are in front of the camera). 56 | idx = (pts3d_cam[2, :] >= 0) 57 | pts2d_cam = Prect.dot(pts3d_cam[:, idx]) 58 | 59 | return pts3d[:, idx], pts2d_cam / pts2d_cam[2, :], idx 60 | 61 | 62 | def align_img_and_pc(img_dir, pc_dir, calib_dir): 63 | 64 | img = imread(img_dir) 65 | pts = load_velodyne_points(pc_dir) 66 | P, Tr_velo_to_cam, R_cam_to_rect = load_calib(calib_dir) 67 | 68 | pts3d, indices = prepare_velo_points(pts) 69 | 70 | reflectances = pts[indices, 3] 71 | pts3d, pts2d_normed, idx = project_velo_points_in_img(pts3d, Tr_velo_to_cam, R_cam_to_rect, P) 72 | 73 | # Print reflectances.shape, idx.shape 74 | reflectances = reflectances[idx] 75 | 76 | # Print reflectances.shape, pts3d.shape, pts2d_normed.shape 77 | assert reflectances.shape[0] == pts3d.shape[1] == pts2d_normed.shape[1] 78 | 79 | rows, cols = img.shape[:2] 80 | 81 | points = [] 82 | for i in range(pts2d_normed.shape[1]): 83 | c = int(np.round(pts2d_normed[0,i])) 84 | r = int(np.round(pts2d_normed[1,i])) 85 | if c < cols and r < rows and r > 0 and c > 0: 86 | color = img[r, c, :] 87 | point = [pts3d[0, i], pts3d[1, i], pts3d[2, i], reflectances[i], color[0], color[1], color[2], pts2d_normed[0, i], pts2d_normed[1, i]] 88 | points.append(point) 89 | 90 | points = np.array(points) 91 | 92 | return points 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | # Update the following directories 98 | IMG_ROOT = './data/KITTI/image/training/image_2/' 99 | PC_ROOT = './data/KITTI/point_cloud/training/velodyne/' 100 | CALIB_ROOT = './data/KITTI/calib/training/calib/' 101 | 102 | for frame in range(0, 7481): 103 | img_dir = IMG_ROOT + '%06d.png' % frame 104 | pc_dir = PC_ROOT + '%06d.bin' % frame 105 | calib_dir = CALIB_ROOT + '%06d.txt' % frame 106 | 107 | points = align_img_and_pc(img_dir, pc_dir, calib_dir) 108 | 109 | output_name = PC_ROOT + str(frame) + '.bin' 110 | points[:,:4].astype('float32').tofile(output_name) 111 | print(output_name) 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /loader/kitti.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import numpy as np 4 | 5 | import cv2 6 | cv2.setNumThreads(0) 7 | 8 | import torch 9 | import torch.utils.data as data_utl 10 | 11 | from config import cfg 12 | from utils.data_aug import aug_data 13 | from utils.preprocess import process_pointcloud 14 | 15 | import pdb 
16 | 17 | 18 | class Processor: 19 | def __init__(self, data_tag, f_rgb, f_lidar, f_label, data_dir, aug, is_testset): 20 | self.data_tag = data_tag 21 | self.f_rgb = f_rgb 22 | self.f_lidar = f_lidar 23 | self.f_label = f_label 24 | self.data_dir = data_dir 25 | self.aug = aug 26 | self.is_testset = is_testset 27 | 28 | 29 | def __call__(self, load_index): 30 | if self.aug: 31 | ret = aug_data(self.data_tag[load_index], self.data_dir) 32 | else: 33 | rgb = cv2.resize(cv2.imread(self.f_rgb[load_index]), (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)) 34 | raw_lidar = np.fromfile(self.f_lidar[load_index], dtype = np.float32).reshape((-1, 4)) 35 | if not self.is_testset: 36 | labels = [line for line in open(self.f_label[load_index], 'r').readlines()] 37 | else: 38 | labels = [''] 39 | tag = self.data_tag[load_index] 40 | voxel = process_pointcloud(raw_lidar) 41 | ret = [tag, rgb, raw_lidar, voxel, labels] 42 | 43 | return ret 44 | 45 | 46 | class KITTI(data_utl.Dataset): 47 | def __init__(self, data_dir, shuffle = False, aug = False, is_testset = False): 48 | self.data_dir = data_dir 49 | self.shuffle = shuffle 50 | self.aug = aug 51 | self.is_testset = is_testset 52 | 53 | self.f_rgb = glob.glob(os.path.join(self.data_dir, 'image_2', '*.png')) 54 | self.f_lidar = glob.glob(os.path.join(self.data_dir, 'velodyne', '*.bin')) 55 | self.f_label = glob.glob(os.path.join(self.data_dir, 'label_2', '*.txt')) 56 | 57 | self.f_rgb.sort() 58 | self.f_lidar.sort() 59 | self.f_label.sort() 60 | 61 | self.data_tag = [name.split('/')[-1].split('.')[-2] for name in self.f_rgb] 62 | 63 | assert len(self.data_tag) != 0, 'Dataset folder is not correct!' 64 | assert len(self.data_tag) == len(self.f_rgb) == len(self.f_lidar), 'Dataset folder is not correct!' 65 | 66 | nums = len(self.f_rgb) 67 | self.indices = list(range(nums)) 68 | if self.shuffle: 69 | np.random.shuffle(self.indices) 70 | 71 | # Build a data processor 72 | self.proc = Processor(self.data_tag, self.f_rgb, self.f_lidar, self.f_label, self.data_dir, self.aug, self.is_testset) 73 | 74 | 75 | def __getitem__(self, index): 76 | # A list of [tag, rgb, raw_lidar, voxel, labels] 77 | ret = self.proc(self.indices[index]) 78 | 79 | return ret 80 | 81 | 82 | def __len__(self): 83 | return len(self.indices) 84 | 85 | 86 | def collate_fn(rets): 87 | tag = [ret[0] for ret in rets] 88 | rgb = [ret[1] for ret in rets] 89 | raw_lidar = [ret[2] for ret in rets] 90 | voxel = [ret[3] for ret in rets] 91 | labels = [ret[4] for ret in rets] 92 | 93 | # Only for voxel 94 | _, vox_feature, vox_number, vox_coordinate = build_input(voxel) 95 | 96 | res = ( 97 | np.array(tag), 98 | np.array(labels), 99 | [torch.from_numpy(x) for x in vox_feature], 100 | np.array(vox_number), 101 | [torch.from_numpy(x) for x in vox_coordinate], 102 | np.array(rgb), 103 | np.array(raw_lidar) 104 | ) 105 | 106 | return res 107 | 108 | 109 | def build_input(voxel_dict_list): 110 | batch_size = len(voxel_dict_list) 111 | 112 | feature_list = [] 113 | number_list = [] 114 | coordinate_list = [] 115 | for i, voxel_dict in zip(range(batch_size), voxel_dict_list): 116 | feature_list.append(voxel_dict['feature_buffer']) # (K, T, 7); K is max number of non-empty voxels; T = 35 117 | number_list.append(voxel_dict['number_buffer']) # (K,) 118 | coordinate = voxel_dict['coordinate_buffer'] # (K, 3) 119 | coordinate_list.append(np.pad(coordinate, ((0, 0), (1, 0)), mode = 'constant', constant_values = i)) 120 | 121 | # feature = np.concatenate(feature_list) 122 | # number = np.concatenate(number_list) 123 | # 
coordinate = np.concatenate(coordinate_list) 124 | # 125 | # return batch_size, feature, number, coordinate 126 | 127 | return batch_size, feature_list, number_list, coordinate_list -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import cv2 5 | cv2.setNumThreads(0) 6 | 7 | import torch 8 | import torch.nn as nn 9 | from torch.utils.data import DataLoader as DataLoader 10 | from loader.kitti import collate_fn 11 | 12 | from config import cfg 13 | from utils.utils import box3d_to_label 14 | from model.model import RPN3D 15 | from loader.kitti import KITTI as Dataset 16 | 17 | 18 | parser = argparse.ArgumentParser(description = 'training') 19 | 20 | parser.add_argument('--tag', type = str, default = 'default', help = 'log tag') 21 | parser.add_argument('--output_path', type = str, default = './preds', help = 'results output dir') 22 | parser.add_argument('--vis', type = bool, default = True, help = 'set to True if dumping visualization') 23 | 24 | parser.add_argument('--batch_size', type = int, default = 2, help = 'batch size') 25 | 26 | parser.add_argument('--resumed_model', type = str, default = '', help = 'if specified, load the specified model') 27 | 28 | 29 | args = parser.parse_args() 30 | 31 | 32 | def run(): 33 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in cfg.GPU_AVAILABLE) 34 | 35 | # Build data loader 36 | val_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'validation'), shuffle = False, aug = False, is_testset = False) 37 | val_dataloader = DataLoader(val_dataset, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn, 38 | num_workers = args.workers, pin_memory = False) 39 | 40 | # Build model 41 | model = RPN3D(cfg.DETECT_OBJ) 42 | 43 | # Resume model 44 | if args.resumed_model: 45 | model_file = os.path.join(save_model_dir, args.resumed_model) 46 | if os.path.isfile(model_file): 47 | checkpoint = torch.load(model_file) 48 | model.load_state_dict(checkpoint['state_dict']) 49 | print(("=> Loaded checkpoint '{}' (epoch {}, global_counter {})".format( 50 | args.resumed_model, checkpoint['epoch'], checkpoint['global_counter']))) 51 | else: 52 | print(("=> No checkpoint found at '{}'".format(args.resumed_model))) 53 | else: 54 | raise Exception('No pre-trained model to test!') 55 | 56 | model = nn.DataParallel(model).cuda() 57 | 58 | model.train(False) # Validation mode 59 | 60 | with torch.no_grad(): 61 | for (i, val_data) in enumerate(val_dataloader): 62 | 63 | # Forward pass for validation and prediction 64 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 65 | 66 | front_images, bird_views, heatmaps = None, None, None 67 | if args.vis: 68 | tags, ret_box3d_scores, front_images, bird_views, heatmaps = \ 69 | model.module.predict(val_data, probs, deltas, summary = False, vis = True) 70 | else: 71 | tags, ret_box3d_scores = model.module.predict(val_data, probs, deltas, summary = False, vis = False) 72 | 73 | # tags: (N) 74 | # ret_box3d_scores: (N, N'); (class, x, y, z, h, w, l, rz, score) 75 | for tag, score in zip(tags, ret_box3d_scores): 76 | output_path = os.path.join(args.output_path, 'data', tag + '.txt') 77 | with open(output_path, 'w+') as f: 78 | labels = box3d_to_label([score[:, 1:8]], [score[:, 0]], [score[:, -1]], coordinate = 'lidar')[0] 79 | for line in labels: 80 | f.write(line) 81 | print('Write out {} objects to 
{}'.format(len(labels), tag)) 82 | 83 | # Dump visualizations 84 | if args.vis: 85 | for tag, front_image, bird_view, heatmap in zip(tags, front_images, bird_views, heatmaps): 86 | front_img_path = os.path.join(args.output_path, 'vis', tag + '_front.jpg') 87 | bird_view_path = os.path.join(args.output_path, 'vis', tag + '_bv.jpg') 88 | heatmap_path = os.path.join(args.output_path, 'vis', tag + '_heatmap.jpg') 89 | cv2.imwrite(front_img_path, front_image) 90 | cv2.imwrite(bird_view_path, bird_view) 91 | cv2.imwrite(heatmap_path, heatmap) 92 | 93 | 94 | if __name__ == '__main__': 95 | dataset_dir = cfg.DATA_DIR 96 | val_dir = os.path.join(cfg.DATA_DIR, 'validation') 97 | save_model_dir = os.path.join('./saved', args.tag) 98 | 99 | # Create output folder 100 | os.makedirs(args.output_path, exist_ok = True) 101 | os.makedirs(os.path.join(args.output_path, 'data'), exist_ok = True) 102 | if args.vis: 103 | os.makedirs(os.path.join(args.output_path, 'vis'), exist_ok = True) 104 | 105 | run() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This is an unofficial inplementation of [VoxelNet: End-to-End Learning for Point Cloud Based 3D Object Detection](https://arxiv.org/abs/1711.06396) in PyTorch. 3 | This project is based on the work (in TensorFlow) [here](https://github.com/qianguih/voxelnet). 4 | Thanks to [@qianguih](https://github.com/qianguih). 5 | 6 | 7 | # Dependencies 8 | - `python3.5+` 9 | - `Pytorch` (tested on 0.4.1) 10 | - `TensorBoardX` (tested on 1.4) 11 | - `OpenCV` 12 | - `Pillow` (for add_image in TensorBoardX) 13 | - `Boost` (for compiling evaluation code) 14 | 15 | 16 | # Installation 17 | 1. Clone this repository. 18 | 19 | 2. Compile the Cython module: 20 | ```bash 21 | $ python utils/setup.py build_ext --inplace 22 | ``` 23 | 24 | 3. Compile the evaluation code: 25 | ```bash 26 | $ cd eval/KITTI 27 | $ g++ -I path/to/boost/include -o evaluate_object_3d_offline evaluate_object_3d_offline.cpp 28 | ``` 29 | 30 | 4. grant the execution permission to evaluation script: 31 | ```bash 32 | $ cd eval/KITTI 33 | $ chmod +x launch_test.sh 34 | ``` 35 | 36 | 37 | # Data Preparation 38 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). 39 | Description of annotation can be found [here](https://github.com/yanii/kitti-pcl/blob/master/KITTI_README.TXT). Data to download includes: 40 | * Velodyne point clouds (29 GB): unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/point_cloud'` 41 | * Training labels of object data set (5 MB) for input labels of VoxelNet: unzip it and put `'training'` sub-folder into `'data/KITTI/label'` 42 | * Camera calibration matrices of object data set (16 MB) for visualization of predictions: unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/calib'` 43 | * Left color images of object data set (12 GB) for visualization of predictions: unzip it and put `'training'` and `'testing'` sub-folders into `'data/KITTI/image'` 44 | 45 | 2. Crop point cloud data for training and validation. Point clouds outside the image coordinates are removed. Modify data path in `preproc/crop.py` and run it to generate cropped data. Note that cropped point cloud data will overwrite raw point cloud data. 46 | 47 | 3. 
Download the train/val split protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz) and untar it into `'data/KITTI'`. Modify the data path in `'preproc/split.py'` and run it to generate the train/val folders, which have the following structure:
48 | ```plain
49 | └── data
50 |     ├── KITTI
51 |     └── MD_KITTI
52 |         ├── training   <-- training data
53 |         |   ├── image_2
54 |         |   ├── label_2
55 |         |   └── velodyne
56 |         └── validation <--- evaluation data
57 |             ├── image_2
58 |             ├── label_2
59 |             └── velodyne
60 | ```
61 |
62 | 4. Update the dataset directory in `config.py` and `eval/KITTI/launch_test.sh`
63 |
64 |
65 | # Train
66 | 1. Specify the GPUs to use in `config.py`. Currently, the code only supports a single GPU.
67 | 2. Run `train.sh` with the desired hyper-parameters to start training:
68 | ```bash
69 | $ bash train.sh
70 | ```
71 | Note that more hyper-parameters can be specified in this bash file or in the Python file directly.
72 | Another group of hyper-parameters can be found in the original [repo](https://github.com/qianguih/voxelnet).
73 |
74 | During training, training statistics are recorded in `log/default`, which can be monitored by TensorBoardX using the command `tensorboard --logdir=path/to/log/default`. Models are saved in `saved/default`.
75 |
76 | Intermediate validation results will be dumped into the folder `preds/XXX/data`, with `XXX` being the epoch number. Metrics will be calculated (by calling `eval/KITTI/launch_test.sh`) and saved in `predictions/XXX/log`.
77 |
78 | If the `--vis` flag is set to `True`, visualizations of intermediate results will be dumped into the folder `preds/XXX/vis`.
79 |
80 | 3. When the training is done, run `parse.sh` to generate the learning curve.
81 | ```bash
82 | $ bash parse.sh
83 | ```
84 |
85 | 4. There is a pre-trained model for Car in `save_model/pre_trained_car`.
86 |
87 |
88 | # Evaluate
89 | 1. Run `test.sh` to produce final predictions on the validation set after training is done. Changing the `--tag` flag to `pre_trained_car` will test the pre-trained model (to be done).
90 | ```bash
91 | $ bash test.sh
92 | ```
93 | Note that results will be dumped into `preds/data`. Set the `--vis` flag to `True` to dump visualizations into `preds/vis`.
94 |
95 | 2.
Run the following command to measure the quantitative performance of predictions: 96 | ```bash 97 | $ ./eval/KITTI/evaluate_object_3d_offline ./data/MD_KITTI/validation/label_2 ./preds 98 | ``` 99 | 100 | 101 | # TODO 102 | - [ ] Support multiple GPUs 103 | - [ ] reproduce results for `Car`, `Pedestrian` and `Cyclist` 104 | 105 | 106 | -------------------------------------------------------------------------------- /utils/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | from cython.parallel import prange, parallel 11 | 12 | 13 | DTYPE = np.float32 14 | ctypedef float DTYPE_t 15 | 16 | 17 | def bbox_overlaps( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | 59 | def bbox_intersections( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 62 | """ 63 | For each query box compute the intersection ratio covered by boxes 64 | ---------- 65 | Parameters 66 | ---------- 67 | boxes: (N, 4) ndarray of float 68 | query_boxes: (K, 4) ndarray of float 69 | Returns 70 | ------- 71 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 72 | """ 73 | cdef unsigned int N = boxes.shape[0] 74 | cdef unsigned int K = query_boxes.shape[0] 75 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 76 | cdef DTYPE_t iw, ih, box_area 77 | cdef DTYPE_t ua 78 | cdef unsigned int k, n 79 | for k in range(K): 80 | box_area = ( 81 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 82 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 83 | ) 84 | for n in range(N): 85 | iw = ( 86 | min(boxes[n, 2], query_boxes[k, 2]) - 87 | max(boxes[n, 0], query_boxes[k, 0]) + 1 88 | ) 89 | if iw > 0: 90 | ih = ( 91 | min(boxes[n, 3], query_boxes[k, 3]) - 92 | max(boxes[n, 1], query_boxes[k, 1]) + 1 93 | ) 94 | if ih > 0: 95 | intersec[n, k] = iw * ih / box_area 96 | return intersec 97 | 98 | # Compute bounding box voting 99 | def box_vote( 100 | np.ndarray[float, ndim=2] dets_NMS, 101 | np.ndarray[float, ndim=2] dets_all): 102 | cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], 
dets_NMS.shape[1]), dtype=np.float32) 103 | cdef unsigned int N = dets_NMS.shape[0] 104 | cdef unsigned int M = dets_all.shape[0] 105 | 106 | cdef np.ndarray[float, ndim=1] det 107 | cdef np.ndarray[float, ndim=1] acc_box 108 | cdef float acc_score 109 | 110 | cdef np.ndarray[float, ndim=1] det2 111 | cdef float bi0, bi1, bit2, bi3 112 | cdef float iw, ih, ua 113 | 114 | cdef float thresh=0.5 115 | 116 | for i in range(N): 117 | det = dets_NMS[i, :] 118 | acc_box = np.zeros((4), dtype=np.float32) 119 | acc_score = 0.0 120 | 121 | for m in range(M): 122 | det2 = dets_all[m, :] 123 | 124 | bi0 = max(det[0], det2[0]) 125 | bi1 = max(det[1], det2[1]) 126 | bi2 = min(det[2], det2[2]) 127 | bi3 = min(det[3], det2[3]) 128 | 129 | iw = bi2 - bi0 + 1 130 | ih = bi3 - bi1 + 1 131 | 132 | if not (iw > 0 and ih > 0): 133 | continue 134 | 135 | ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih 136 | ov = iw * ih / ua 137 | 138 | if (ov < thresh): 139 | continue 140 | 141 | acc_box += det2[4] * det2[0:4] 142 | acc_score += det2[4] 143 | 144 | dets_voted[i][0:4] = acc_box / acc_score 145 | dets_voted[i][4] = det[4] # Keep the original score 146 | 147 | return dets_voted 148 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | 5 | """ 6 | VoxelNet config system. 7 | """ 8 | 9 | import os.path as osp 10 | from easydict import EasyDict as edict 11 | import math 12 | 13 | __C = edict() 14 | # Consumers can get config by: import config as cfg 15 | cfg = __C 16 | 17 | # For dataset dir 18 | __C.DATA_DIR = './data/MD_KITTI' 19 | __C.CALIB_DIR = './data/KITTI/calib/training/calib' 20 | 21 | 22 | # For gpu allocation 23 | __C.GPU_AVAILABLE = '0' 24 | __C.GPU_USE_COUNT = len(__C.GPU_AVAILABLE.split(',')) 25 | __C.GPU_MEMORY_FRACTION = 1 26 | 27 | # Selected object 28 | __C.DETECT_OBJ = 'Car' # Pedestrian/Cyclist 29 | if __C.DETECT_OBJ == 'Car': 30 | __C.Z_MIN = -3 31 | __C.Z_MAX = 1 32 | __C.Y_MIN = -40 33 | __C.Y_MAX = 40 34 | __C.X_MIN = 0 35 | __C.X_MAX = 70.4 36 | __C.VOXEL_Z_SIZE = 0.4 37 | __C.VOXEL_Y_SIZE = 0.2 38 | __C.VOXEL_X_SIZE = 0.2 39 | __C.VOXEL_POINT_COUNT = 35 40 | __C.INPUT_DEPTH = int((__C.Z_MAX - __C.Z_MIN) / __C.VOXEL_Z_SIZE) 41 | __C.INPUT_HEIGHT = int((__C.Y_MAX - __C.Y_MIN) / __C.VOXEL_Y_SIZE) 42 | __C.INPUT_WIDTH = int((__C.X_MAX - __C.X_MIN) / __C.VOXEL_X_SIZE) 43 | __C.FEATURE_RATIO = 2 44 | __C.FEATURE_WIDTH = int(__C.INPUT_WIDTH / __C.FEATURE_RATIO) 45 | __C.FEATURE_HEIGHT = int(__C.INPUT_HEIGHT / __C.FEATURE_RATIO) 46 | else: 47 | __C.Z_MIN = -3 48 | __C.Z_MAX = 1 49 | __C.Y_MIN = -20 50 | __C.Y_MAX = 20 51 | __C.X_MIN = 0 52 | __C.X_MAX = 48 53 | __C.VOXEL_Z_SIZE = 0.4 54 | __C.VOXEL_X_SIZE = 0.2 55 | __C.VOXEL_Y_SIZE = 0.2 56 | __C.VOXEL_POINT_COUNT = 45 57 | __C.INPUT_DEPTH = int((__C.Z_MAX - __C.Z_MIN) / __C.VOXEL_Z_SIZE) 58 | __C.INPUT_WIDTH = int((__C.X_MAX - __C.X_MIN) / __C.VOXEL_X_SIZE) 59 | __C.INPUT_HEIGHT = int((__C.Y_MAX - __C.Y_MIN) / __C.VOXEL_Y_SIZE) 60 | __C.FEATURE_RATIO = 2 61 | __C.FEATURE_WIDTH = int(__C.INPUT_WIDTH / __C.FEATURE_RATIO) 62 | __C.FEATURE_HEIGHT = int(__C.INPUT_HEIGHT / __C.FEATURE_RATIO) 63 | 64 | # Set the log image scale factor 65 | __C.BV_LOG_FACTOR = 4 66 | 67 | # for data set type 68 | __C.DATA_SETS_TYPE = 'kitti' 69 | 70 | # Root directory of project 71 | __C.CHECKPOINT_DIR = osp.join('checkpoint') 
72 | __C.LOG_DIR = osp.join('log') 73 | 74 | # for data preprocess 75 | # sensors 76 | __C.VELODYNE_ANGULAR_RESOLUTION = 0.08 / 180 * math.pi 77 | __C.VELODYNE_VERTICAL_RESOLUTION = 0.4 / 180 * math.pi 78 | __C.VELODYNE_HEIGHT = 1.73 79 | 80 | # rgb 81 | if __C.DATA_SETS_TYPE == 'kitti': 82 | __C.IMAGE_WIDTH = 1242 83 | __C.IMAGE_HEIGHT = 375 84 | __C.IMAGE_CHANNEL = 3 85 | 86 | # top 87 | if __C.DATA_SETS_TYPE == 'kitti': 88 | __C.TOP_Y_MIN = -30 89 | __C.TOP_Y_MAX = +30 90 | __C.TOP_X_MIN = 0 91 | __C.TOP_X_MAX = 80 92 | __C.TOP_Z_MIN = -4.2 93 | __C.TOP_Z_MAX = 0.8 94 | 95 | __C.TOP_X_DIVISION = 0.1 96 | __C.TOP_Y_DIVISION = 0.1 97 | __C.TOP_Z_DIVISION = 0.2 98 | 99 | __C.TOP_WIDTH = (__C.TOP_X_MAX - __C.TOP_X_MIN) // __C.TOP_X_DIVISION 100 | __C.TOP_HEIGHT = (__C.TOP_Y_MAX - __C.TOP_Y_MIN) // __C.TOP_Y_DIVISION 101 | __C.TOP_CHANNEL = (__C.TOP_Z_MAX - __C.TOP_Z_MIN) // __C.TOP_Z_DIVISION 102 | 103 | # for 2d proposal to 3d proposal 104 | __C.PROPOSAL3D_Z_MIN = -2.3 # -2.52 105 | __C.PROPOSAL3D_Z_MAX = 1.5 # -1.02 106 | 107 | # for RPN basenet choose 108 | __C.USE_VGG_AS_RPN = 0 109 | __C.USE_RESNET_AS_RPN = 0 110 | __C.USE_RESNEXT_AS_RPN = 0 111 | 112 | # for camera and lidar coordination convert 113 | if __C.DATA_SETS_TYPE == 'kitti': 114 | # cal mean from train set 115 | __C.MATRIX_P2 = ([[719.787081, 0., 608.463003, 44.9538775], 116 | [0., 719.787081, 174.545111, 0.1066855], 117 | [0., 0., 1., 3.0106472e-03], 118 | [0., 0., 0., 0]]) 119 | 120 | # Cal mean from train set 121 | __C.MATRIX_T_VELO_2_CAM = ([ 122 | [7.49916597e-03, -9.99971248e-01, -8.65110297e-04, -6.71807577e-03], 123 | [1.18652889e-02, 9.54520517e-04, -9.99910318e-01, -7.33152811e-02], 124 | [9.99882833e-01, 7.49141178e-03, 1.18719929e-02, -2.78557062e-01], 125 | [0, 0, 0, 1] 126 | ]) 127 | # Cal mean from train set 128 | __C.MATRIX_R_RECT_0 = ([ 129 | [0.99992475, 0.00975976, -0.00734152, 0], 130 | [-0.0097913, 0.99994262, -0.00430371, 0], 131 | [0.00729911, 0.0043753, 0.99996319, 0], 132 | [0, 0, 0, 1] 133 | ]) 134 | 135 | # Faster-RCNN/SSD Hyper params 136 | if __C.DETECT_OBJ == 'Car': 137 | # Car anchor 138 | __C.ANCHOR_L = 3.9 139 | __C.ANCHOR_W = 1.6 140 | __C.ANCHOR_H = 1.56 141 | __C.ANCHOR_Z = -1.0 - cfg.ANCHOR_H/2 142 | __C.RPN_POS_IOU = 0.6 143 | __C.RPN_NEG_IOU = 0.45 144 | 145 | elif __C.DETECT_OBJ == 'Pedestrian': 146 | # Pedestrian anchor 147 | __C.ANCHOR_L = 0.8 148 | __C.ANCHOR_W = 0.6 149 | __C.ANCHOR_H = 1.73 150 | __C.ANCHOR_Z = -0.6 - cfg.ANCHOR_H/2 151 | __C.RPN_POS_IOU = 0.5 152 | __C.RPN_NEG_IOU = 0.35 153 | 154 | if __C.DETECT_OBJ == 'Cyclist': 155 | # Cyclist anchor 156 | __C.ANCHOR_L = 1.76 157 | __C.ANCHOR_W = 0.6 158 | __C.ANCHOR_H = 1.73 159 | __C.ANCHOR_Z = -0.6 - cfg.ANCHOR_H/2 160 | __C.RPN_POS_IOU = 0.5 161 | __C.RPN_NEG_IOU = 0.35 162 | 163 | # for rpn nms 164 | __C.RPN_NMS_POST_TOPK = 20 165 | __C.RPN_NMS_THRESH = 0.1 166 | __C.RPN_SCORE_THRESH = 0.96 167 | 168 | 169 | # utils 170 | __C.CORNER2CENTER_AVG = True # average version or max version 171 | 172 | if __name__ == '__main__': 173 | print('__C.ROOT_DIR = ' + __C.ROOT_DIR) 174 | print('__C.DATA_SETS_DIR = ' + __C.DATA_SETS_DIR) 175 | -------------------------------------------------------------------------------- /model/rpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from config import cfg 6 | 7 | import pdb 8 | 9 | 10 | class ConvMD(nn.Module): 11 | def __init__(self, M, cin, cout, k, s, p, bn = True, 
activation = True): 12 | super(ConvMD, self).__init__() 13 | 14 | self.M = M # Dimension of input 15 | self.cin = cin 16 | self.cout = cout 17 | self.k = k 18 | self.s = s 19 | self.p = p 20 | self.bn = bn 21 | self.activation = activation 22 | 23 | if self.M == 2: # 2D input 24 | self.conv = nn.Conv2d(self.cin, self.cout, self.k, self.s, self.p) 25 | if self.bn: 26 | self.batch_norm = nn.BatchNorm2d(self.cout) 27 | elif self.M == 3: # 3D input 28 | self.conv = nn.Conv3d(self.cin, self.cout, self.k, self.s, self.p) 29 | if self.bn: 30 | self.batch_norm = nn.BatchNorm3d(self.cout) 31 | else: 32 | raise Exception('No such mode!') 33 | 34 | 35 | def forward(self, inputs): 36 | 37 | out = self.conv(inputs) 38 | 39 | if self.bn: 40 | out = self.batch_norm(out) 41 | 42 | if self.activation: 43 | return F.relu(out) 44 | else: 45 | return out 46 | 47 | 48 | class Deconv2D(nn.Module): 49 | def __init__(self, cin, cout, k, s, p, bn = True): 50 | super(Deconv2D, self).__init__() 51 | 52 | self.cin = cin 53 | self.cout = cout 54 | self.k = k 55 | self.s = s 56 | self.p = p 57 | self.bn = bn 58 | 59 | self.deconv = nn.ConvTranspose2d(self.cin, self.cout, self.k, self.s, self.p) 60 | 61 | if self.bn: 62 | self.batch_norm = nn.BatchNorm2d(self.cout) 63 | 64 | 65 | def forward(self, inputs): 66 | out = self.deconv(inputs) 67 | 68 | if self.bn == True: 69 | out = self.batch_norm(out) 70 | 71 | return F.relu(out) 72 | 73 | 74 | class MiddleAndRPN(nn.Module): 75 | def __init__(self, alpha = 1.5, beta = 1, sigma = 3, training = True, name = ''): 76 | super(MiddleAndRPN, self).__init__() 77 | 78 | self.middle_layer = nn.Sequential(ConvMD(3, 128, 64, 3, (2, 1, 1,), (1, 1, 1)), 79 | ConvMD(3, 64, 64, 3, (1, 1, 1), (0, 1, 1)), 80 | ConvMD(3, 64, 64, 3, (2, 1, 1), (1, 1, 1))) 81 | 82 | 83 | if cfg.DETECT_OBJ == 'Car': 84 | self.block1 = nn.Sequential(ConvMD(2, 128, 128, 3, (2, 2), (1, 1)), 85 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 86 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 87 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 88 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 89 | else: # Pedestrian/Cyclist 90 | self.block1 = nn.Sequential(ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 91 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 92 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 93 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 94 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 95 | 96 | self.deconv1 = Deconv2D(128, 256, 3, (1, 1), (1, 1)) 97 | 98 | self.block2 = nn.Sequential(ConvMD(2, 128, 128, 3, (2, 2), (1, 1)), 99 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 100 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 101 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 102 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1)), 103 | ConvMD(2, 128, 128, 3, (1, 1), (1, 1))) 104 | 105 | self.deconv2 = Deconv2D(128, 256, 2, (2, 2), (0, 0)) 106 | 107 | self.block3 = nn.Sequential(ConvMD(2, 128, 256, 3, (2, 2), (1, 1)), 108 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 109 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 110 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 111 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1)), 112 | ConvMD(2, 256, 256, 3, (1, 1), (1, 1))) 113 | 114 | self.deconv3 = Deconv2D(256, 256, 4, (4, 4), (0, 0)) 115 | 116 | self.prob_conv = ConvMD(2, 768, 2, 1, (1, 1), (0, 0), bn = False, activation = False) 117 | 118 | self.reg_conv = ConvMD(2, 768, 14, 1, (1, 1), (0, 0), bn = False, activation = False) 119 | 120 | self.output_shape = [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH] 121 | 122 | 123 | def forward(self, inputs): 124 | 125 | batch_size, DEPTH, HEIGHT, WIDTH, C = inputs.shape # 
[batch_size, 10, 400/200, 352/240, 128] 126 | 127 | inputs = inputs.permute(0, 4, 1, 2, 3) # (B, D, H, W, C) -> (B, C, D, H, W) 128 | 129 | temp_conv = self.middle_layer(inputs) # [batch, 64, 2, 400, 352] 130 | temp_conv = temp_conv.view(batch_size, -1, HEIGHT, WIDTH) # [batch, 128, 400, 352] 131 | 132 | temp_conv = self.block1(temp_conv) # [batch, 128, 200, 176] 133 | temp_deconv1 = self.deconv1(temp_conv) 134 | 135 | temp_conv = self.block2(temp_conv) # [batch, 128, 100, 88] 136 | temp_deconv2 = self.deconv2(temp_conv) 137 | 138 | temp_conv = self.block3(temp_conv) # [batch, 256, 50, 44] 139 | temp_deconv3 = self.deconv3(temp_conv) 140 | 141 | temp_conv = torch.cat([temp_deconv3, temp_deconv2, temp_deconv1], dim = 1) 142 | 143 | # Probability score map, [batch, 2, 200/100, 176/120] 144 | p_map = self.prob_conv(temp_conv) 145 | 146 | # Regression map, [batch, 14, 200/100, 176/120] 147 | r_map = self.reg_conv(temp_conv) 148 | 149 | return torch.sigmoid(p_map), r_map 150 | -------------------------------------------------------------------------------- /utils/data_aug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:UTF-8 -*- 3 | 4 | # File Name : data_aug.py 5 | # Purpose : 6 | # Creation Date : 21-12-2017 7 | # Last Modified : Fri 19 Jan 2018 01:06:35 PM CST 8 | # Created By : Jeasine Ma [jeasinema[at]gmail[dot]com] 9 | 10 | import os 11 | from utils.utils import * 12 | from utils.preprocess import * 13 | 14 | import pdb 15 | 16 | 17 | def aug_data(tag, object_dir): 18 | np.random.seed() 19 | # rgb = cv2.imread(os.path.join(object_dir,'image_2', tag + '.png')) 20 | rgb = cv2.resize(cv2.imread(os.path.join(object_dir, 'image_2', tag + '.png')), (cfg.IMAGE_WIDTH, cfg.IMAGE_HEIGHT)) 21 | lidar = np.fromfile(os.path.join(object_dir, 'velodyne', tag + '.bin'), dtype = np.float32).reshape(-1, 4) 22 | label = np.array([line for line in open(os.path.join(object_dir, 'label_2', tag + '.txt'), 'r').readlines()]) # (N') 23 | 24 | cls = np.array([line.split()[0] for line in label]) # (N') 25 | gt_box3d = label_to_gt_box3d(np.array(label)[np.newaxis, :], cls = '', coordinate = 'camera')[0] # (N', 7); 7 means (x, y, z, h, w, l, r) 26 | 27 | choice = np.random.randint(0, 10) 28 | if choice >= 7: 29 | # Disable this augmention here. Current implementation will decrease the performances. 
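# Note on this branch: each ground-truth box is perturbed independently with a small yaw
# t_rz ~ U(-pi/10, pi/10) and Gaussian translations (t_x, t_y, t_z). A candidate perturbation
# is retried (up to 100 times) while its bird's-eye-view box overlaps any earlier box
# (cal_iou2d > 0). An accepted perturbation is applied both to the box parameters and to the
# LiDAR points inside the box's axis-aligned bound, so points and labels stay consistent.
# That bound only approximates the rotated box, which is what the TODO below refers to.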
30 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 31 | lidar_corner_gt_box3d = center_to_corner_box3d(lidar_center_gt_box3d, coordinate = 'lidar') 32 | for idx in range(len(lidar_corner_gt_box3d)): 33 | # TODO: precisely gather the point 34 | is_collision = True 35 | _count = 0 36 | while is_collision and _count < 100: 37 | t_rz = np.random.uniform(-np.pi / 10, np.pi / 10) 38 | t_x = np.random.normal() 39 | t_y = np.random.normal() 40 | t_z = np.random.normal() 41 | # Check collision 42 | tmp = box_transform(lidar_center_gt_box3d[[idx]], t_x, t_y, t_z, t_rz, 'lidar') 43 | is_collision = False 44 | for idy in range(idx): 45 | x1, y1, w1, l1, r1 = tmp[0][[0, 1, 4, 5, 6]] 46 | x2, y2, w2, l2, r2 = lidar_center_gt_box3d[idy][[0, 1, 4, 5, 6]] 47 | iou = cal_iou2d(np.array([x1, y1, w1, l1, r1], dtype = np.float32), 48 | np.array([x2, y2, w2, l2, r2], dtype = np.float32)) 49 | if iou > 0: 50 | is_collision = True 51 | _count += 1 52 | break 53 | if not is_collision: 54 | box_corner = lidar_corner_gt_box3d[idx] 55 | minx = np.min(box_corner[:, 0]) 56 | miny = np.min(box_corner[:, 1]) 57 | minz = np.min(box_corner[:, 2]) 58 | maxx = np.max(box_corner[:, 0]) 59 | maxy = np.max(box_corner[:, 1]) 60 | maxz = np.max(box_corner[:, 2]) 61 | bound_x = np.logical_and(lidar[:, 0] >= minx, lidar[:, 0] <= maxx) 62 | bound_y = np.logical_and(lidar[:, 1] >= miny, lidar[:, 1] <= maxy) 63 | bound_z = np.logical_and(lidar[:, 2] >= minz, lidar[:, 2] <= maxz) 64 | bound_box = np.logical_and(np.logical_and(bound_x, bound_y), bound_z) 65 | lidar[bound_box, 0:3] = point_transform(lidar[bound_box, 0:3], t_x, t_y, t_z, rz = t_rz) 66 | lidar_center_gt_box3d[idx] = box_transform(lidar_center_gt_box3d[[idx]], t_x, t_y, t_z, t_rz, 'lidar') 67 | 68 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 69 | newtag = 'aug_{}_1_{}'.format(tag, np.random.randint(1, 1024)) 70 | elif choice < 7 and choice >= 4: 71 | # Global rotation 72 | angle = np.random.uniform(-np.pi / 4, np.pi / 4) 73 | lidar[:, 0:3] = point_transform(lidar[:, 0:3], 0, 0, 0, rz = angle) 74 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 75 | lidar_center_gt_box3d = box_transform(lidar_center_gt_box3d, 0, 0, 0, r = angle, coordinate = 'lidar') 76 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 77 | newtag = 'aug_{}_2_{:.4f}'.format(tag, angle).replace('.', '_') 78 | else: 79 | # Global scaling 80 | factor = np.random.uniform(0.95, 1.05) 81 | lidar[:, 0:3] = lidar[:, 0:3] * factor 82 | lidar_center_gt_box3d = camera_to_lidar_box(gt_box3d) 83 | lidar_center_gt_box3d[:, 0:6] = lidar_center_gt_box3d[:, 0:6] * factor 84 | gt_box3d = lidar_to_camera_box(lidar_center_gt_box3d) 85 | newtag = 'aug_{}_3_{:.4f}'.format(tag, factor).replace('.', '_') 86 | 87 | label = box3d_to_label(gt_box3d[np.newaxis, ...], cls[np.newaxis, ...], coordinate = 'camera')[0] # (N') 88 | voxel_dict = process_pointcloud(lidar) # Contains: feature_buffer, number_buffer, coordinate_buffer 89 | 90 | return newtag, rgb, lidar, voxel_dict, label 91 | 92 | 93 | # def worker(tag): 94 | # new_tag, rgb, lidar, voxel_dict, label = aug_data(tag) 95 | # output_path = os.path.join(object_dir, 'training_aug') 96 | # 97 | # cv2.imwrite(os.path.join(output_path, 'image_2', newtag + '.png'), rgb) 98 | # lidar.reshape(-1).tofile(os.path.join(output_path, 99 | # 'velodyne', newtag + '.bin')) 100 | # np.savez_compressed(os.path.join( 101 | # output_path, 'voxel' if cfg.DETECT_OBJ == 'Car' else 'voxel_ped', newtag), **voxel_dict) 102 | # with open(os.path.join(output_path, 'label_2', 
newtag + '.txt'), 'w+') as f: 103 | # for line in label: 104 | # f.write(line) 105 | # print(newtag) 106 | # 107 | # 108 | # def main(): 109 | # object_dir = './data/object' 110 | # fl = glob.glob(os.path.join(object_dir, 'training', 'calib', '*.txt')) 111 | # candidate = [f.split('/')[-1].split('.')[0] for f in fl] 112 | # tags = [] 113 | # for _ in range(args.aug_amount): 114 | # tags.append(candidate[np.random.randint(0, len(candidate))]) 115 | # print('generate {} tags'.format(len(tags))) 116 | # pool = mp.Pool(args.num_workers) 117 | # pool.map(worker, tags) 118 | # 119 | # 120 | # if __name__ == '__main__': 121 | # parser = argparse.ArgumentParser(description='') 122 | # parser.add_argument('-i', '--aug-amount', type=int, nargs='?', default=1000) 123 | # parser.add_argument('-n', '--num-workers', type=int, nargs='?', default=10) 124 | # args = parser.parse_args() 125 | # 126 | # main() 127 | -------------------------------------------------------------------------------- /model/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from config import cfg 7 | from utils.utils import * 8 | from utils.colorize import colorize 9 | from utils.nms import nms 10 | from model.group_pointcloud import FeatureNet 11 | from model.rpn import MiddleAndRPN 12 | 13 | import pdb 14 | 15 | 16 | small_addon_for_BCE = 1e-6 17 | 18 | 19 | class RPN3D(nn.Module): 20 | def __init__(self, cls = 'Car', alpha = 1.5, beta = 1, sigma = 3): 21 | super(RPN3D, self).__init__() 22 | 23 | self.cls = cls 24 | self.alpha = alpha 25 | self.beta = beta 26 | self.sigma = sigma 27 | 28 | self.feature = FeatureNet() 29 | self.rpn = MiddleAndRPN() 30 | 31 | # Generate anchors 32 | self.anchors = cal_anchors() # [cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2, 7]; 2 means two rotations; 7 means (cx, cy, cz, h, w, l, r) 33 | 34 | self.rpn_output_shape = self.rpn.output_shape 35 | 36 | 37 | def forward(self, data): 38 | tag = data[0] 39 | label = data[1] 40 | vox_feature = data[2] 41 | vox_number = data[3] 42 | vox_coordinate = data[4] 43 | 44 | features = self.feature(vox_feature, vox_number, vox_coordinate) 45 | prob_output, delta_output = self.rpn(features) 46 | 47 | # Calculate ground-truth 48 | pos_equal_one, neg_equal_one, targets = cal_rpn_target( 49 | label, self.rpn_output_shape, self.anchors, cls = cfg.DETECT_OBJ, coordinate = 'lidar') 50 | pos_equal_one_for_reg = np.concatenate( 51 | [np.tile(pos_equal_one[..., [0]], 7), np.tile(pos_equal_one[..., [1]], 7)], axis = -1) 52 | pos_equal_one_sum = np.clip(np.sum(pos_equal_one, axis = (1, 2, 3)).reshape(-1, 1, 1, 1), a_min = 1, a_max = None) 53 | neg_equal_one_sum = np.clip(np.sum(neg_equal_one, axis = (1, 2, 3)).reshape(-1, 1, 1, 1), a_min = 1, a_max = None) 54 | 55 | # Move to gpu 56 | device = features.device 57 | pos_equal_one = torch.from_numpy(pos_equal_one).to(device).float() 58 | neg_equal_one = torch.from_numpy(neg_equal_one).to(device).float() 59 | targets = torch.from_numpy(targets).to(device).float() 60 | pos_equal_one_for_reg = torch.from_numpy(pos_equal_one_for_reg).to(device).float() 61 | pos_equal_one_sum = torch.from_numpy(pos_equal_one_sum).to(device).float() 62 | neg_equal_one_sum = torch.from_numpy(neg_equal_one_sum).to(device).float() 63 | 64 | # [batch, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH, 2/14] -> [batch, 2/14, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH] 65 | pos_equal_one = pos_equal_one.permute(0, 3, 1, 2) 66 | neg_equal_one = neg_equal_one.permute(0, 
3, 1, 2) 67 | targets = targets.permute(0, 3, 1, 2) 68 | pos_equal_one_for_reg = pos_equal_one_for_reg.permute(0, 3, 1, 2) 69 | 70 | # Calculate loss 71 | cls_pos_loss = (-pos_equal_one * torch.log(prob_output + small_addon_for_BCE)) / pos_equal_one_sum 72 | cls_neg_loss = (-neg_equal_one * torch.log(1 - prob_output + small_addon_for_BCE)) / neg_equal_one_sum 73 | 74 | cls_loss = torch.sum(self.alpha * cls_pos_loss + self.beta * cls_neg_loss) 75 | cls_pos_loss_rec = torch.sum(cls_pos_loss) 76 | cls_neg_loss_rec = torch.sum(cls_neg_loss) 77 | 78 | reg_loss = smooth_l1(delta_output * pos_equal_one_for_reg, targets * pos_equal_one_for_reg, self.sigma) / pos_equal_one_sum 79 | reg_loss = torch.sum(reg_loss) 80 | 81 | loss = cls_loss + reg_loss 82 | 83 | return prob_output, delta_output, loss, cls_loss, reg_loss, cls_pos_loss_rec, cls_neg_loss_rec 84 | 85 | 86 | def predict(self, data, probs, deltas, summary = False, vis = False): 87 | ''' 88 | probs: (batch, 2, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH) 89 | deltas: (batch, 14, cfg.FEATURE_HEIGHT, cfg.FEATURE_WIDTH) 90 | ''' 91 | tag = data[0] 92 | label = data[1] 93 | vox_feature = data[2] 94 | vox_number = data[3] 95 | vox_coordinate = data[4] 96 | img = data[5] 97 | lidar = data[6] 98 | 99 | batch_size, _, _, _ = probs.shape 100 | device = probs.device 101 | 102 | batch_gt_boxes3d = None 103 | if summary or vis: 104 | batch_gt_boxes3d = label_to_gt_box3d(label, cls = self.cls, coordinate = 'lidar') 105 | 106 | # Move to cpu and convert to numpy array 107 | probs = probs.cpu().detach().numpy() 108 | deltas = deltas.cpu().detach().numpy() 109 | 110 | # BOTTLENECK 111 | batch_boxes3d = delta_to_boxes3d(deltas, self.anchors, coordinate = 'lidar') 112 | batch_boxes2d = batch_boxes3d[:, :, [0, 1, 4, 5, 6]] 113 | batch_probs = probs.reshape((batch_size, -1)) 114 | 115 | # NMS 116 | ret_box3d = [] 117 | ret_score = [] 118 | for batch_id in range(batch_size): 119 | # Remove box with low score 120 | ind = np.where(batch_probs[batch_id, :] >= cfg.RPN_SCORE_THRESH)[0] 121 | tmp_boxes3d = batch_boxes3d[batch_id, ind, ...] 122 | tmp_boxes2d = batch_boxes2d[batch_id, ind, ...] 123 | tmp_scores = batch_probs[batch_id, ind] 124 | 125 | # TODO: if possible, use rotate NMS 126 | boxes2d = corner_to_standup_box2d(center_to_corner_box2d(tmp_boxes2d, coordinate = 'lidar')) 127 | 128 | # 2D box index after nms 129 | ind, cnt = nms(torch.from_numpy(boxes2d).to(device), torch.from_numpy(tmp_scores).to(device), 130 | cfg.RPN_NMS_THRESH, cfg.RPN_NMS_POST_TOPK) 131 | ind = ind[:cnt].cpu().detach().numpy() 132 | 133 | tmp_boxes3d = tmp_boxes3d[ind, ...] 
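            # Note: `ind` holds the detection indices kept by NMS, which here runs on
            # axis-aligned ("standup") boxes derived from the rotated bird's-eye-view
            # corners; this approximates rotated NMS (see the TODO above). The same
            # indices are applied to the scores below so boxes and scores stay aligned.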
134 | tmp_scores = tmp_scores[ind] 135 | ret_box3d.append(tmp_boxes3d) 136 | ret_score.append(tmp_scores) 137 | 138 | ret_box3d_score = [] 139 | for boxes3d, scores in zip(ret_box3d, ret_score): 140 | ret_box3d_score.append(np.concatenate([np.tile(self.cls, len(boxes3d))[:, np.newaxis], 141 | boxes3d, scores[:, np.newaxis]], axis = -1)) 142 | 143 | if summary: 144 | # Only summry the first one in a batch 145 | cur_tag = tag[0] 146 | P, Tr, R = load_calib(os.path.join(cfg.CALIB_DIR, cur_tag + '.txt')) 147 | 148 | front_image = draw_lidar_box3d_on_image(img[0], ret_box3d[0], ret_score[0], 149 | batch_gt_boxes3d[0], P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 150 | 151 | bird_view = lidar_to_bird_view_img(lidar[0], factor = cfg.BV_LOG_FACTOR) 152 | 153 | bird_view = draw_lidar_box3d_on_birdview(bird_view, ret_box3d[0], ret_score[0], batch_gt_boxes3d[0], 154 | factor = cfg.BV_LOG_FACTOR, P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 155 | 156 | heatmap = colorize(probs[0, ...], cfg.BV_LOG_FACTOR) 157 | 158 | ret_summary = [['predict/front_view_rgb', front_image[np.newaxis, ...]], # [None, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH, 3] 159 | # [None, cfg.BV_LOG_FACTOR * cfg.INPUT_HEIGHT, cfg.BV_LOG_FACTOR * cfg.INPUT_WIDTH, 3] 160 | ['predict/bird_view_lidar', bird_view[np.newaxis, ...]], 161 | # [None, cfg.BV_LOG_FACTOR * cfg.FEATURE_HEIGHT, cfg.BV_LOG_FACTOR * cfg.FEATURE_WIDTH, 3] 162 | ['predict/bird_view_heatmap', heatmap[np.newaxis, ...]]] 163 | 164 | return tag, ret_box3d_score, ret_summary 165 | 166 | if vis: 167 | front_images, bird_views, heatmaps = [], [], [] 168 | for i in range(len(img)): 169 | cur_tag = tag[i] 170 | P, Tr, R = load_calib(os.path.join(cfg.CALIB_DIR, cur_tag + '.txt')) 171 | 172 | front_image = draw_lidar_box3d_on_image(img[i], ret_box3d[i], ret_score[i], 173 | batch_gt_boxes3d[i], P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 174 | 175 | bird_view = lidar_to_bird_view_img(lidar[i], factor = cfg.BV_LOG_FACTOR) 176 | 177 | bird_view = draw_lidar_box3d_on_birdview(bird_view, ret_box3d[i], ret_score[i], batch_gt_boxes3d[i], 178 | factor = cfg.BV_LOG_FACTOR, P2 = P, T_VELO_2_CAM = Tr, R_RECT_0 = R) 179 | 180 | heatmap = colorize(probs[i, ...], cfg.BV_LOG_FACTOR) 181 | 182 | front_images.append(front_image) 183 | bird_views.append(bird_view) 184 | heatmaps.append(heatmap) 185 | 186 | return tag, ret_box3d_score, front_images, bird_views, heatmaps 187 | 188 | return tag, ret_box3d_score 189 | 190 | 191 | def smooth_l1(deltas, targets, sigma = 3.0): 192 | # Reference: https://mohitjainweb.files.wordpress.com/2018/03/smoothl1loss.pdf 193 | sigma2 = sigma * sigma 194 | diffs = deltas - targets 195 | smooth_l1_signs = torch.lt(torch.abs(diffs), 1.0 / sigma2).float() 196 | 197 | smooth_l1_option1 = torch.mul(diffs, diffs) * 0.5 * sigma2 198 | smooth_l1_option2 = torch.abs(diffs) - 0.5 / sigma2 199 | smooth_l1_add = torch.mul(smooth_l1_option1, smooth_l1_signs) + torch.mul(smooth_l1_option2, 1 - smooth_l1_signs) 200 | smooth_l1 = smooth_l1_add 201 | 202 | return smooth_l1 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import shutil 5 | import argparse 6 | 7 | import cv2 8 | cv2.setNumThreads(0) 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | from torch.utils.data import DataLoader as DataLoader 14 | from torch.nn.utils import clip_grad_norm_ 15 | 16 | from tensorboardX import SummaryWriter 
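# Note: tensorboardX provides the SummaryWriter used for summaries below; on recent
# PyTorch versions, torch.utils.tensorboard.SummaryWriter should work as a drop-in
# replacement.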
17 | 18 | from config import cfg 19 | from utils.utils import box3d_to_label 20 | from model.model import RPN3D 21 | from loader.kitti import KITTI as Dataset 22 | from loader.kitti import collate_fn 23 | 24 | import pdb 25 | 26 | 27 | parser = argparse.ArgumentParser(description = 'training') 28 | 29 | parser.add_argument('--lr', type = float, default = 0.01, help = 'initial learning rate') 30 | parser.add_argument('--alpha', type = float, default = 1.5, help = 'alpha in loss function') 31 | parser.add_argument('--beta', type = float, default = 1, help = 'beta in loss function') 32 | 33 | parser.add_argument('--max_epoch', type = int, default = 1, help = 'max epoch') 34 | parser.add_argument('--batch_size', type = int, default = 1, help = 'batch size') 35 | parser.add_argument('--workers', type = int, default = 4) 36 | 37 | parser.add_argument('--summary_interval', type = int, default = 100, help = 'iter interval for training summary') 38 | parser.add_argument('--summary_val_interval', type = int, default = 200, help = 'iter interval for val summary') 39 | parser.add_argument('--val_epoch', type = int, default = 10, help = 'epoch interval for dump val data') 40 | 41 | parser.add_argument('--log_root', type = str, default = 'log') 42 | parser.add_argument('--log_name', type = str, default = 'train.txt') 43 | parser.add_argument('--tag', type = str, default = 'default', help = 'log tag') 44 | 45 | parser.add_argument('--print_freq', default = 20, type = int, help = 'print frequency') 46 | 47 | parser.add_argument('--resumed_model', type = str, default = '', help = 'if specified, load the specified model') 48 | parser.add_argument('--saved_model', type = str, default = 'kitti_{}.pth.tar') 49 | 50 | # For test data 51 | parser.add_argument('--output_path', type = str, default = './preds', help = 'results output dir') 52 | parser.add_argument('--vis', type = bool, default = True, help = 'set to True if dumping visualization') 53 | 54 | args = parser.parse_args() 55 | 56 | 57 | def run(): 58 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in cfg.GPU_AVAILABLE) 59 | 60 | start_epoch = 0 61 | global_counter = 0 62 | min_loss = sys.float_info.max 63 | 64 | # Build data loader 65 | train_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'training'), shuffle = True, aug = True, is_testset = False) 66 | train_dataloader = DataLoader(train_dataset, batch_size = args.batch_size, shuffle = True, collate_fn = collate_fn, 67 | num_workers = args.workers, pin_memory = False) 68 | 69 | val_dataset = Dataset(os.path.join(cfg.DATA_DIR, 'validation'), shuffle = False, aug = False, is_testset = False) 70 | val_dataloader = DataLoader(val_dataset, batch_size = args.batch_size, shuffle = False, collate_fn = collate_fn, 71 | num_workers = args.workers, pin_memory = False) 72 | val_dataloader_iter = iter(val_dataloader) 73 | 74 | # Build model 75 | model = RPN3D(cfg.DETECT_OBJ, args.alpha, args.beta) 76 | 77 | # Resume model if necessary 78 | if args.resumed_model: 79 | model_file = os.path.join(save_model_dir, args.resumed_model) 80 | if os.path.isfile(model_file): 81 | checkpoint = torch.load(model_file) 82 | start_epoch = checkpoint['epoch'] 83 | global_counter = checkpoint['global_counter'] 84 | min_loss = checkpoint['min_loss'] 85 | model.load_state_dict(checkpoint['state_dict']) 86 | print(("=> Loaded checkpoint '{}' (epoch {}, global_counter {})".format( 87 | args.resumed_model, checkpoint['epoch'], checkpoint['global_counter']))) 88 | else: 89 | print(("=> No checkpoint found at 
'{}'".format(args.resumed_model))) 90 | 91 | model = nn.DataParallel(model).cuda() 92 | 93 | # Optimization scheme 94 | optimizer = optim.Adam(model.parameters(), lr = args.lr) 95 | 96 | lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [150]) 97 | 98 | # Init file log 99 | log = open(os.path.join(args.log_root, args.log_name), 'a') 100 | 101 | # Init TensorBoardX writer 102 | summary_writer = SummaryWriter(log_dir) 103 | 104 | # train and validate 105 | tot_epoch = start_epoch 106 | for epoch in range(start_epoch, args.max_epoch): 107 | # Learning rate scheme 108 | lr_sched.step() 109 | 110 | counter = 0 111 | batch_time = time.time() 112 | 113 | tot_val_loss = 0 114 | tot_val_times = 0 115 | 116 | for (i, data) in enumerate(train_dataloader): 117 | 118 | model.train(True) # Training mode 119 | 120 | counter += 1 121 | global_counter += 1 122 | 123 | start_time = time.time() 124 | 125 | # Forward pass for training 126 | _, _, loss, cls_loss, reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(data) 127 | 128 | forward_time = time.time() - start_time 129 | 130 | loss.backward() 131 | 132 | # Clip gradient 133 | clip_grad_norm_(model.parameters(), 5) 134 | 135 | optimizer.step() 136 | optimizer.zero_grad() 137 | 138 | batch_time = time.time() - batch_time 139 | 140 | if counter % args.print_freq == 0: 141 | # Print training info 142 | info = 'Train: {} @ epoch:{}/{} loss: {:.4f} reg_loss: {:.4f} cls_loss: {:.4f} cls_pos_loss: {:.4f} ' \ 143 | 'cls_neg_loss: {:.4f} forward time: {:.4f} batch time: {:.4f}'.format( 144 | counter, epoch + 1, args.max_epoch, loss.item(), reg_loss.item(), cls_loss.item(), cls_pos_loss_rec.item(), 145 | cls_neg_loss_rec.item(), forward_time, batch_time) 146 | info = '{}\t'.format(time.asctime(time.localtime())) + info 147 | print(info) 148 | 149 | # Write training info to log 150 | log.write(info + '\n') 151 | log.flush() 152 | 153 | # Summarize training info 154 | if counter % args.summary_interval == 0: 155 | print("summary_interval now") 156 | summary_writer.add_scalars(str(epoch + 1), {'train/loss' : loss.item(), 157 | 'train/reg_loss' : reg_loss.item(), 158 | 'train/cls_loss' : cls_loss.item(), 159 | 'train/cls_pos_loss' : cls_pos_loss_rec.item(), 160 | 'train/cls_neg_loss' : cls_neg_loss_rec.item()}, global_counter) 161 | 162 | # Summarize validation info 163 | if counter % args.summary_val_interval == 0: 164 | print('summary_val_interval now') 165 | 166 | with torch.no_grad(): 167 | model.train(False) # Validation mode 168 | 169 | val_data = next(val_dataloader_iter) # Sample one batch 170 | 171 | # Forward pass for validation and prediction 172 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 173 | 174 | summary_writer.add_scalars(str(epoch + 1), {'validate/loss': loss.item(), 175 | 'validate/reg_loss': reg_loss.item(), 176 | 'validate/cls_loss': cls_loss.item(), 177 | 'validate/cls_pos_loss': cls_pos_loss_rec.item(), 178 | 'validate/cls_neg_loss': cls_neg_loss_rec.item()}, global_counter) 179 | 180 | try: 181 | # Prediction 182 | tags, ret_box3d_scores, ret_summary = model.module.predict(val_data, probs, deltas, summary = True) 183 | 184 | for (tag, img) in ret_summary: 185 | img = img[0].transpose(2, 0, 1) 186 | summary_writer.add_image(tag, img, global_counter) 187 | except: 188 | raise Exception('Prediction skipped due to an error!') 189 | 190 | # Add sampled data loss 191 | tot_val_loss += val_loss.item() 192 | tot_val_times += 1 193 | 194 | batch_time = time.time() 195 | 196 | # Save the 
best model 197 | avg_val_loss = tot_val_loss / float(tot_val_times) 198 | is_best = avg_val_loss < min_loss 199 | min_loss = min(avg_val_loss, min_loss) 200 | save_checkpoint({'epoch': epoch + 1, 'global_counter': global_counter, 'state_dict': model.module.state_dict(), 'min_loss': min_loss}, 201 | is_best, args.saved_model.format(cfg.DETECT_OBJ)) 202 | 203 | # Dump test data every 10 epochs 204 | if (epoch + 1) % args.val_epoch == 0: # Time consuming 205 | # Create output folder 206 | os.makedirs(os.path.join(args.output_path, str(epoch + 1)), exist_ok = True) 207 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'data'), exist_ok = True) 208 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'log'), exist_ok=True) 209 | 210 | if args.vis: 211 | os.makedirs(os.path.join(args.output_path, str(epoch + 1), 'vis'), exist_ok = True) 212 | 213 | model.train(False) # Validation mode 214 | 215 | with torch.no_grad(): 216 | for (i, val_data) in enumerate(val_dataloader): 217 | 218 | # Forward pass for validation and prediction 219 | probs, deltas, val_loss, val_cls_loss, val_reg_loss, cls_pos_loss_rec, cls_neg_loss_rec = model(val_data) 220 | 221 | front_images, bird_views, heatmaps = None, None, None 222 | if args.vis: 223 | tags, ret_box3d_scores, front_images, bird_views, heatmaps = \ 224 | model.module.predict(val_data, probs, deltas, summary = False, vis = True) 225 | else: 226 | tags, ret_box3d_scores = model.module.predict(val_data, probs, deltas, summary = False, vis = False) 227 | 228 | # tags: (N) 229 | # ret_box3d_scores: (N, N'); (class, x, y, z, h, w, l, rz, score) 230 | for tag, score in zip(tags, ret_box3d_scores): 231 | output_path = os.path.join(args.output_path, str(epoch + 1), 'data', tag + '.txt') 232 | with open(output_path, 'w+') as f: 233 | labels = box3d_to_label([score[:, 1:8]], [score[:, 0]], [score[:, -1]], coordinate = 'lidar')[0] 234 | for line in labels: 235 | f.write(line) 236 | print('Write out {} objects to {}'.format(len(labels), tag)) 237 | 238 | # Dump visualizations 239 | if args.vis: 240 | for tag, front_image, bird_view, heatmap in zip(tags, front_images, bird_views, heatmaps): 241 | front_img_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_front.jpg') 242 | bird_view_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_bv.jpg') 243 | heatmap_path = os.path.join(args.output_path, str(epoch + 1), 'vis', tag + '_heatmap.jpg') 244 | cv2.imwrite(front_img_path, front_image) 245 | cv2.imwrite(bird_view_path, bird_view) 246 | cv2.imwrite(heatmap_path, heatmap) 247 | 248 | # Run evaluation code 249 | cmd_1 = './eval/KITTI/launch_test.sh' 250 | cmd_2 = os.path.join(args.output_path, str(epoch + 1)) 251 | cmd_3 = os.path.join(args.output_path, str(epoch + 1), 'log') 252 | os.system(' '.join([cmd_1, cmd_2, cmd_3])) 253 | 254 | tot_epoch = epoch + 1 255 | 256 | print('Train done with total epoch:{}, iter:{}'.format(tot_epoch, global_counter)) 257 | 258 | # Close TensorBoardX writer 259 | summary_writer.close() 260 | 261 | 262 | def save_checkpoint(state, is_best, filename = 'to_be_determined.pth.tar'): 263 | torch.save(state, '%s/%s' % (save_model_dir, filename)) 264 | if is_best: 265 | best_filename = filename.replace('.pth.tar', '_best.pth.tar') 266 | shutil.copyfile('%s/%s' % (save_model_dir, filename), '%s/%s' % (save_model_dir, best_filename)) 267 | 268 | 269 | if __name__ == '__main__': 270 | dataset_dir = cfg.DATA_DIR 271 | train_dir = os.path.join(cfg.DATA_DIR, 'training') 272 | val_dir = 
os.path.join(cfg.DATA_DIR, 'validation') 273 | log_dir = os.path.join('./log', args.tag) 274 | save_model_dir = os.path.join('./saved', args.tag) 275 | os.makedirs(log_dir, exist_ok = True) 276 | os.makedirs(save_model_dir, exist_ok = True) 277 | 278 | run() 279 | 280 | 281 | 282 | -------------------------------------------------------------------------------- /eval/KITTI/evaluate_object_3d.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "mail.h" 21 | 22 | BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian) 23 | 24 | typedef boost::geometry::model::polygon > Polygon; 25 | 26 | 27 | using namespace std; 28 | 29 | /*======================================================================= 30 | STATIC EVALUATION PARAMETERS 31 | =======================================================================*/ 32 | 33 | // holds the number of test images on the server 34 | const int32_t N_TESTIMAGES = 7518; 35 | 36 | // easy, moderate and hard evaluation level 37 | enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2}; 38 | 39 | // evaluation metrics: image, ground or 3D 40 | enum METRIC{IMAGE=0, GROUND=1, BOX3D=2}; 41 | 42 | // evaluation parameter 43 | const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections 44 | const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation 45 | const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation 46 | 47 | // evaluated object classes 48 | enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2}; 49 | const int NUM_CLASS = 3; 50 | 51 | // parameters varying per class 52 | vector CLASS_NAMES; 53 | // the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation 54 | const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.5, 0.25, 0.25}, {0.5, 0.25, 0.25}}; 55 | 56 | // no. of recall steps that should be evaluated (discretized) 57 | const double N_SAMPLE_PTS = 41; 58 | 59 | 60 | // initialize class names 61 | void initGlobals () { 62 | CLASS_NAMES.push_back("car"); 63 | CLASS_NAMES.push_back("pedestrian"); 64 | CLASS_NAMES.push_back("cyclist"); 65 | } 66 | 67 | /*======================================================================= 68 | DATA TYPES FOR EVALUATION 69 | =======================================================================*/ 70 | 71 | // holding data needed for precision-recall and precision-aos 72 | struct tPrData { 73 | vector v; // detection score for computing score thresholds 74 | double similarity; // orientation similarity 75 | int32_t tp; // true positives 76 | int32_t fp; // false positives 77 | int32_t fn; // false negatives 78 | tPrData () : 79 | similarity(0), tp(0), fp(0), fn(0) {} 80 | }; 81 | 82 | // holding bounding boxes for ground truth and detections 83 | struct tBox { 84 | string type; // object type as car, pedestrian or cyclist,... 
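  // (x1, y1, x2, y2) below are 2D image-plane pixel coordinates and alpha is the
  // observation angle, matching the KITTI label format; the 3D fields (h, w, l,
  // t1..t3, ry) live in tGroundtruth / tDetection further down.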
85 | double x1; // left corner 86 | double y1; // top corner 87 | double x2; // right corner 88 | double y2; // bottom corner 89 | double alpha; // image orientation 90 | tBox (string type, double x1,double y1,double x2,double y2,double alpha) : 91 | type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {} 92 | }; 93 | 94 | // holding ground truth data 95 | struct tGroundtruth { 96 | tBox box; // object type, box, orientation 97 | double truncation; // truncation 0..1 98 | int32_t occlusion; // occlusion 0,1,2 (non, partly, fully) 99 | double ry; 100 | double t1, t2, t3; 101 | double h, w, l; 102 | tGroundtruth () : 103 | box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {} 104 | tGroundtruth (tBox box,double truncation,int32_t occlusion) : 105 | box(box),truncation(truncation),occlusion(occlusion) {} 106 | tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) : 107 | box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {} 108 | }; 109 | 110 | // holding detection data 111 | struct tDetection { 112 | tBox box; // object type, box, orientation 113 | double thresh; // detection score 114 | double ry; 115 | double t1, t2, t3; 116 | double h, w, l; 117 | tDetection (): 118 | box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {} 119 | tDetection (tBox box,double thresh) : 120 | box(box),thresh(thresh) {} 121 | tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) : 122 | box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {} 123 | }; 124 | 125 | 126 | /*======================================================================= 127 | FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS 128 | =======================================================================*/ 129 | vector indices; 130 | 131 | vector loadDetections(string file_name, bool &compute_aos, 132 | vector &eval_image, vector &eval_ground, 133 | vector &eval_3d, bool &success) { 134 | 135 | // holds all detections (ignored detections are indicated by an index vector 136 | vector detections; 137 | FILE *fp = fopen(file_name.c_str(),"r"); 138 | if (!fp) { 139 | success = false; 140 | return detections; 141 | } 142 | while (!feof(fp)) { 143 | tDetection d; 144 | double trash; 145 | char str[255]; 146 | if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 147 | str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1, 148 | &d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3, 149 | &d.ry, &d.thresh)==16) { 150 | 151 | // d.thresh = 1; 152 | d.box.type = str; 153 | detections.push_back(d); 154 | 155 | // orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid 156 | if(d.box.alpha == -10) 157 | compute_aos = false; 158 | 159 | // a class is only evaluated if it is detected at least once 160 | for (int c = 0; c < NUM_CLASS; c++) { 161 | if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) { 162 | if (!eval_image[c] && d.box.x1 >= 0) 163 | eval_image[c] = true; 164 | if (!eval_ground[c] && d.t1 != -1000) 165 | eval_ground[c] = true; 166 | if (!eval_3d[c] && d.t2 != -1000) 167 | eval_3d[c] = true; 168 | break; 169 | } 170 | } 171 | } 172 | } 173 | fclose(fp); 174 | success = true; 175 | return detections; 176 | } 177 | 178 | vector loadGroundtruth(string file_name,bool &success) { 179 | 180 | // holds all ground truth (ignored ground truth is indicated by an index vector 181 | vector groundtruth; 182 | FILE *fp = 
fopen(file_name.c_str(),"r"); 183 | if (!fp) { 184 | success = false; 185 | return groundtruth; 186 | } 187 | while (!feof(fp)) { 188 | tGroundtruth g; 189 | char str[255]; 190 | if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 191 | str, &g.truncation, &g.occlusion, &g.box.alpha, 192 | &g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2, 193 | &g.h, &g.w, &g.l, &g.t1, 194 | &g.t2, &g.t3, &g.ry )==15) { 195 | g.box.type = str; 196 | groundtruth.push_back(g); 197 | } 198 | } 199 | fclose(fp); 200 | success = true; 201 | return groundtruth; 202 | } 203 | 204 | void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) { 205 | 206 | // save precision to file 207 | if(precision.empty()) 208 | return; 209 | for (int32_t i=0; i 269 | Polygon toPolygon(const T& g) { 270 | using namespace boost::numeric::ublas; 271 | using namespace boost::geometry; 272 | matrix mref(2, 2); 273 | mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry); 274 | mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry); 275 | 276 | static int count = 0; 277 | matrix corners(2, 4); 278 | double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2, 279 | g.w / 2, -g.w / 2, -g.w / 2, g.w / 2}; 280 | std::copy(data, data + 8, corners.data().begin()); 281 | matrix gc = prod(mref, corners); 282 | for (int i = 0; i < 4; ++i) { 283 | gc(0, i) += g.t1; 284 | gc(1, i) += g.t3; 285 | } 286 | 287 | double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}}; 288 | Polygon poly; 289 | append(poly, points); 290 | return poly; 291 | } 292 | 293 | // measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz) 294 | inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 295 | using namespace boost::geometry; 296 | Polygon gp = toPolygon(g); 297 | Polygon dp = toPolygon(d); 298 | 299 | std::vector in, un; 300 | intersection(gp, dp, in); 301 | union_(gp, dp, un); 302 | 303 | double inter_area = in.empty() ? 0 : area(in.front()); 304 | double union_area = area(un.front()); 305 | double o; 306 | if(criterion==-1) // union 307 | o = inter_area / union_area; 308 | else if(criterion==0) // bbox_a 309 | o = inter_area / area(dp); 310 | else if(criterion==1) // bbox_b 311 | o = inter_area / area(gp); 312 | 313 | return o; 314 | } 315 | 316 | // measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz) 317 | inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 318 | using namespace boost::geometry; 319 | Polygon gp = toPolygon(g); 320 | Polygon dp = toPolygon(d); 321 | 322 | std::vector in, un; 323 | intersection(gp, dp, in); 324 | union_(gp, dp, un); 325 | 326 | double ymax = min(d.t2, g.t2); 327 | double ymin = max(d.t2 - d.h, g.t2 - g.h); 328 | 329 | double inter_area = in.empty() ? 
0 : area(in.front()); 330 | double inter_vol = inter_area * max(0.0, ymax - ymin); 331 | 332 | double det_vol = d.h * d.l * d.w; 333 | double gt_vol = g.h * g.l * g.w; 334 | 335 | double o; 336 | if(criterion==-1) // union 337 | o = inter_vol / (det_vol + gt_vol - inter_vol); 338 | else if(criterion==0) // bbox_a 339 | o = inter_vol / det_vol; 340 | else if(criterion==1) // bbox_b 341 | o = inter_vol / gt_vol; 342 | 343 | return o; 344 | } 345 | 346 | vector getThresholds(vector &v, double n_groundtruth){ 347 | 348 | // holds scores needed to compute N_SAMPLE_PTS recall values 349 | vector t; 350 | 351 | // sort scores in descending order 352 | // (highest score is assumed to give best/most confident detections) 353 | sort(v.begin(), v.end(), greater()); 354 | 355 | // get scores for linearly spaced recall 356 | double current_recall = 0; 357 | for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){ 382 | 383 | // extract ground truth bounding boxes for current evaluation class 384 | for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >, 456 | const vector &det, const vector &dc, 457 | const vector &ignored_gt, const vector &ignored_det, 458 | bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 459 | METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){ 460 | 461 | tPrData stat = tPrData(); 462 | const double NO_DETECTION = -10000000; 463 | vector delta; // holds angular difference for TPs (needed for AOS evaluation) 464 | vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth 465 | assigned_detection.assign(det.size(), false); 466 | vector ignored_threshold; 467 | ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed 468 | 469 | // detections with a low score are ignored for computing precision (needs FP) 470 | if(compute_fp) 471 | for(int32_t i=0; i 0.5) (logical len(det)) 484 | =======================================================================*/ 485 | int32_t det_idx = -1; 486 | double valid_detection = NO_DETECTION; 487 | double max_overlap = 0; 488 | 489 | // search for a possible detection 490 | bool assigned_ignored_det = false; 491 | for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){ 506 | det_idx = j; 507 | valid_detection = det[j].thresh; 508 | } 509 | 510 | // for computing pr curve values, the candidate with the greatest overlap is considered 511 | // if the greatest overlap is an ignored detection (min_height), the overlapping detection is used 512 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){ 513 | max_overlap = overlap; 514 | det_idx = j; 515 | valid_detection = 1; 516 | assigned_ignored_det = false; 517 | } 518 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){ 519 | det_idx = j; 520 | valid_detection = 1; 521 | assigned_ignored_det = true; 522 | } 523 | } 524 | 525 | /*======================================================================= 526 | compute TP, FP and FN 527 | =======================================================================*/ 528 | 529 | // nothing was assigned to this valid ground truth 530 | if(valid_detection==NO_DETECTION && ignored_gt[i]==0) { 531 | stat.fn++; 532 | } 
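    // Note: only ground truths with ignored_gt == 0 (valid for the current class and
    // difficulty) can produce a false negative; boxes marked 1 (neighboring class or a
    // harder difficulty) or -1 (other classes) are matched if possible but never penalized.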
533 | 534 | // only evaluate valid ground truth <=> detection assignments (considering difficulty level) 535 | else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1)) 536 | assigned_detection[det_idx] = true; 537 | 538 | // found a valid true positive 539 | else if(valid_detection!=NO_DETECTION){ 540 | 541 | // write highest score to threshold vector 542 | stat.tp++; 543 | stat.v.push_back(det[det_idx].thresh); 544 | 545 | // compute angular difference of detection and ground truth if valid detection orientation was provided 546 | if(compute_aos) 547 | delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha); 548 | 549 | // clean up 550 | assigned_detection[det_idx] = true; 551 | } 552 | } 553 | 554 | // if FP are requested, consider stuff area 555 | if(compute_fp){ 556 | 557 | // count fp 558 | for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){ 581 | assigned_detection[j] = true; 582 | nstuff++; 583 | } 584 | } 585 | } 586 | 587 | // FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas 588 | stat.fp -= nstuff; 589 | 590 | // if all orientation values are valid, the AOS is computed 591 | if(compute_aos){ 592 | vector tmp; 593 | 594 | // FP have a similarity of 0, for all TP compute AOS 595 | tmp.assign(stat.fp, 0); 596 | for(int32_t i=0; i0 || stat.fp>0) 605 | stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0); 606 | 607 | // there was neither a FP nor a TP, so the similarity is ignored in the evaluation 608 | else 609 | stat.similarity = -1; 610 | } 611 | } 612 | return stat; 613 | } 614 | 615 | /*======================================================================= 616 | EVALUATE CLASS-WISE 617 | =======================================================================*/ 618 | 619 | bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class, 620 | const vector< vector > &groundtruth, 621 | const vector< vector > &detections, bool compute_aos, 622 | double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 623 | vector &precision, vector &aos, 624 | DIFFICULTY difficulty, METRIC metric) { 625 | assert(groundtruth.size() == detections.size()); 626 | 627 | // init 628 | int32_t n_gt=0; // total no. 
of gt (denominator of recall) 629 | vector v, thresholds; // detection scores, evaluated for recall discretization 630 | vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty 631 | vector< vector > dontcare; // index of dontcare areas, included in ground truth 632 | 633 | // for all test images do 634 | for (int32_t i=0; i i_gt, i_det; 638 | vector dc; 639 | 640 | // only evaluate objects of current class and ignore occluded, truncated objects 641 | cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty); 642 | ignored_gt.push_back(i_gt); 643 | ignored_det.push_back(i_det); 644 | dontcare.push_back(dc); 645 | 646 | // compute statistics to get recall values 647 | tPrData pr_tmp = tPrData(); 648 | pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric); 649 | 650 | // add detection scores to vector over all images 651 | for(int32_t j=0; j pr; 660 | pr.assign(thresholds.size(),tPrData()); 661 | for (int32_t i=0; i recall; 681 | precision.assign(N_SAMPLE_PTS, 0); 682 | if(compute_aos) 683 | aos.assign(N_SAMPLE_PTS, 0); 684 | double r=0; 685 | for (int32_t i=0; i vals[],bool is_aos){ 706 | 707 | char command[1024]; 708 | 709 | // save plot data to file 710 | FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w"); 711 | printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str()); 712 | for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++) 713 | fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]); 714 | fclose(fp); 715 | 716 | // create png + eps 717 | for (int32_t j=0; j<2; j++) { 718 | 719 | // open file 720 | FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w"); 721 | 722 | // save gnuplot instructions 723 | if (j==0) { 724 | fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n"); 725 | fprintf(fp,"set output \"%s.png\"\n",file_name.c_str()); 726 | } else { 727 | fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n"); 728 | fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str()); 729 | } 730 | 731 | // set labels and ranges 732 | fprintf(fp,"set size ratio 0.7\n"); 733 | fprintf(fp,"set xrange [0:1]\n"); 734 | fprintf(fp,"set yrange [0:1]\n"); 735 | fprintf(fp,"set xlabel \"Recall\"\n"); 736 | if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n"); 737 | else fprintf(fp,"set ylabel \"Orientation Similarity\"\n"); 738 | obj_type[0] = toupper(obj_type[0]); 739 | fprintf(fp,"set title \"%s\"\n",obj_type.c_str()); 740 | 741 | // line width 742 | int32_t lw = 5; 743 | if (j==0) lw = 3; 744 | 745 | // plot error curve 746 | fprintf(fp,"plot "); 747 | fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw); 748 | fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw); 749 | fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw); 750 | 751 | // close file 752 | fclose(fp); 753 | 754 | // run gnuplot => create png + eps 755 | sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str()); 756 | system(command); 757 | } 758 | 759 | // create pdf and crop 760 | sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 761 | system(command); 762 | sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 763 | system(command); 764 | 
sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str()); 765 | system(command); 766 | } 767 | 768 | bool eval(string result_sha,Mail* mail){ 769 | 770 | // set some global parameters 771 | initGlobals(); 772 | 773 | // ground truth and result directories 774 | string gt_dir = "data/object/label_2"; 775 | string result_dir = "results/" + result_sha; 776 | string plot_dir = result_dir + "/plot"; 777 | 778 | // create output directories 779 | system(("mkdir " + plot_dir).c_str()); 780 | 781 | // hold detections and ground truth in memory 782 | vector< vector > groundtruth; 783 | vector< vector > detections; 784 | 785 | // holds wether orientation similarity shall be computed (might be set to false while loading detections) 786 | // and which labels where provided by this submission 787 | bool compute_aos=true; 788 | vector eval_image(NUM_CLASS, false); 789 | vector eval_ground(NUM_CLASS, false); 790 | vector eval_3d(NUM_CLASS, false); 791 | 792 | // for all images read groundtruth and detections 793 | mail->msg("Loading detections..."); 794 | for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success); 803 | vector det = loadDetections(result_dir + "/data/" + file_name, 804 | compute_aos, eval_image, eval_ground, eval_3d, det_success); 805 | groundtruth.push_back(gt); 806 | detections.push_back(det); 807 | 808 | // check for errors 809 | if (!gt_success) { 810 | mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name); 811 | return false; 812 | } 813 | if (!det_success) { 814 | mail->msg("ERROR: Couldn't read: %s", file_name); 815 | return false; 816 | } 817 | } 818 | mail->msg(" done."); 819 | 820 | // holds pointers for result files 821 | FILE *fp_det=0, *fp_ori=0; 822 | 823 | // eval image 2D bounding boxes 824 | for (int c = 0; c < NUM_CLASS; c++) { 825 | CLASSES cls = (CLASSES)c; 826 | if (eval_image[c]) { 827 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w"); 828 | if(compute_aos) 829 | fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w"); 830 | vector precision[3], aos[3]; 831 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE) 832 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE) 833 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) { 834 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 835 | return false; 836 | } 837 | fclose(fp_det); 838 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0); 839 | if(compute_aos){ 840 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1); 841 | fclose(fp_ori); 842 | } 843 | } 844 | } 845 | 846 | // don't evaluate AOS for birdview boxes and 3D boxes 847 | compute_aos = false; 848 | 849 | // eval bird's eye view bounding boxes 850 | for (int c = 0; c < NUM_CLASS; c++) { 851 | CLASSES cls = (CLASSES)c; 852 | if (eval_ground[c]) { 853 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w"); 854 | vector precision[3], aos[3]; 855 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND) 856 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, 
groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND) 857 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) { 858 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 859 | return false; 860 | } 861 | fclose(fp_det); 862 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0); 863 | } 864 | } 865 | 866 | // eval 3D bounding boxes 867 | for (int c = 0; c < NUM_CLASS; c++) { 868 | CLASSES cls = (CLASSES)c; 869 | if (eval_3d[c]) { 870 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w"); 871 | vector precision[3], aos[3]; 872 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D) 873 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D) 874 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) { 875 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 876 | return false; 877 | } 878 | fclose(fp_det); 879 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0); 880 | } 881 | } 882 | 883 | // success 884 | return true; 885 | } 886 | 887 | int32_t main (int32_t argc,char *argv[]) { 888 | 889 | // we need 2 or 4 arguments! 890 | if (argc!=2 && argc!=4) { 891 | cout << "Usage: ./eval_detection result_sha [user_sha email]" << endl; 892 | return 1; 893 | } 894 | 895 | // read arguments 896 | string result_sha = argv[1]; 897 | 898 | // init notification mail 899 | Mail *mail; 900 | if (argc==4) mail = new Mail(argv[3]); 901 | else mail = new Mail(); 902 | mail->msg("Thank you for participating in our evaluation!"); 903 | 904 | // run evaluation 905 | if (eval(result_sha,mail)) { 906 | mail->msg("Your evaluation results are available at:"); 907 | mail->msg("http://www.cvlibs.net/datasets/kitti/user_submit_check_login.php?benchmark=object&user=%s&result=%s",argv[2], result_sha.c_str()); 908 | } else { 909 | system(("rm -r results/" + result_sha).c_str()); 910 | mail->msg("An error occured while processing your results."); 911 | mail->msg("Please make sure that the data in your zip archive has the right format!"); 912 | } 913 | 914 | // send mail and exit 915 | delete mail; 916 | 917 | return 0; 918 | } 919 | 920 | 921 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | 2 | # -*- cooing:UTF-8 -*- 3 | 4 | import cv2 5 | cv2.setNumThreads(0) 6 | 7 | import math 8 | 9 | from config import cfg 10 | from utils.box_overlaps import * 11 | 12 | import pdb 13 | 14 | 15 | #Util function to load calib matrices 16 | CAM = 2 17 | def load_calib(calib_dir): 18 | # P2 * R0_rect * Tr_velo_to_cam * y 19 | lines = open(calib_dir).readlines() 20 | lines = [line.split()[1:] for line in lines][:-1] 21 | 22 | P = np.array(lines[CAM]).reshape(3, 4) 23 | P = np.concatenate((P, np.array([[0, 0, 0, 0]])), 0) 24 | 25 | Tr_velo_to_cam = np.array(lines[5]).reshape(3, 4) 26 | Tr_velo_to_cam = np.concatenate([Tr_velo_to_cam, np.array([0, 0, 0, 1]).reshape(1, 4)], 0) 27 | 28 | R_cam_to_rect = np.eye(4) 29 | R_cam_to_rect[:3, :3] = np.array(lines[4][:9]).reshape(3, 3) 30 | 31 | P = P.astype('float32') 32 | Tr_velo_to_cam = Tr_velo_to_cam.astype('float32') 33 | 
R_cam_to_rect = R_cam_to_rect.astype('float32') 34 | 35 | return P, Tr_velo_to_cam, R_cam_to_rect 36 | 37 | 38 | def lidar_to_bird_view(x, y, factor = 1): 39 | # using the cfg.INPUT_XXX 40 | a = (x - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor 41 | b = (y - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor 42 | a = np.clip(a, a_max = (cfg.X_MAX - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor, a_min = 0) 43 | b = np.clip(b, a_max = (cfg.Y_MAX - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor, a_min = 0) 44 | 45 | return a, b 46 | 47 | def batch_lidar_to_bird_view(points, factor = 1): 48 | # Input: 49 | # points (N, 2) 50 | # Outputs: 51 | # points (N, 2) 52 | # using the cfg.INPUT_XXX 53 | a = (points[:, 0] - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor 54 | b = (points[:, 1] - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor 55 | a = np.clip(a, a_max = (cfg.X_MAX - cfg.X_MIN) / cfg.VOXEL_X_SIZE * factor, a_min = 0) 56 | b = np.clip(b, a_max = (cfg.Y_MAX - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor, a_min = 0) 57 | 58 | return np.concatenate([a[:, np.newaxis], b[:, np.newaxis]], axis = -1) 59 | 60 | 61 | def angle_in_limit(angle): 62 | # To limit the angle in -pi/2 - pi/2 63 | limit_degree = 5 64 | while angle >= np.pi / 2: 65 | angle -= np.pi 66 | while angle < -np.pi / 2: 67 | angle += np.pi 68 | if abs(angle + np.pi / 2) < limit_degree / 180 * np.pi: 69 | angle = np.pi / 2 70 | 71 | return angle 72 | 73 | 74 | def camera_to_lidar(x, y, z, T_VELO_2_CAM = None, R_RECT_0 = None): 75 | if type(T_VELO_2_CAM) == type(None): 76 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 77 | 78 | if type(R_RECT_0) == type(None): 79 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 80 | 81 | p = np.array([x, y, z, 1]) 82 | p = np.matmul(np.linalg.inv(R_RECT_0), p) 83 | p = np.matmul(np.linalg.inv(T_VELO_2_CAM), p) 84 | p = p[0 : 3] 85 | 86 | return tuple(p) 87 | 88 | 89 | def lidar_to_camera(x, y, z, T_VELO_2_CAM = None, R_RECT_0 = None): 90 | if type(T_VELO_2_CAM) == type(None): 91 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 92 | 93 | if type(R_RECT_0) == type(None): 94 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 95 | 96 | p = np.array([x, y, z, 1]) 97 | p = np.matmul(T_VELO_2_CAM, p) 98 | p = np.matmul(R_RECT_0, p) 99 | p = p[0:3] 100 | 101 | return tuple(p) 102 | 103 | 104 | def camera_to_lidar_point(points, T_VELO_2_CAM = None, R_RECT_0 = None): 105 | # (N, 3) -> (N, 3) 106 | N = points.shape[0] 107 | points = np.hstack([points, np.ones((N, 1))]).T # (N,4) -> (4,N) 108 | 109 | if type(T_VELO_2_CAM) == type(None): 110 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 111 | 112 | if type(R_RECT_0) == type(None): 113 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 114 | 115 | points = np.matmul(np.linalg.inv(R_RECT_0), points) 116 | points = np.matmul(np.linalg.inv(T_VELO_2_CAM), points).T # (4, N) -> (N, 4) 117 | points = points[:, 0:3] 118 | 119 | return points.reshape(-1, 3) 120 | 121 | 122 | def lidar_to_camera_point(points, T_VELO_2_CAM = None, R_RECT_0 = None): 123 | # (N, 3) -> (N, 3) 124 | N = points.shape[0] 125 | points = np.hstack([points, np.ones((N, 1))]).T 126 | 127 | 128 | if type(T_VELO_2_CAM) == type(None): 129 | T_VELO_2_CAM = np.array(cfg.MATRIX_T_VELO_2_CAM) 130 | 131 | if type(R_RECT_0) == type(None): 132 | R_RECT_0 = np.array(cfg.MATRIX_R_RECT_0) 133 | 134 | points = np.matmul(T_VELO_2_CAM, points) 135 | points = np.matmul(R_RECT_0, points).T 136 | points = points[:, 0:3] 137 | 138 | return points.reshape(-1, 3) 139 | 140 | 141 | def camera_to_lidar_box(boxes, T_VELO_2_CAM = None, R_RECT_0 = None): 142 | # (N, 7) -> (N, 7) x,y,z,h,w,l,r 
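    # Note on conventions (KITTI): the camera frame has x right, y down, z forward, and the
    # heading ry is a rotation about the camera y-axis; the velodyne/LiDAR frame has x
    # forward, y left, z up, with heading rz about the z-axis. camera_to_lidar() maps the
    # translation through inv(R_RECT_0) and inv(Tr_velo_to_cam), while the heading is
    # remapped as rz = -ry - pi/2 and wrapped by angle_in_limit(); lidar_to_camera_box()
    # below applies the inverse mapping (ry = -rz - pi/2).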
143 | ret = [] 144 | for box in boxes: 145 | x, y, z, h, w, l, ry = box 146 | (x, y, z), h, w, l, rz = camera_to_lidar(x, y, z, T_VELO_2_CAM, R_RECT_0), h, w, l, -ry - np.pi / 2 147 | rz = angle_in_limit(rz) 148 | ret.append([x, y, z, h, w, l, rz]) 149 | 150 | return np.array(ret).reshape(-1, 7) 151 | 152 | 153 | def lidar_to_camera_box(boxes, T_VELO_2_CAM = None, R_RECT_0 = None): 154 | # (N, 7) -> (N, 7) x,y,z,h,w,l,r 155 | ret = [] 156 | for box in boxes: 157 | x, y, z, h, w, l, rz = box 158 | (x, y, z), h, w, l, ry = lidar_to_camera( 159 | x, y, z, T_VELO_2_CAM, R_RECT_0), h, w, l, -rz - np.pi / 2 160 | ry = angle_in_limit(ry) 161 | ret.append([x, y, z, h, w, l, ry]) 162 | 163 | return np.array(ret).reshape(-1, 7) 164 | 165 | 166 | def center_to_corner_box2d(boxes_center, coordinate = 'lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 167 | # (N, 5) -> (N, 4, 2) 168 | N = boxes_center.shape[0] 169 | boxes3d_center = np.zeros((N, 7)) 170 | boxes3d_center[:, [0, 1, 4, 5, 6]] = boxes_center 171 | boxes3d_corner = center_to_corner_box3d( 172 | boxes3d_center, coordinate = coordinate, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 173 | 174 | return boxes3d_corner[:, 0:4, 0:2] 175 | 176 | 177 | def center_to_corner_box3d(boxes_center, coordinate = 'lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 178 | # (N, 7) -> (N, 8, 3) 179 | N = boxes_center.shape[0] 180 | ret = np.zeros((N, 8, 3), dtype = np.float32) 181 | 182 | if coordinate == 'camera': 183 | boxes_center = camera_to_lidar_box(boxes_center, T_VELO_2_CAM, R_RECT_0) 184 | 185 | for i in range(N): 186 | box = boxes_center[i] 187 | translation = box[0:3] 188 | size = box[3:6] 189 | rotation = [0, 0, box[-1]] 190 | 191 | h, w, l = size[0], size[1], size[2] 192 | trackletBox = np.array([ # in velodyne coordinates around zero point and without orientation yet 193 | [-l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2], \ 194 | [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], \ 195 | [0, 0, 0, 0, h, h, h, h]]) 196 | 197 | # re-create 3D bounding box in velodyne coordinate system 198 | yaw = rotation[2] 199 | rotMat = np.array([ 200 | [np.cos(yaw), -np.sin(yaw), 0.0], 201 | [np.sin(yaw), np.cos(yaw), 0.0], 202 | [0.0, 0.0, 1.0]]) 203 | cornerPosInVelo = np.dot(rotMat, trackletBox) + \ 204 | np.tile(translation, (8, 1)).T 205 | box3d = cornerPosInVelo.transpose() 206 | ret[i] = box3d 207 | 208 | if coordinate == 'camera': 209 | for idx in range(len(ret)): 210 | ret[idx] = lidar_to_camera_point(ret[idx], T_VELO_2_CAM, R_RECT_0) 211 | 212 | return ret 213 | 214 | 215 | def corner_to_center_box2d(boxes_corner, coordinate='lidar', T_VELO_2_CAM = None, R_RECT_0 = None): 216 | # (N, 4, 2) -> (N, 5); x,y,w,l,r 217 | N = boxes_corner.shape[0] 218 | boxes3d_corner = np.zeros((N, 8, 3)) 219 | boxes3d_corner[:, 0:4, 0:2] = boxes_corner 220 | boxes3d_corner[:, 4:8, 0:2] = boxes_corner 221 | boxes3d_center = corner_to_center_box3d( 222 | boxes3d_corner, coordinate = coordinate, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 223 | 224 | return boxes3d_center[:, [0, 1, 4, 5, 6]] 225 | 226 | 227 | def corner_to_standup_box2d(boxes_corner): 228 | # (N, 4, 2) -> (N, 4); x1, y1, x2, y2 229 | N = boxes_corner.shape[0] 230 | standup_boxes2d = np.zeros((N, 4)) 231 | standup_boxes2d[:, 0] = np.min(boxes_corner[:, :, 0], axis = 1) 232 | standup_boxes2d[:, 1] = np.min(boxes_corner[:, :, 1], axis = 1) 233 | standup_boxes2d[:, 2] = np.max(boxes_corner[:, :, 0], axis = 1) 234 | standup_boxes2d[:, 3] = np.max(boxes_corner[:, :, 1], axis = 1) 235 | 
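# Taking the min/max over each box's four rotated corners yields its axis-aligned
# enclosing box (x1, y1, x2, y2).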
236 | return standup_boxes2d 237 | 238 | 239 | # TODO: 0/90 may be not correct 240 | def anchor_to_standup_box2d(anchors): 241 | # (N, 4) -> (N, 4); x,y,w,l -> x1,y1,x2,y2 242 | anchor_standup = np.zeros_like(anchors) 243 | # r == 0 244 | anchor_standup[::2, 0] = anchors[::2, 0] - anchors[::2, 3] / 2 245 | anchor_standup[::2, 1] = anchors[::2, 1] - anchors[::2, 2] / 2 246 | anchor_standup[::2, 2] = anchors[::2, 0] + anchors[::2, 3] / 2 247 | anchor_standup[::2, 3] = anchors[::2, 1] + anchors[::2, 2] / 2 248 | # r == pi/2 249 | anchor_standup[1::2, 0] = anchors[1::2, 0] - anchors[1::2, 2] / 2 250 | anchor_standup[1::2, 1] = anchors[1::2, 1] - anchors[1::2, 3] / 2 251 | anchor_standup[1::2, 2] = anchors[1::2, 0] + anchors[1::2, 2] / 2 252 | anchor_standup[1::2, 3] = anchors[1::2, 1] + anchors[1::2, 3] / 2 253 | 254 | return anchor_standup 255 | 256 | 257 | def corner_to_center_box3d(boxes_corner, coordinate='camera', T_VELO_2_CAM = None, R_RECT_0 = None): 258 | # (N, 8, 3) -> (N, 7); x,y,z,h,w,l,ry/z 259 | if coordinate == 'lidar': 260 | for idx in range(len(boxes_corner)): 261 | boxes_corner[idx] = lidar_to_camera_point(boxes_corner[idx], T_VELO_2_CAM, R_RECT_0) 262 | ret = [] 263 | for roi in boxes_corner: 264 | if cfg.CORNER2CENTER_AVG: # average version 265 | roi = np.array(roi) 266 | h = abs(np.sum(roi[:4, 1] - roi[4:, 1]) / 4) 267 | w = np.sum( 268 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]])**2)) + 269 | np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]])**2)) + 270 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]])**2)) + 271 | np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]])**2)) 272 | ) / 4 273 | l = np.sum( 274 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]])**2)) + 275 | np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]])**2)) + 276 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]])**2)) + 277 | np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]])**2)) 278 | ) / 4 279 | x = np.sum(roi[:, 0], axis = 0)/ 8 280 | y = np.sum(roi[0:4, 1], axis = 0)/ 4 281 | z = np.sum(roi[:, 2], axis = 0)/ 8 282 | ry = np.sum( 283 | math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + 284 | math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + 285 | math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + 286 | math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + 287 | math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + 288 | math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + 289 | math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + 290 | math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) 291 | ) / 8 292 | if w > l: 293 | w, l = l, w 294 | ry = angle_in_limit(ry + np.pi / 2) 295 | else: # max version 296 | h = max(abs(roi[:4, 1] - roi[4:, 1])) 297 | w = np.max( 298 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[3, [0, 2]])**2)) + 299 | np.sqrt(np.sum((roi[1, [0, 2]] - roi[2, [0, 2]])**2)) + 300 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[7, [0, 2]])**2)) + 301 | np.sqrt(np.sum((roi[5, [0, 2]] - roi[6, [0, 2]])**2)) 302 | ) 303 | l = np.max( 304 | np.sqrt(np.sum((roi[0, [0, 2]] - roi[1, [0, 2]])**2)) + 305 | np.sqrt(np.sum((roi[2, [0, 2]] - roi[3, [0, 2]])**2)) + 306 | np.sqrt(np.sum((roi[4, [0, 2]] - roi[5, [0, 2]])**2)) + 307 | np.sqrt(np.sum((roi[6, [0, 2]] - roi[7, [0, 2]])**2)) 308 | ) 309 | x = np.sum(roi[:, 0], axis = 0)/ 8 310 | y = np.sum(roi[0:4, 1], axis = 0)/ 4 311 | z = np.sum(roi[:, 2], axis = 0)/ 8 312 | ry = np.sum( 313 | math.atan2(roi[2, 0] - roi[1, 0], roi[2, 2] - roi[1, 2]) + 314 | math.atan2(roi[6, 0] - roi[5, 0], roi[6, 2] - roi[5, 2]) + 315 | 
math.atan2(roi[3, 0] - roi[0, 0], roi[3, 2] - roi[0, 2]) + 316 | math.atan2(roi[7, 0] - roi[4, 0], roi[7, 2] - roi[4, 2]) + 317 | math.atan2(roi[0, 2] - roi[1, 2], roi[1, 0] - roi[0, 0]) + 318 | math.atan2(roi[4, 2] - roi[5, 2], roi[5, 0] - roi[4, 0]) + 319 | math.atan2(roi[3, 2] - roi[2, 2], roi[2, 0] - roi[3, 0]) + 320 | math.atan2(roi[7, 2] - roi[6, 2], roi[6, 0] - roi[7, 0]) 321 | ) / 8 322 | if w > l: 323 | w, l = l, w 324 | ry = angle_in_limit(ry + np.pi / 2) 325 | ret.append([x, y, z, h, w, l, ry]) 326 | if coordinate == 'lidar': 327 | ret = camera_to_lidar_box(np.array(ret), T_VELO_2_CAM, R_RECT_0) 328 | 329 | return np.array(ret) 330 | 331 | 332 | # This just for visulize and testing 333 | def lidar_box3d_to_camera_box(boxes3d, cal_projection = False, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 334 | # (N, 7) -> (N, 4)/(N, 8, 2); x,y,z,h,w,l,rz -> x1,y1,x2,y2/8*(x, y) 335 | num = len(boxes3d) 336 | boxes2d = np.zeros((num, 4), dtype = np.int32) 337 | projections = np.zeros((num, 8, 2), dtype = np.float32) 338 | 339 | lidar_boxes3d_corner = center_to_corner_box3d(boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 340 | if type(P2) == type(None): 341 | P2 = np.array(cfg.MATRIX_P2) 342 | 343 | for n in range(num): 344 | box3d = lidar_boxes3d_corner[n] 345 | box3d = lidar_to_camera_point(box3d, T_VELO_2_CAM, R_RECT_0) 346 | points = np.hstack((box3d, np.ones((8, 1)))).T # (8, 4) -> (4, 8) 347 | points = np.matmul(P2, points).T 348 | 349 | points = np.nan_to_num(points) 350 | 351 | points[:, 0] /= points[:, 2] 352 | points[:, 1] /= points[:, 2] 353 | 354 | projections[n] = points[:, 0:2] 355 | minx = 0 if np.isnan(np.min(points[:, 0])) else int(np.min(points[:, 0])) 356 | maxx = 0 if np.isnan(np.max(points[:, 0])) else int(np.max(points[:, 0])) 357 | miny = 0 if np.isnan(np.min(points[:, 1])) else int(np.min(points[:, 1])) 358 | maxy = 0 if np.isnan(np.max(points[:, 1])) else int(np.max(points[:, 1])) 359 | 360 | boxes2d[n, :] = minx, miny, maxx, maxy 361 | 362 | return projections if cal_projection else boxes2d 363 | 364 | 365 | def lidar_to_bird_view_img(lidar, factor = 1): 366 | # Input: 367 | # lidar: (N', 4) 368 | # Output: 369 | # birdview: (w, l, 3) 370 | birdview = np.zeros( 371 | (cfg.INPUT_HEIGHT * factor, cfg.INPUT_WIDTH * factor, 1)) 372 | for point in lidar: 373 | x, y = point[0:2] 374 | if cfg.X_MIN < x < cfg.X_MAX and cfg.Y_MIN < y < cfg.Y_MAX: 375 | x, y = int((x - cfg.X_MIN) / cfg.VOXEL_X_SIZE * 376 | factor), int((y - cfg.Y_MIN) / cfg.VOXEL_Y_SIZE * factor) 377 | birdview[y, x] += 1 378 | birdview = birdview - np.min(birdview) 379 | divisor = np.max(birdview) - np.min(birdview) 380 | # TODO: adjust this factor 381 | birdview = np.clip((birdview / divisor * 255) * 382 | 5 * factor, a_min = 0, a_max = 255) 383 | birdview = np.tile(birdview, 3).astype(np.uint8) 384 | 385 | return birdview 386 | 387 | 388 | def draw_lidar_box3d_on_image(img, boxes3d, scores, gt_boxes3d = np.array([]), color = (0, 255, 255), 389 | gt_color = (255, 0, 255), thickness = 1, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 390 | # Input: 391 | # img: (h, w, 3) 392 | # boxes3d (N, 7) [x, y, z, h, w, l, r] 393 | # scores 394 | # gt_boxes3d (N, 7) [x, y, z, h, w, l, r] 395 | img = img.copy() 396 | projections = lidar_box3d_to_camera_box(boxes3d, cal_projection = True, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 397 | gt_projections = lidar_box3d_to_camera_box(gt_boxes3d, cal_projection = True, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = 
R_RECT_0) 398 | 399 | # Draw projections 400 | for qs in projections: 401 | for k in range(0, 4): 402 | i, j = k, (k + 1) % 4 403 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 404 | qs[j, 1]), color, thickness, cv2.LINE_AA) 405 | 406 | i, j = k + 4, (k + 1) % 4 + 4 407 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 408 | qs[j, 1]), color, thickness, cv2.LINE_AA) 409 | 410 | i, j = k, k + 4 411 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 412 | qs[j, 1]), color, thickness, cv2.LINE_AA) 413 | # Draw gt projections 414 | for qs in gt_projections: 415 | for k in range(0, 4): 416 | i, j = k, (k + 1) % 4 417 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 418 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 419 | 420 | i, j = k + 4, (k + 1) % 4 + 4 421 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 422 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 423 | 424 | i, j = k, k + 4 425 | cv2.line(img, (qs[i, 0], qs[i, 1]), (qs[j, 0], 426 | qs[j, 1]), gt_color, thickness, cv2.LINE_AA) 427 | 428 | return cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 429 | 430 | 431 | 432 | def draw_lidar_box3d_on_birdview(birdview, boxes3d, scores, gt_boxes3d = np.array([]), 433 | color = (0, 255, 255), gt_color = (255, 0, 255), thickness = 1, factor = 1, P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 434 | # Input: 435 | # birdview: (h, w, 3) 436 | # boxes3d (N, 7) [x, y, z, h, w, l, r] 437 | # scores 438 | # gt_boxes3d (N, 7) [x, y, z, h, w, l, r] 439 | img = birdview.copy() 440 | corner_boxes3d = center_to_corner_box3d(boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 441 | corner_gt_boxes3d = center_to_corner_box3d(gt_boxes3d, coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 442 | # draw gt 443 | for box in corner_gt_boxes3d: 444 | x0, y0 = lidar_to_bird_view(*box[0, 0:2], factor = factor) 445 | x1, y1 = lidar_to_bird_view(*box[1, 0:2], factor = factor) 446 | x2, y2 = lidar_to_bird_view(*box[2, 0:2], factor = factor) 447 | x3, y3 = lidar_to_bird_view(*box[3, 0:2], factor = factor) 448 | 449 | cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), 450 | gt_color, thickness, cv2.LINE_AA) 451 | cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), 452 | gt_color, thickness, cv2.LINE_AA) 453 | cv2.line(img, (int(x2), int(y2)), (int(x3), int(y3)), 454 | gt_color, thickness, cv2.LINE_AA) 455 | cv2.line(img, (int(x3), int(y3)), (int(x0), int(y0)), 456 | gt_color, thickness, cv2.LINE_AA) 457 | 458 | # draw detections 459 | for box in corner_boxes3d: 460 | x0, y0 = lidar_to_bird_view(*box[0, 0:2], factor = factor) 461 | x1, y1 = lidar_to_bird_view(*box[1, 0:2], factor = factor) 462 | x2, y2 = lidar_to_bird_view(*box[2, 0:2], factor = factor) 463 | x3, y3 = lidar_to_bird_view(*box[3, 0:2], factor = factor) 464 | 465 | cv2.line(img, (int(x0), int(y0)), (int(x1), int(y1)), 466 | color, thickness, cv2.LINE_AA) 467 | cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), 468 | color, thickness, cv2.LINE_AA) 469 | cv2.line(img, (int(x2), int(y2)), (int(x3), int(y3)), 470 | color, thickness, cv2.LINE_AA) 471 | cv2.line(img, (int(x3), int(y3)), (int(x0), int(y0)), 472 | color, thickness, cv2.LINE_AA) 473 | 474 | return cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) 475 | 476 | 477 | def label_to_gt_box3d(labels, cls = 'Car', coordinate = 'camera', T_VELO_2_CAM = None, R_RECT_0 = None): 478 | # Input: 479 | # label: (N, N') 480 | # cls: 'Car' or 'Pedestrain' or 'Cyclist' 481 | # coordinate: 'camera' or 'lidar' 482 | # Output: 483 | # (N, N', 7) 484 | 
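# Each KITTI label line is filtered by class ('Car' ground truth also accepts 'Van'; an empty
# acc_cls keeps every class); the last 7 fields (h, w, l, x, y, z, ry) are reordered to
# (x, y, z, h, w, l, r) and, if requested, converted from camera to lidar coordinates.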
boxes3d = [] 485 | if cls == 'Car': 486 | acc_cls = ['Car', 'Van'] 487 | elif cls == 'Pedestrian': 488 | acc_cls = ['Pedestrian'] 489 | elif cls == 'Cyclist': 490 | acc_cls = ['Cyclist'] 491 | else: # all 492 | acc_cls = [] 493 | 494 | for label in labels: 495 | boxes3d_a_label = [] 496 | for line in label: 497 | ret = line.split() 498 | if ret[0] in acc_cls or acc_cls == []: 499 | h, w, l, x, y, z, r = [float(i) for i in ret[-7:]] 500 | box3d = np.array([x, y, z, h, w, l, r]) 501 | boxes3d_a_label.append(box3d) 502 | if coordinate == 'lidar': 503 | boxes3d_a_label = camera_to_lidar_box(np.array(boxes3d_a_label), T_VELO_2_CAM, R_RECT_0) 504 | 505 | boxes3d.append(np.array(boxes3d_a_label).reshape(-1, 7)) 506 | 507 | return boxes3d 508 | 509 | 510 | def box3d_to_label(batch_box3d, batch_cls, batch_score = [], coordinate='camera', P2 = None, T_VELO_2_CAM = None, R_RECT_0 = None): 511 | # Input: 512 | # (N, N', 7) x y z h w l r 513 | # (N, N') 514 | # cls: (N, N') 'Car' or 'Pedestrain' or 'Cyclist' 515 | # coordinate(input): 'camera' or 'lidar' 516 | # Output: 517 | # label: (N, N') N batches and N lines 518 | batch_label = [] 519 | if batch_score: 520 | template = '{} ' + ' '.join(['{:.4f}' for i in range(15)]) + '\n' 521 | for boxes, scores, clses in zip(batch_box3d, batch_score, batch_cls): 522 | label = [] 523 | for box, score, cls in zip(boxes, scores, clses): 524 | if coordinate == 'camera': 525 | box3d = box 526 | box2d = lidar_box3d_to_camera_box( 527 | camera_to_lidar_box(box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0), cal_projection = False, 528 | P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 529 | else: 530 | box3d = lidar_to_camera_box( 531 | box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0)[0] 532 | box2d = lidar_box3d_to_camera_box( 533 | box[np.newaxis, :].astype(np.float32), cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 534 | x, y, z, h, w, l, r = box3d 535 | box3d = [h, w, l, x, y, z, r] 536 | label.append(template.format( 537 | cls, 0, 0, 0, *box2d, *box3d, float(score))) 538 | batch_label.append(label) 539 | else: 540 | template = '{} ' + ' '.join(['{:.4f}' for i in range(14)]) + '\n' 541 | for boxes, clses in zip(batch_box3d, batch_cls): 542 | label = [] 543 | for box, cls in zip(boxes, clses): 544 | if coordinate == 'camera': 545 | box3d = box 546 | box2d = lidar_box3d_to_camera_box( 547 | camera_to_lidar_box(box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0), 548 | cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 549 | else: 550 | box3d = lidar_to_camera_box( 551 | box[np.newaxis, :].astype(np.float32), T_VELO_2_CAM, R_RECT_0)[0] 552 | box2d = lidar_box3d_to_camera_box( 553 | box[np.newaxis, :].astype(np.float32), cal_projection = False, P2 = P2, T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0)[0] 554 | x, y, z, h, w, l, r = box3d 555 | box3d = [h, w, l, x, y, z, r] 556 | label.append(template.format(cls, 0, 0, 0, *box2d, *box3d)) 557 | batch_label.append(label) 558 | 559 | return np.array(batch_label) 560 | 561 | 562 | def cal_anchors(): 563 | # Output: 564 | # Anchors: (w, l, 2, 7) x y z h w l r 565 | x = np.linspace(cfg.X_MIN, cfg.X_MAX, cfg.FEATURE_WIDTH) 566 | y = np.linspace(cfg.Y_MIN, cfg.Y_MAX, cfg.FEATURE_HEIGHT) 567 | cx, cy = np.meshgrid(x, y) 568 | # All are (w, l, 2) 569 | cx = np.tile(cx[..., np.newaxis], 2) 570 | cy = np.tile(cy[..., np.newaxis], 2) 571 | cz = np.ones_like(cx) * cfg.ANCHOR_Z 572 | w = np.ones_like(cx) * 
cfg.ANCHOR_W 573 | l = np.ones_like(cx) * cfg.ANCHOR_L 574 | h = np.ones_like(cx) * cfg.ANCHOR_H 575 | r = np.ones_like(cx) 576 | r[..., 0] = 0 # 0 577 | r[..., 1] = 90 / 180 * np.pi # 90 578 | 579 | # 7 * (w, l, 2) -> (w, l, 2, 7) 580 | anchors = np.stack([cx, cy, cz, h, w, l, r], axis = -1) 581 | 582 | return anchors 583 | 584 | 585 | def cal_rpn_target(labels, feature_map_shape, anchors, cls = 'Car', coordinate = 'lidar'): 586 | # Input: 587 | # labels: (N, N') 588 | # feature_map_shape: (w, l) 589 | # anchors: (w, l, 2, 7) 590 | # Output: 591 | # pos_equal_one (N, w, l, 2) 592 | # neg_equal_one (N, w, l, 2) 593 | # targets (N, w, l, 14) 594 | # Attention: cal IoU on birdview 595 | 596 | batch_size = labels.shape[0] 597 | batch_gt_boxes3d = label_to_gt_box3d(labels, cls = cls, coordinate = coordinate) 598 | # Defined in eq(1) in 2.2 599 | anchors_reshaped = anchors.reshape(-1, 7) 600 | anchors_d = np.sqrt(anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) 601 | pos_equal_one = np.zeros((batch_size, *feature_map_shape, 2)) 602 | neg_equal_one = np.zeros((batch_size, *feature_map_shape, 2)) 603 | targets = np.zeros((batch_size, *feature_map_shape, 14)) 604 | 605 | for batch_id in range(batch_size): 606 | # BOTTLENECK; from (x,y,w,l) to (x1,y1,x2,y2) 607 | anchors_standup_2d = anchor_to_standup_box2d(anchors_reshaped[:, [0, 1, 4, 5]]) 608 | # BOTTLENECK 609 | gt_standup_2d = corner_to_standup_box2d(center_to_corner_box2d( 610 | batch_gt_boxes3d[batch_id][:, [0, 1, 4, 5, 6]], coordinate = coordinate)) 611 | 612 | iou = bbox_overlaps( 613 | np.ascontiguousarray(anchors_standup_2d).astype(np.float32), 614 | np.ascontiguousarray(gt_standup_2d).astype(np.float32), 615 | ) 616 | # iou = cal_box3d_iou(anchors_reshaped, batch_gt_boxes3d[batch_id]) 617 | 618 | # Find anchor with highest iou (iou should also > 0) 619 | id_highest = np.argmax(iou.T, axis = 1) 620 | id_highest_gt = np.arange(iou.T.shape[0]) 621 | mask = iou.T[id_highest_gt, id_highest] > 0 622 | id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask] 623 | 624 | # Find anchor iou > cfg.XXX_POS_IOU 625 | id_pos, id_pos_gt = np.where(iou > cfg.RPN_POS_IOU) 626 | 627 | # Find anchor iou < cfg.XXX_NEG_IOU 628 | id_neg = np.where(np.sum(iou < cfg.RPN_NEG_IOU, axis = 1) == iou.shape[1])[0] 629 | 630 | id_pos = np.concatenate([id_pos, id_highest]) 631 | id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt]) 632 | 633 | # TODO: uniquify the array in a more scientific way 634 | id_pos, index = np.unique(id_pos, return_index = True) 635 | id_pos_gt = id_pos_gt[index] 636 | id_neg.sort() 637 | 638 | # Cal the target and set the equal one 639 | index_x, index_y, index_z = np.unravel_index(id_pos, (*feature_map_shape, 2)) 640 | pos_equal_one[batch_id, index_x, index_y, index_z] = 1 641 | 642 | # ATTENTION: index_z should be np.array 643 | targets[batch_id, index_x, index_y, np.array(index_z) * 7] = ( 644 | batch_gt_boxes3d[batch_id][id_pos_gt, 0] - anchors_reshaped[id_pos, 0]) / anchors_d[id_pos] 645 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 1] = ( 646 | batch_gt_boxes3d[batch_id][id_pos_gt, 1] - anchors_reshaped[id_pos, 1]) / anchors_d[id_pos] 647 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 2] = ( 648 | batch_gt_boxes3d[batch_id][id_pos_gt, 2] - anchors_reshaped[id_pos, 2]) / cfg.ANCHOR_H 649 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 3] = np.log( 650 | batch_gt_boxes3d[batch_id][id_pos_gt, 3] / anchors_reshaped[id_pos, 3]) 651 | targets[batch_id, index_x, index_y, 
np.array(index_z) * 7 + 4] = np.log( 652 | batch_gt_boxes3d[batch_id][id_pos_gt, 4] / anchors_reshaped[id_pos, 4]) 653 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 5] = np.log( 654 | batch_gt_boxes3d[batch_id][id_pos_gt, 5] / anchors_reshaped[id_pos, 5]) 655 | targets[batch_id, index_x, index_y, np.array(index_z) * 7 + 6] = ( 656 | batch_gt_boxes3d[batch_id][id_pos_gt, 6] - anchors_reshaped[id_pos, 6]) 657 | 658 | index_x, index_y, index_z = np.unravel_index(id_neg, (*feature_map_shape, 2)) 659 | neg_equal_one[batch_id, index_x, index_y, index_z] = 1 660 | # To avoid a box be pos/neg in the same time 661 | index_x, index_y, index_z = np.unravel_index(id_highest, (*feature_map_shape, 2)) 662 | neg_equal_one[batch_id, index_x, index_y, index_z] = 0 663 | 664 | return pos_equal_one, neg_equal_one, targets 665 | 666 | 667 | # BOTTLENECK 668 | def delta_to_boxes3d(deltas, anchors, coordinate = 'lidar'): 669 | # Input: 670 | # deltas: (N, w, l, 14) 671 | # feature_map_shape: (w, l) 672 | # anchors: (w, l, 2, 7) 673 | 674 | # Ouput: 675 | # boxes3d: (N, w*l*2, 7) 676 | anchors_reshaped = anchors.reshape(-1, 7) 677 | deltas = deltas.reshape(deltas.shape[0], -1, 7) 678 | anchors_d = np.sqrt(anchors_reshaped[:, 4] ** 2 + anchors_reshaped[:, 5] ** 2) 679 | boxes3d = np.zeros_like(deltas) 680 | boxes3d[..., [0, 1]] = deltas[..., [0, 1]] * \ 681 | anchors_d[:, np.newaxis] + anchors_reshaped[..., [0, 1]] 682 | boxes3d[..., [2]] = deltas[..., [2]] * \ 683 | cfg.ANCHOR_H + anchors_reshaped[..., [2]] 684 | boxes3d[..., [3, 4, 5]] = np.exp( 685 | deltas[..., [3, 4, 5]]) * anchors_reshaped[..., [3, 4, 5]] 686 | boxes3d[..., 6] = deltas[..., 6] + anchors_reshaped[..., 6] 687 | 688 | return boxes3d 689 | 690 | 691 | def point_transform(points, tx, ty, tz, rx = 0, ry = 0, rz = 0): 692 | # Input: 693 | # points: (N, 3) 694 | # rx/y/z: in radians 695 | # Output: 696 | # points: (N, 3) 697 | N = points.shape[0] 698 | points = np.hstack([points, np.ones((N, 1))]) 699 | 700 | mat1 = np.eye(4) 701 | mat1[3, 0:3] = tx, ty, tz 702 | points = np.matmul(points, mat1) 703 | 704 | if rx != 0: 705 | mat = np.zeros((4, 4)) 706 | mat[0, 0] = 1 707 | mat[3, 3] = 1 708 | mat[1, 1] = np.cos(rx) 709 | mat[1, 2] = -np.sin(rx) 710 | mat[2, 1] = np.sin(rx) 711 | mat[2, 2] = np.cos(rx) 712 | points = np.matmul(points, mat) 713 | 714 | if ry != 0: 715 | mat = np.zeros((4, 4)) 716 | mat[1, 1] = 1 717 | mat[3, 3] = 1 718 | mat[0, 0] = np.cos(ry) 719 | mat[0, 2] = np.sin(ry) 720 | mat[2, 0] = -np.sin(ry) 721 | mat[2, 2] = np.cos(ry) 722 | points = np.matmul(points, mat) 723 | 724 | if rz != 0: 725 | mat = np.zeros((4, 4)) 726 | mat[2, 2] = 1 727 | mat[3, 3] = 1 728 | mat[0, 0] = np.cos(rz) 729 | mat[0, 1] = -np.sin(rz) 730 | mat[1, 0] = np.sin(rz) 731 | mat[1, 1] = np.cos(rz) 732 | points = np.matmul(points, mat) 733 | 734 | return points[:, 0:3] 735 | 736 | 737 | def box_transform(boxes, tx, ty, tz, r = 0, coordinate = 'lidar'): 738 | # Input: 739 | # boxes: (N, 7) x y z h w l rz/y 740 | # Output: 741 | # boxes: (N, 7) x y z h w l rz/y 742 | boxes_corner = center_to_corner_box3d( 743 | boxes, coordinate = coordinate) # (N, 8, 3) 744 | for idx in range(len(boxes_corner)): 745 | if coordinate == 'lidar': 746 | boxes_corner[idx] = point_transform( 747 | boxes_corner[idx], tx, ty, tz, rz = r) 748 | else: 749 | boxes_corner[idx] = point_transform( 750 | boxes_corner[idx], tx, ty, tz, ry = r) 751 | 752 | return corner_to_center_box3d(boxes_corner, coordinate = coordinate) 753 | 754 | 755 | def cal_iou2d(box1, box2, 
T_VELO_2_CAM = None, R_RECT_0 = None): 756 | # Input: 757 | # box1/2: x, y, w, l, r 758 | # Output : 759 | # iou 760 | buf1 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 761 | buf2 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 762 | tmp = center_to_corner_box2d(np.array([box1, box2]), coordinate = 'lidar', T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 763 | box1_corner = batch_lidar_to_bird_view(tmp[0]).astype(np.int32) 764 | box2_corner = batch_lidar_to_bird_view(tmp[1]).astype(np.int32) 765 | buf1 = cv2.fillConvexPoly(buf1, box1_corner, color = (1, 1, 1))[..., 0] 766 | buf2 = cv2.fillConvexPoly(buf2, box2_corner, color = (1, 1, 1))[..., 0] 767 | indiv = np.sum(np.absolute(buf1-buf2)) 768 | share = np.sum((buf1 + buf2) == 2) 769 | if indiv == 0: 770 | return 0.0 # when target is out of bound 771 | 772 | return share / (indiv + share) 773 | 774 | 775 | def cal_z_intersect(cz1, h1, cz2, h2): 776 | b1z1, b1z2 = cz1 - h1 / 2, cz1 + h1 / 2 777 | b2z1, b2z2 = cz2 - h2 / 2, cz2 + h2 / 2 778 | if b1z1 > b2z2 or b2z1 > b1z2: 779 | return 0 780 | elif b2z1 <= b1z1 <= b2z2: 781 | if b1z2 <= b2z2: 782 | return h1 / h2 783 | else: 784 | return (b2z2 - b1z1) / (b1z2 - b2z1) 785 | elif b1z1 < b2z1 < b1z2: 786 | if b2z2 <= b1z2: 787 | return h2 / h1 788 | else: 789 | return (b1z2 - b2z1) / (b2z2 - b1z1) 790 | 791 | 792 | def cal_iou3d(box1, box2, T_VELO_2_CAM = None, R_RECT_0 = None): 793 | # Input: 794 | # box1/2: x, y, z, h, w, l, r 795 | # Output: 796 | # iou 797 | buf1 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 798 | buf2 = np.zeros((cfg.INPUT_HEIGHT, cfg.INPUT_WIDTH, 3)) 799 | tmp = center_to_corner_box2d(np.array([box1[[0, 1, 4, 5, 6]], box2[[0, 1, 4, 5, 6]]]), coordinate = 'lidar', 800 | T_VELO_2_CAM = T_VELO_2_CAM, R_RECT_0 = R_RECT_0) 801 | box1_corner = batch_lidar_to_bird_view(tmp[0]).astype(np.int32) 802 | box2_corner = batch_lidar_to_bird_view(tmp[1]).astype(np.int32) 803 | buf1 = cv2.fillConvexPoly(buf1, box1_corner, color = (1, 1, 1))[..., 0] 804 | buf2 = cv2.fillConvexPoly(buf2, box2_corner, color = (1, 1, 1))[..., 0] 805 | share = np.sum((buf1 + buf2) == 2) 806 | area1 = np.sum(buf1) 807 | area2 = np.sum(buf2) 808 | 809 | z1, h1, z2, h2 = box1[2], box1[3], box2[2], box2[3] 810 | z_intersect = cal_z_intersect(z1, h1, z2, h2) 811 | 812 | return share * z_intersect / (area1 * h1 + area2 * h2 - share * z_intersect) 813 | 814 | 815 | def cal_box3d_iou(boxes3d, gt_boxes3d, cal_3d = 0, T_VELO_2_CAM = None, R_RECT_0 = None): 816 | # Inputs: 817 | # boxes3d: (N1, 7) x,y,z,h,w,l,r 818 | # gt_boxed3d: (N2, 7) x,y,z,h,w,l,r 819 | # Outputs: 820 | # iou: (N1, N2) 821 | N1 = len(boxes3d) 822 | N2 = len(gt_boxes3d) 823 | output = np.zeros((N1, N2), dtype = np.float32) 824 | 825 | for idx in range(N1): 826 | for idy in range(N2): 827 | if cal_3d: 828 | output[idx, idy] = float(cal_iou3d(boxes3d[idx], gt_boxes3d[idy], T_VELO_2_CAM, R_RECT_0)) 829 | else: 830 | output[idx, idy] = float( 831 | cal_iou2d(boxes3d[idx, [0, 1, 4, 5, 6]], gt_boxes3d[idy, [0, 1, 4, 5, 6]], T_VELO_2_CAM, R_RECT_0)) 832 | 833 | return output 834 | 835 | 836 | def cal_box2d_iou(boxes2d, gt_boxes2d, T_VELO_2_CAM = None, R_RECT_0 = None): 837 | # Inputs: 838 | # boxes2d: (N1, 5) x,y,w,l,r 839 | # gt_boxes2d: (N2, 5) x,y,w,l,r 840 | # Outputs: 841 | # iou: (N1, N2) 842 | N1 = len(boxes2d) 843 | N2 = len(gt_boxes2d) 844 | output = np.zeros((N1, N2), dtype = np.float32) 845 | for idx in range(N1): 846 | for idy in range(N2): 847 | output[idx, idy] = cal_iou2d(boxes2d[idx], gt_boxes2d[idy], T_VELO_2_CAM, R_RECT_0) 
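# output[idx, idy] is the rasterized bird's-eye-view IoU between boxes2d[idx] and gt_boxes2d[idy]
# (computed by cal_iou2d via cv2.fillConvexPoly on the (INPUT_HEIGHT, INPUT_WIDTH) canvas).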
848 | 849 | return output 850 | 851 | 852 | if __name__ == '__main__': 853 | pass 854 | -------------------------------------------------------------------------------- /eval/KITTI/evaluate_object_3d_offline.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "mail.h" 21 | 22 | BOOST_GEOMETRY_REGISTER_C_ARRAY_CS(cs::cartesian) 23 | 24 | typedef boost::geometry::model::polygon > Polygon; 25 | 26 | 27 | using namespace std; 28 | 29 | /*======================================================================= 30 | STATIC EVALUATION PARAMETERS 31 | =======================================================================*/ 32 | 33 | // holds the number of test images on the server 34 | const int32_t N_TESTIMAGES = 7518; 35 | 36 | // easy, moderate and hard evaluation level 37 | enum DIFFICULTY{EASY=0, MODERATE=1, HARD=2}; 38 | 39 | // evaluation metrics: image, ground or 3D 40 | enum METRIC{IMAGE=0, GROUND=1, BOX3D=2}; 41 | 42 | // evaluation parameter 43 | const int32_t MIN_HEIGHT[3] = {40, 25, 25}; // minimum height for evaluated groundtruth/detections 44 | const int32_t MAX_OCCLUSION[3] = {0, 1, 2}; // maximum occlusion level of the groundtruth used for evaluation 45 | const double MAX_TRUNCATION[3] = {0.15, 0.3, 0.5}; // maximum truncation level of the groundtruth used for evaluation 46 | 47 | // evaluated object classes 48 | enum CLASSES{CAR=0, PEDESTRIAN=1, CYCLIST=2}; 49 | const int NUM_CLASS = 3; 50 | 51 | // parameters varying per class 52 | vector CLASS_NAMES; 53 | // the minimum overlap required for 2D evaluation on the image/ground plane and 3D evaluation 54 | const double MIN_OVERLAP[3][3] = {{0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}, {0.7, 0.5, 0.5}}; 55 | 56 | // no. of recall steps that should be evaluated (discretized) 57 | const double N_SAMPLE_PTS = 41; 58 | 59 | 60 | // initialize class names 61 | void initGlobals () { 62 | CLASS_NAMES.push_back("car"); 63 | CLASS_NAMES.push_back("pedestrian"); 64 | CLASS_NAMES.push_back("cyclist"); 65 | } 66 | 67 | /*======================================================================= 68 | DATA TYPES FOR EVALUATION 69 | =======================================================================*/ 70 | 71 | // holding data needed for precision-recall and precision-aos 72 | struct tPrData { 73 | vector v; // detection score for computing score thresholds 74 | double similarity; // orientation similarity 75 | int32_t tp; // true positives 76 | int32_t fp; // false positives 77 | int32_t fn; // false negatives 78 | tPrData () : 79 | similarity(0), tp(0), fp(0), fn(0) {} 80 | }; 81 | 82 | // holding bounding boxes for ground truth and detections 83 | struct tBox { 84 | string type; // object type as car, pedestrian or cyclist,... 
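// the remaining members give the 2D image-plane box in pixels plus the observation angle alpha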
85 | double x1; // left corner 86 | double y1; // top corner 87 | double x2; // right corner 88 | double y2; // bottom corner 89 | double alpha; // image orientation 90 | tBox (string type, double x1,double y1,double x2,double y2,double alpha) : 91 | type(type),x1(x1),y1(y1),x2(x2),y2(y2),alpha(alpha) {} 92 | }; 93 | 94 | // holding ground truth data 95 | struct tGroundtruth { 96 | tBox box; // object type, box, orientation 97 | double truncation; // truncation 0..1 98 | int32_t occlusion; // occlusion 0,1,2 (non, partly, fully) 99 | double ry; 100 | double t1, t2, t3; 101 | double h, w, l; 102 | tGroundtruth () : 103 | box(tBox("invalild",-1,-1,-1,-1,-10)),truncation(-1),occlusion(-1) {} 104 | tGroundtruth (tBox box,double truncation,int32_t occlusion) : 105 | box(box),truncation(truncation),occlusion(occlusion) {} 106 | tGroundtruth (string type,double x1,double y1,double x2,double y2,double alpha,double truncation,int32_t occlusion) : 107 | box(tBox(type,x1,y1,x2,y2,alpha)),truncation(truncation),occlusion(occlusion) {} 108 | }; 109 | 110 | // holding detection data 111 | struct tDetection { 112 | tBox box; // object type, box, orientation 113 | double thresh; // detection score 114 | double ry; 115 | double t1, t2, t3; 116 | double h, w, l; 117 | tDetection (): 118 | box(tBox("invalid",-1,-1,-1,-1,-10)),thresh(-1000) {} 119 | tDetection (tBox box,double thresh) : 120 | box(box),thresh(thresh) {} 121 | tDetection (string type,double x1,double y1,double x2,double y2,double alpha,double thresh) : 122 | box(tBox(type,x1,y1,x2,y2,alpha)),thresh(thresh) {} 123 | }; 124 | 125 | 126 | /*======================================================================= 127 | FUNCTIONS TO LOAD DETECTION AND GROUND TRUTH DATA ONCE, SAVE RESULTS 128 | =======================================================================*/ 129 | vector indices; 130 | 131 | vector loadDetections(string file_name, bool &compute_aos, 132 | vector &eval_image, vector &eval_ground, 133 | vector &eval_3d, bool &success) { 134 | 135 | // holds all detections (ignored detections are indicated by an index vector 136 | vector detections; 137 | FILE *fp = fopen(file_name.c_str(),"r"); 138 | if (!fp) { 139 | success = false; 140 | return detections; 141 | } 142 | while (!feof(fp)) { 143 | tDetection d; 144 | double trash; 145 | char str[255]; 146 | if (fscanf(fp, "%s %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 147 | str, &trash, &trash, &d.box.alpha, &d.box.x1, &d.box.y1, 148 | &d.box.x2, &d.box.y2, &d.h, &d.w, &d.l, &d.t1, &d.t2, &d.t3, 149 | &d.ry, &d.thresh)==16) { 150 | 151 | // d.thresh = 1; 152 | d.box.type = str; 153 | detections.push_back(d); 154 | 155 | // orientation=-10 is invalid, AOS is not evaluated if at least one orientation is invalid 156 | if(d.box.alpha == -10) 157 | compute_aos = false; 158 | 159 | // a class is only evaluated if it is detected at least once 160 | for (int c = 0; c < NUM_CLASS; c++) { 161 | if (!strcasecmp(d.box.type.c_str(), CLASS_NAMES[c].c_str())) { 162 | if (!eval_image[c] && d.box.x1 >= 0) 163 | eval_image[c] = true; 164 | if (!eval_ground[c] && d.t1 != -1000) 165 | eval_ground[c] = true; 166 | if (!eval_3d[c] && d.t2 != -1000) 167 | eval_3d[c] = true; 168 | break; 169 | } 170 | } 171 | } 172 | } 173 | fclose(fp); 174 | success = true; 175 | return detections; 176 | } 177 | 178 | vector loadGroundtruth(string file_name,bool &success) { 179 | 180 | // holds all ground truth (ignored ground truth is indicated by an index vector 181 | vector groundtruth; 182 | FILE *fp = 
fopen(file_name.c_str(),"r"); 183 | if (!fp) { 184 | success = false; 185 | return groundtruth; 186 | } 187 | while (!feof(fp)) { 188 | tGroundtruth g; 189 | char str[255]; 190 | if (fscanf(fp, "%s %lf %d %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf %lf", 191 | str, &g.truncation, &g.occlusion, &g.box.alpha, 192 | &g.box.x1, &g.box.y1, &g.box.x2, &g.box.y2, 193 | &g.h, &g.w, &g.l, &g.t1, 194 | &g.t2, &g.t3, &g.ry )==15) { 195 | g.box.type = str; 196 | groundtruth.push_back(g); 197 | } 198 | } 199 | fclose(fp); 200 | success = true; 201 | return groundtruth; 202 | } 203 | 204 | void saveStats (const vector &precision, const vector &aos, FILE *fp_det, FILE *fp_ori) { 205 | 206 | // save precision to file 207 | if(precision.empty()) 208 | return; 209 | for (int32_t i=0; i 269 | Polygon toPolygon(const T& g) { 270 | using namespace boost::numeric::ublas; 271 | using namespace boost::geometry; 272 | matrix mref(2, 2); 273 | mref(0, 0) = cos(g.ry); mref(0, 1) = sin(g.ry); 274 | mref(1, 0) = -sin(g.ry); mref(1, 1) = cos(g.ry); 275 | 276 | static int count = 0; 277 | matrix corners(2, 4); 278 | double data[] = {g.l / 2, g.l / 2, -g.l / 2, -g.l / 2, 279 | g.w / 2, -g.w / 2, -g.w / 2, g.w / 2}; 280 | std::copy(data, data + 8, corners.data().begin()); 281 | matrix gc = prod(mref, corners); 282 | for (int i = 0; i < 4; ++i) { 283 | gc(0, i) += g.t1; 284 | gc(1, i) += g.t3; 285 | } 286 | 287 | double points[][2] = {{gc(0, 0), gc(1, 0)},{gc(0, 1), gc(1, 1)},{gc(0, 2), gc(1, 2)},{gc(0, 3), gc(1, 3)},{gc(0, 0), gc(1, 0)}}; 288 | Polygon poly; 289 | append(poly, points); 290 | return poly; 291 | } 292 | 293 | // measure overlap between bird's eye view bounding boxes, parametrized by (ry, l, w, tx, tz) 294 | inline double groundBoxOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 295 | using namespace boost::geometry; 296 | Polygon gp = toPolygon(g); 297 | Polygon dp = toPolygon(d); 298 | 299 | std::vector in, un; 300 | intersection(gp, dp, in); 301 | union_(gp, dp, un); 302 | 303 | double inter_area = in.empty() ? 0 : area(in.front()); 304 | double union_area = area(un.front()); 305 | double o; 306 | if(criterion==-1) // union 307 | o = inter_area / union_area; 308 | else if(criterion==0) // bbox_a 309 | o = inter_area / area(dp); 310 | else if(criterion==1) // bbox_b 311 | o = inter_area / area(gp); 312 | 313 | return o; 314 | } 315 | 316 | // measure overlap between 3D bounding boxes, parametrized by (ry, h, w, l, tx, ty, tz) 317 | inline double box3DOverlap(tDetection d, tGroundtruth g, int32_t criterion = -1) { 318 | using namespace boost::geometry; 319 | Polygon gp = toPolygon(g); 320 | Polygon dp = toPolygon(d); 321 | 322 | std::vector in, un; 323 | intersection(gp, dp, in); 324 | union_(gp, dp, un); 325 | 326 | double ymax = min(d.t2, g.t2); 327 | double ymin = max(d.t2 - d.h, g.t2 - g.h); 328 | 329 | double inter_area = in.empty() ? 
0 : area(in.front()); 330 | double inter_vol = inter_area * max(0.0, ymax - ymin); 331 | 332 | double det_vol = d.h * d.l * d.w; 333 | double gt_vol = g.h * g.l * g.w; 334 | 335 | double o; 336 | if(criterion==-1) // union 337 | o = inter_vol / (det_vol + gt_vol - inter_vol); 338 | else if(criterion==0) // bbox_a 339 | o = inter_vol / det_vol; 340 | else if(criterion==1) // bbox_b 341 | o = inter_vol / gt_vol; 342 | 343 | return o; 344 | } 345 | 346 | vector getThresholds(vector &v, double n_groundtruth){ 347 | 348 | // holds scores needed to compute N_SAMPLE_PTS recall values 349 | vector t; 350 | 351 | // sort scores in descending order 352 | // (highest score is assumed to give best/most confident detections) 353 | sort(v.begin(), v.end(), greater()); 354 | 355 | // get scores for linearly spaced recall 356 | double current_recall = 0; 357 | for(int32_t i=0; i >, const vector &det, vector &ignored_gt, vector &dc, vector &ignored_det, int32_t &n_gt, DIFFICULTY difficulty){ 382 | 383 | // extract ground truth bounding boxes for current evaluation class 384 | for(int32_t i=0;iMAX_OCCLUSION[difficulty] || gt[i].truncation>MAX_TRUNCATION[difficulty] || height >, 457 | const vector &det, const vector &dc, 458 | const vector &ignored_gt, const vector &ignored_det, 459 | bool compute_fp, double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 460 | METRIC metric, bool compute_aos=false, double thresh=0, bool debug=false){ 461 | 462 | tPrData stat = tPrData(); 463 | const double NO_DETECTION = -10000000; 464 | vector delta; // holds angular difference for TPs (needed for AOS evaluation) 465 | vector assigned_detection; // holds wether a detection was assigned to a valid or ignored ground truth 466 | assigned_detection.assign(det.size(), false); 467 | vector ignored_threshold; 468 | ignored_threshold.assign(det.size(), false); // holds detections with a threshold lower than thresh if FP are computed 469 | 470 | // detections with a low score are ignored for computing precision (needs FP) 471 | if(compute_fp) 472 | for(int32_t i=0; i 0.5) (logical len(det)) 485 | =======================================================================*/ 486 | int32_t det_idx = -1; 487 | double valid_detection = NO_DETECTION; 488 | double max_overlap = 0; 489 | 490 | // search for a possible detection 491 | bool assigned_ignored_det = false; 492 | for(int32_t j=0; jMIN_OVERLAP[metric][current_class] && det[j].thresh>valid_detection){ 507 | det_idx = j; 508 | valid_detection = det[j].thresh; 509 | } 510 | 511 | // for computing pr curve values, the candidate with the greatest overlap is considered 512 | // if the greatest overlap is an ignored detection (min_height), the overlapping detection is used 513 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && (overlap>max_overlap || assigned_ignored_det) && ignored_det[j]==0){ 514 | max_overlap = overlap; 515 | det_idx = j; 516 | valid_detection = 1; 517 | assigned_ignored_det = false; 518 | } 519 | else if(compute_fp && overlap>MIN_OVERLAP[metric][current_class] && valid_detection==NO_DETECTION && ignored_det[j]==1){ 520 | det_idx = j; 521 | valid_detection = 1; 522 | assigned_ignored_det = true; 523 | } 524 | } 525 | 526 | /*======================================================================= 527 | compute TP, FP and FN 528 | =======================================================================*/ 529 | 530 | // nothing was assigned to this valid ground truth 531 | if(valid_detection==NO_DETECTION && ignored_gt[i]==0) { 532 | stat.fn++; 533 | } 
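// (a valid ground truth left without any assigned detection has just been counted as a false negative)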
534 | 535 | // only evaluate valid ground truth <=> detection assignments (considering difficulty level) 536 | else if(valid_detection!=NO_DETECTION && (ignored_gt[i]==1 || ignored_det[det_idx]==1)) 537 | assigned_detection[det_idx] = true; 538 | 539 | // found a valid true positive 540 | else if(valid_detection!=NO_DETECTION){ 541 | 542 | // write highest score to threshold vector 543 | stat.tp++; 544 | stat.v.push_back(det[det_idx].thresh); 545 | 546 | // compute angular difference of detection and ground truth if valid detection orientation was provided 547 | if(compute_aos) 548 | delta.push_back(gt[i].box.alpha - det[det_idx].box.alpha); 549 | 550 | // clean up 551 | assigned_detection[det_idx] = true; 552 | } 553 | } 554 | 555 | // if FP are requested, consider stuff area 556 | if(compute_fp){ 557 | 558 | // count fp 559 | for(int32_t i=0; iMIN_OVERLAP[metric][current_class]){ 582 | assigned_detection[j] = true; 583 | nstuff++; 584 | } 585 | } 586 | } 587 | 588 | // FP = no. of all not to ground truth assigned detections - detections assigned to stuff areas 589 | stat.fp -= nstuff; 590 | 591 | // if all orientation values are valid, the AOS is computed 592 | if(compute_aos){ 593 | vector tmp; 594 | 595 | // FP have a similarity of 0, for all TP compute AOS 596 | tmp.assign(stat.fp, 0); 597 | for(int32_t i=0; i0 || stat.fp>0) 606 | stat.similarity = accumulate(tmp.begin(), tmp.end(), 0.0); 607 | 608 | // there was neither a FP nor a TP, so the similarity is ignored in the evaluation 609 | else 610 | stat.similarity = -1; 611 | } 612 | } 613 | return stat; 614 | } 615 | 616 | /*======================================================================= 617 | EVALUATE CLASS-WISE 618 | =======================================================================*/ 619 | 620 | bool eval_class (FILE *fp_det, FILE *fp_ori, CLASSES current_class, 621 | const vector< vector > &groundtruth, 622 | const vector< vector > &detections, bool compute_aos, 623 | double (*boxoverlap)(tDetection, tGroundtruth, int32_t), 624 | vector &precision, vector &aos, 625 | DIFFICULTY difficulty, METRIC metric) { 626 | assert(groundtruth.size() == detections.size()); 627 | 628 | // init 629 | int32_t n_gt=0; // total no. 
of gt (denominator of recall) 630 | vector v, thresholds; // detection scores, evaluated for recall discretization 631 | vector< vector > ignored_gt, ignored_det; // index of ignored gt detection for current class/difficulty 632 | vector< vector > dontcare; // index of dontcare areas, included in ground truth 633 | 634 | // for all test images do 635 | for (int32_t i=0; i i_gt, i_det; 639 | vector dc; 640 | 641 | // only evaluate objects of current class and ignore occluded, truncated objects 642 | cleanData(current_class, groundtruth[i], detections[i], i_gt, dc, i_det, n_gt, difficulty); 643 | ignored_gt.push_back(i_gt); 644 | ignored_det.push_back(i_det); 645 | dontcare.push_back(dc); 646 | 647 | // compute statistics to get recall values 648 | tPrData pr_tmp = tPrData(); 649 | pr_tmp = computeStatistics(current_class, groundtruth[i], detections[i], dc, i_gt, i_det, false, boxoverlap, metric); 650 | 651 | // add detection scores to vector over all images 652 | for(int32_t j=0; j pr; 661 | pr.assign(thresholds.size(),tPrData()); 662 | for (int32_t i=0; i recall; 682 | precision.assign(N_SAMPLE_PTS, 0); 683 | if(compute_aos) 684 | aos.assign(N_SAMPLE_PTS, 0); 685 | double r=0; 686 | for (int32_t i=0; i vals[],bool is_aos){ 707 | 708 | char command[1024]; 709 | 710 | // save plot data to file 711 | FILE *fp = fopen((dir_name + "/" + file_name + ".txt").c_str(),"w"); 712 | printf("save %s\n", (dir_name + "/" + file_name + ".txt").c_str()); 713 | for (int32_t i=0; i<(int)N_SAMPLE_PTS; i++) 714 | fprintf(fp,"%f %f %f %f\n",(double)i/(N_SAMPLE_PTS-1.0),vals[0][i],vals[1][i],vals[2][i]); 715 | fclose(fp); 716 | 717 | float sum[3] = {0, 0, 0}; 718 | for (int v = 0; v < 3; ++v) 719 | for (int i = 0; i < vals[v].size(); i = i + 4) 720 | sum[v] += vals[v][i]; 721 | printf("%s AP: %f %f %f\n", file_name.c_str(), sum[0] / 11 * 100, sum[1] / 11 * 100, sum[2] / 11 * 100); 722 | 723 | 724 | // create png + eps 725 | for (int32_t j=0; j<2; j++) { 726 | 727 | // open file 728 | FILE *fp = fopen((dir_name + "/" + file_name + ".gp").c_str(),"w"); 729 | 730 | // save gnuplot instructions 731 | if (j==0) { 732 | fprintf(fp,"set term png size 450,315 font \"Helvetica\" 11\n"); 733 | fprintf(fp,"set output \"%s.png\"\n",file_name.c_str()); 734 | } else { 735 | fprintf(fp,"set term postscript eps enhanced color font \"Helvetica\" 20\n"); 736 | fprintf(fp,"set output \"%s.eps\"\n",file_name.c_str()); 737 | } 738 | 739 | // set labels and ranges 740 | fprintf(fp,"set size ratio 0.7\n"); 741 | fprintf(fp,"set xrange [0:1]\n"); 742 | fprintf(fp,"set yrange [0:1]\n"); 743 | fprintf(fp,"set xlabel \"Recall\"\n"); 744 | if (!is_aos) fprintf(fp,"set ylabel \"Precision\"\n"); 745 | else fprintf(fp,"set ylabel \"Orientation Similarity\"\n"); 746 | obj_type[0] = toupper(obj_type[0]); 747 | fprintf(fp,"set title \"%s\"\n",obj_type.c_str()); 748 | 749 | // line width 750 | int32_t lw = 5; 751 | if (j==0) lw = 3; 752 | 753 | // plot error curve 754 | fprintf(fp,"plot "); 755 | fprintf(fp,"\"%s.txt\" using 1:2 title 'Easy' with lines ls 1 lw %d,",file_name.c_str(),lw); 756 | fprintf(fp,"\"%s.txt\" using 1:3 title 'Moderate' with lines ls 2 lw %d,",file_name.c_str(),lw); 757 | fprintf(fp,"\"%s.txt\" using 1:4 title 'Hard' with lines ls 3 lw %d",file_name.c_str(),lw); 758 | 759 | // close file 760 | fclose(fp); 761 | 762 | // run gnuplot => create png + eps 763 | sprintf(command,"cd %s; gnuplot %s",dir_name.c_str(),(file_name + ".gp").c_str()); 764 | system(command); 765 | } 766 | 767 | // create pdf and crop 768 | 
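// (relies on ps2pdf and pdfcrop being available on PATH; the precision/AOS .txt files were already written above)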
sprintf(command,"cd %s; ps2pdf %s.eps %s_large.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 769 | system(command); 770 | sprintf(command,"cd %s; pdfcrop %s_large.pdf %s.pdf",dir_name.c_str(),file_name.c_str(),file_name.c_str()); 771 | system(command); 772 | sprintf(command,"cd %s; rm %s_large.pdf",dir_name.c_str(),file_name.c_str()); 773 | system(command); 774 | } 775 | 776 | vector getEvalIndices(const string& result_dir) { 777 | 778 | DIR* dir; 779 | dirent* entity; 780 | dir = opendir(result_dir.c_str()); 781 | if (dir) { 782 | while (entity = readdir(dir)) { 783 | string path(entity->d_name); 784 | int32_t len = path.size(); 785 | if (len < 10) continue; 786 | int32_t index = atoi(path.substr(len - 10, 10).c_str()); 787 | indices.push_back(index); 788 | } 789 | } 790 | return indices; 791 | } 792 | 793 | bool eval(string gt_dir, string result_dir, Mail* mail){ 794 | 795 | // set some global parameters 796 | initGlobals(); 797 | 798 | // ground truth and result directories 799 | // string gt_dir = "data/object/label_2"; 800 | // string result_dir = "results/" + result_sha; 801 | string plot_dir = result_dir + "/plot"; 802 | 803 | // create output directories 804 | system(("mkdir " + plot_dir).c_str()); 805 | 806 | // hold detections and ground truth in memory 807 | vector< vector > groundtruth; 808 | vector< vector > detections; 809 | 810 | // holds wether orientation similarity shall be computed (might be set to false while loading detections) 811 | // and which labels where provided by this submission 812 | bool compute_aos=true; 813 | vector eval_image(NUM_CLASS, false); 814 | vector eval_ground(NUM_CLASS, false); 815 | vector eval_3d(NUM_CLASS, false); 816 | 817 | // for all images read groundtruth and detections 818 | mail->msg("Loading detections..."); 819 | std::vector indices = getEvalIndices(result_dir + "/data/"); 820 | printf("number of files for evaluation: %d\n", (int)indices.size()); 821 | 822 | for (int32_t i=0; i gt = loadGroundtruth(gt_dir + "/" + file_name,gt_success); 831 | vector det = loadDetections(result_dir + "/data/" + file_name, 832 | compute_aos, eval_image, eval_ground, eval_3d, det_success); 833 | groundtruth.push_back(gt); 834 | detections.push_back(det); 835 | 836 | // check for errors 837 | if (!gt_success) { 838 | mail->msg("ERROR: Couldn't read: %s of ground truth. Please write me an email!", file_name); 839 | return false; 840 | } 841 | if (!det_success) { 842 | mail->msg("ERROR: Couldn't read: %s", file_name); 843 | return false; 844 | } 845 | } 846 | mail->msg(" done."); 847 | 848 | // holds pointers for result files 849 | FILE *fp_det=0, *fp_ori=0; 850 | 851 | // eval image 2D bounding boxes 852 | printf("start to eval image 2D bounding boxes... 
\n"); 853 | for (int c = 0; c < NUM_CLASS; c++) { 854 | CLASSES cls = (CLASSES)c; 855 | if (eval_image[c]) { 856 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection.txt").c_str(), "w"); 857 | if(compute_aos) 858 | fp_ori = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_orientation.txt").c_str(),"w"); 859 | vector precision[3], aos[3]; 860 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[0], aos[0], EASY, IMAGE) 861 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[1], aos[1], MODERATE, IMAGE) 862 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, imageBoxOverlap, precision[2], aos[2], HARD, IMAGE)) { 863 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 864 | return false; 865 | } 866 | fclose(fp_det); 867 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection", CLASS_NAMES[c], precision, 0); 868 | printf("2D bounding boxes detection saved ... \n"); 869 | if(compute_aos){ 870 | printf(" start to compute aos ... \n"); 871 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_orientation", CLASS_NAMES[c], aos, 1); 872 | fclose(fp_ori); 873 | printf("2D bounding boxes aos saved ... \n"); 874 | } 875 | } 876 | } 877 | 878 | // don't evaluate AOS for birdview boxes and 3D boxes 879 | compute_aos = false; 880 | 881 | // eval bird's eye view bounding boxes 882 | printf("start to eval bird's eye view bounding boxes... \n"); 883 | for (int c = 0; c < NUM_CLASS; c++) { 884 | CLASSES cls = (CLASSES)c; 885 | if (eval_ground[c]) { 886 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_ground.txt").c_str(), "w"); 887 | vector precision[3], aos[3]; 888 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[0], aos[0], EASY, GROUND) 889 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[1], aos[1], MODERATE, GROUND) 890 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, groundBoxOverlap, precision[2], aos[2], HARD, GROUND)) { 891 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 892 | return false; 893 | } 894 | fclose(fp_det); 895 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_ground", CLASS_NAMES[c], precision, 0); 896 | } 897 | } 898 | 899 | // eval 3D bounding boxes 900 | printf("start to eval 3D bounding boxes... \n"); 901 | for (int c = 0; c < NUM_CLASS; c++) { 902 | CLASSES cls = (CLASSES)c; 903 | if (eval_3d[c]) { 904 | fp_det = fopen((result_dir + "/stats_" + CLASS_NAMES[c] + "_detection_3d.txt").c_str(), "w"); 905 | vector precision[3], aos[3]; 906 | if( !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[0], aos[0], EASY, BOX3D) 907 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[1], aos[1], MODERATE, BOX3D) 908 | || !eval_class(fp_det, fp_ori, cls, groundtruth, detections, compute_aos, box3DOverlap, precision[2], aos[2], HARD, BOX3D)) { 909 | mail->msg("%s evaluation failed.", CLASS_NAMES[c].c_str()); 910 | return false; 911 | } 912 | fclose(fp_det); 913 | saveAndPlotPlots(plot_dir, CLASS_NAMES[c] + "_detection_3d", CLASS_NAMES[c], precision, 0); 914 | } 915 | } 916 | 917 | // success 918 | return true; 919 | } 920 | 921 | int32_t main (int32_t argc,char *argv[]) { 922 | 923 | // we need 2 or 4 arguments! 
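// (this offline variant actually takes exactly two arguments: the ground-truth directory and the result directory)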
924 | if (argc!=3) { 925 | cout << "Usage: ./eval_detection_3d_offline gt_dir result_dir" << endl; 926 | return 1; 927 | } 928 | 929 | // read arguments 930 | string gt_dir = argv[1]; 931 | string result_dir = argv[2]; 932 | 933 | // init notification mail 934 | Mail *mail; 935 | mail = new Mail(); 936 | mail->msg("Thank you for participating in our evaluation!"); 937 | 938 | // run evaluation 939 | if (eval(gt_dir, result_dir, mail)) { 940 | mail->msg("Your evaluation results are available at:"); 941 | mail->msg(result_dir.c_str()); 942 | } else { 943 | system(("rm -r " + result_dir + "/plot").c_str()); 944 | mail->msg("An error occurred while processing your results."); 945 | } 946 | 947 | // send mail and exit 948 | delete mail; 949 | 950 | return 0; 951 | } 952 | 953 | 954 | 955 | 956 | 957 | --------------------------------------------------------------------------------