├── LICENSE ├── README.md ├── checkpoint.py ├── eval.py ├── models ├── ap_helper.py ├── backbone_module.py ├── backbone_module_scale.py ├── dump_helper.py ├── hdnet.py ├── hdnet_1bb.py ├── loss_helper.py ├── proposal_module_refine.py ├── proposal_module_surface.py └── voting_module.py ├── overview.jpg ├── pointnet2 ├── _ext_src │ ├── include │ │ ├── ball_query.h │ │ ├── cuda_utils.h │ │ ├── group_points.h │ │ ├── interpolate.h │ │ ├── sampling.h │ │ └── utils.h │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── bindings.cpp │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── sampling.cpp │ │ └── sampling_gpu.cu ├── pointnet2_modules.py ├── pointnet2_utils.py ├── pytorch_utils.py └── setup.py ├── scannet ├── meta_data │ ├── scannet_means.npz │ ├── scannet_means_v2.npz.npy │ ├── scannet_train.txt │ ├── scannetv2-labels.combined.tsv │ ├── scannetv2_test.txt │ ├── scannetv2_train.txt │ └── scannetv2_val.txt ├── model_util_scannet.py └── scannet_detection_dataset_hd.py ├── sunrgbd ├── model_util_sunrgbd.py ├── sunrgbd_detection_dataset_hd.py └── sunrgbd_utils.py ├── train.py ├── train_1bb.py └── utils ├── box_util.py ├── eval_det.py ├── metric_util.py ├── nms.py ├── nn_distance.py ├── pc_util.py ├── show_results_scannet.py ├── show_results_sunrgbd.py ├── tf_logger.py ├── tf_visualizer.py ├── utils.py └── viewpoint.json /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 zaiweizhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # H3DNet: 3D Object Detection Using Hybrid Geometric Primitives 2 | Created by Zaiwei Zhang, Haitao Yang, Bo Sun and Qixing Huang. 3 | 4 | ![overview](overview.jpg) 5 | 6 | ## Introduction 7 | This repository is code release for our paper (arXiv report [here](https://arxiv.org/abs/2006.05682)). 8 | 9 | We introduce H3DNet, which takes a colorless 3D point cloud as input and outputs a collection of oriented object bounding boxes (or BB) and their semantic labels. The critical idea of H3DNet is to predict a hybrid set of geometric primitives, i.e., BB centers, BB face centers, and BB edge centers. We show how to convert the predicted geometric primitives into object proposals by defining a distance function between an object and the geometric primitives. This distance function enables continuous optimization of object proposals, and its local minimums provide high-fidelity object proposals. H3DNet then utilizes a matching and refinement module to classify object proposals into detected objects and fine-tune the geometric parameters of the detected objects. The hybrid set of geometric primitives not only provides more accurate signals for object detection than using a single type of geometric primitives, but it also provides an overcomplete set of constraints on the resulting 3D layout. Therefore, H3DNet can tolerate outliers in predicted geometric primitives. Our model achieves state-of-the-art 3D detection results, with only pointclouds input, on two large datasets with real 3D scans, ScanNet and SUN RGB-D. 10 | 11 | In this repository, we provide H3DNet model implementation (with Pytorch) as well as data preparation, training and evaluation scripts on SUN RGB-D and ScanNet. Since our model is built on VoteNet, we borrowed a lot of codes from their codebase. 12 | 13 | ## Installation 14 | 15 | Since we are built on top of VoteNet, we require similar packages before using our code. Install [Pytorch](https://pytorch.org/get-started/locally/) and [Tensorflow](https://github.com/tensorflow/tensorflow) (for TensorBoard). It is required that you have access to GPUs. Matlab is required to prepare data for SUN RGB-D. The code is tested with Ubuntu 18.04, Pytorch v1.1, TensorFlow v1.14, CUDA 10.0 and cuDNN v7.4. 16 | 17 | Compile the CUDA layers for [PointNet++](http://arxiv.org/abs/1706.02413), which we used in the backbone network: 18 | 19 | cd pointnet2 20 | python setup.py install 21 | 22 | Install the following Python dependencies (with `pip install`): 23 | 24 | numpy 25 | matplotlib 26 | scipy 27 | sklearn 28 | opencv-python 29 | plyfile 30 | pytorch=1.1.0 31 | tensorflow-gpu==1.12.0 (only for visualization) 32 | 'trimesh>=2.35.39,<2.35.40' 33 | 'networkx>=2.2,<2.3' 34 | 35 | ## Training and evaluating 36 | 37 | ### Data preparation 38 | 39 | For data preparation, we share the same data pre-processing steps with VoteNet. We provide the processed training and testing data for SUN RGB-D [here](https://drive.google.com/file/d/1uwoi34N43jfreZooG-SuYhG5mdAsSHvK/view?usp=sharing), and for ScanNet [here](https://drive.google.com/file/d/1WtzsQBqU9rxc3tsa4kooRU_DbhRpuIyb/view?usp=sharing). 40 | 41 | ### Train and test on SUN RGB-D 42 | 43 | To train a new H3DNet model on SUN RGB-D data (depth images): 44 | 45 | python train.py --data_path path/to/sunrgbd --dataset sunrgbd --log_dir log_sunrgbd --num_point 40000 --model hdnet --batch_size 16 46 | 47 | In order to train in batch_size 16, you will have to use at least 3/4 GPUs. You can use `CUDA_VISIBLE_DEVICES=0,1,2` to specify which GPU(s) to use. Without specifying CUDA devices, the training will use all the available GPUs and train with data parallel. 48 | While training you can check the `log_sunrgbd/log_train.txt` file on its progress, or use the TensorBoard to see loss curves. 49 | 50 | To run H3DNet with one backbone (less memory): 51 | 52 | python train_1bb.py --data_path path/to/sunrgbd --dataset sunrgbd --log_dir log_sunrgbd --num_point 40000 --model hdnet_1bb --batch_size 16 53 | 54 | You can set the pretrained-weight using --pre_checkpoint_path flag. You can use the pretrained weight in [here](https://github.com/facebookresearch/DepthContrast). Please set the scale of the backbone using --scale accordingly. Use pretrained weight with scale 3 should achieve around 63.5 mAP@0.25. 55 | 56 | To test the trained model with its checkpoint: 57 | 58 | python eval.py --data_path path/to/sunrgbd --dataset sunrgbd --model hdnet --checkpoint_path path/to/checkpoint --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 59 | 60 | Example results will be dumped in the `eval_sunrgbd` folder (or any other folder you specify). You can run `python eval.py -h` to see the full options for evaluation. After the evaluation, you can use MeshLab to visualize the predicted votes and 3D bounding boxes (select wireframe mode to view the boxes). Final evaluation results will be printed on screen and also written in the `log_eval.txt` file under the dump directory. In default we evaluate with both AP@0.25 and AP@0.5 with 3D IoU on oriented boxes. A properly trained H3DNet should have around 60 mAP@0.25 and 39 mAP@0.5. 61 | 62 | ### Train and test on ScanNet 63 | 64 | To train a H3DNet model on Scannet data (fused scan): 65 | 66 | python train.py --data_path path/to/scannet_train_detection_data --dataset scannet --log_dir log_scannet --num_point 40000 --model hdnet --batch_size 8 67 | 68 | To run H3DNet with one backbone (less memory): 69 | 70 | python train_1bb.py --data_path path/to/scannet_train_detection_data --dataset scannet --log_dir log_scannet --num_point 40000 --model hdnet_1bb --batch_size 8 71 | 72 | It should provide 66 mAP@0.25 with training from scratch. You can set the pretrained-weight using --pre_checkpoint_path flag. You can use the pretrained weight in [here](https://github.com/facebookresearch/DepthContrast). Please set the scale of the backbone using --scale accordingly. Use pretrained weight with scale 3 should achieve around 69.0 mAP@0.25. 73 | 74 | To test the trained model with its checkpoint: 75 | 76 | python eval.py --data_path path/to/scannet_train_detection_data --dataset scannet --model hdnet --checkpoint_path path/to/checkpoint --dump_dir eval_scannet --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 77 | 78 | Example results will be dumped in the `eval_scannet` folder (or any other folder you specify). In default we evaluate with both AP@0.25 and AP@0.5 with 3D IoU on axis aligned boxes. A properly trained H3DNet should have around 67 mAP@0.25 and 48 mAP@0.5. 79 | 80 | ### Visualize predictions and ground truths 81 | Visualization codes for ScanNet and SUN RGB-D are in `utils/show_results_scannet.py` and `utils/show_results_sunrgbd.py` saparately. 82 | 83 | Before running them, you should change the data paths in the beginning of each script. 84 | 85 | To visualize ground truth scenes and bounding boxes of ScanNet, run 86 | 87 | python show_results_scannet.py gt 88 | 89 | To visualize ground truth scenes and bounding boxes of ScanNet, run 90 | 91 | python show_results_scannet.py pred 92 | 93 | Usages for SUN RGB-D are just replacing scripts with args unchanged. 94 | ## License 95 | H3DNet is relased under the MIT License. See the LICENSE file for more details. 96 | -------------------------------------------------------------------------------- /checkpoint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # All rights reserved. 4 | # 5 | # This source code is licensed under the license found in the 6 | # LICENSE file in the root directory of this source tree. 7 | # 8 | import logging 9 | import os 10 | 11 | import torch 12 | 13 | def _print_state_dict_shapes(state_dict): 14 | logging.info("Model state_dict:") 15 | for param_tensor in state_dict.keys(): 16 | logging.info(f"{param_tensor}:\t{state_dict[param_tensor].size()}") 17 | 18 | def init_model_from_weights( 19 | model, 20 | state_dict, 21 | skip_layers=None, 22 | print_init_layers=True, 23 | ): 24 | """ 25 | Initialize the model from any given params file. This is particularly useful 26 | during the finetuning process or when we want to evaluate a model on a range 27 | of tasks. 28 | skip_layers: string : layer names with this key are not copied 29 | print_init_layers: print whether layer was init or ignored 30 | indicates whether the layername was copied or not 31 | """ 32 | # whether it's a model from somewhere else or a model from this codebase 33 | state_dict = state_dict["model"] 34 | 35 | all_layers = model.state_dict() 36 | init_layers = {layername: False for layername in all_layers} 37 | 38 | 39 | new_state_dict = {} 40 | for param_name in state_dict: 41 | if "module.trunk.0" not in param_name: 42 | continue 43 | param_data = param_name.split(".") 44 | newname = "backbone_net1" 45 | for i in range(len(param_data[3:])): 46 | newname += "."+param_data[i+3] 47 | new_state_dict[newname] = state_dict[param_name] 48 | state_dict = new_state_dict 49 | 50 | local_rank = int(os.environ.get("LOCAL_RANK", 0)) 51 | not_found, not_init = [], [] 52 | for layername in all_layers.keys(): 53 | if ( 54 | skip_layers and len(skip_layers) > 0 and layername.find(skip_layers) >= 0 55 | ) or layername.find("num_batches_tracked") >= 0: 56 | if print_init_layers and (local_rank == 0): 57 | not_init.append(layername) 58 | print(f"Ignored layer:\t{layername}") 59 | continue 60 | if layername in state_dict: 61 | param = state_dict[layername] 62 | if not isinstance(param, torch.Tensor): 63 | param = torch.from_numpy(param) 64 | all_layers[layername].copy_(param) 65 | init_layers[layername] = True 66 | if print_init_layers and (local_rank == 0): 67 | print(f"Init layer:\t{layername}") 68 | else: 69 | not_found.append(layername) 70 | if print_init_layers and (local_rank == 0): 71 | print(f"Not found:\t{layername}") 72 | ####################### DEBUG ############################ 73 | # _print_state_dict_shapes(model.state_dict()) 74 | return model 75 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Evaluation routine for 3D object detection with SUN RGB-D and ScanNet. 7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | from datetime import datetime 13 | import argparse 14 | import importlib 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | from torch.utils.data import DataLoader 19 | 20 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 21 | ROOT_DIR = BASE_DIR 22 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 23 | from ap_helper import APCalculator, parse_predictions, parse_groundtruths 24 | from dump_helper import dump_results 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--data_path', default='/scratch/cluster/yanght/Dataset/sunrgbd/', help='path to dataset') 28 | parser.add_argument('--model', default='hdnet', help='Model file name [default: hdnet]') 29 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset name. sunrgbd or scannet. [default: sunrgbd]') 30 | parser.add_argument('--checkpoint_path', default=None, help='Model checkpoint path [default: None]') 31 | parser.add_argument('--dump_dir', default=None, help='Dump dir to save sample outputs [default: None]') 32 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 33 | parser.add_argument('--num_target', type=int, default=256, help='Point Number [default: 256]') 34 | parser.add_argument('--batch_size', type=int, default=8, help='Batch Size during training [default: 8]') 35 | parser.add_argument('--vote_factor', type=int, default=1, help='Number of votes generated from each seed [default: 1]') 36 | parser.add_argument('--cluster_sampling', default='vote_fps', help='Sampling strategy for vote clusters: vote_fps, seed_fps, random [default: vote_fps]') 37 | parser.add_argument('--ap_iou_thresh', type=float, default=0.25, help='AP IoU threshold [default: 0.25]') 38 | parser.add_argument('--no_height', action='store_true', help='Do NOT use height signal in input.') 39 | parser.add_argument('--use_color', action='store_true', help='Use RGB color in input.') 40 | parser.add_argument('--use_sunrgbd_v2', action='store_true', help='Use SUN RGB-D V2 box labels.') 41 | parser.add_argument('--use_3d_nms', action='store_true', help='Use 3D NMS instead of 2D NMS.') 42 | parser.add_argument('--use_cls_nms', action='store_true', help='Use per class NMS.') 43 | parser.add_argument('--use_old_type_nms', action='store_true', help='Use old type of NMS, IoBox2Area.') 44 | parser.add_argument('--per_class_proposal', action='store_true', help='Duplicate each proposal num_class times.') 45 | parser.add_argument('--nms_iou', type=float, default=0.25, help='NMS IoU threshold. [default: 0.25]') 46 | parser.add_argument('--conf_thresh', type=float, default=0.05, help='Filter out predictions with obj prob less than it. [default: 0.05]') 47 | parser.add_argument('--faster_eval', action='store_true', help='Faster evaluation by skippling empty bounding box removal.') 48 | parser.add_argument('--shuffle_dataset', action='store_true', help='Shuffle the dataset (random order).') 49 | parser.add_argument('--dump_results', action='store_true', help='Dump results.') 50 | 51 | FLAGS = parser.parse_args() 52 | 53 | if FLAGS.use_cls_nms: 54 | assert(FLAGS.use_3d_nms) 55 | 56 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 57 | BATCH_SIZE = FLAGS.batch_size 58 | NUM_POINT = FLAGS.num_point 59 | DUMP_DIR = FLAGS.dump_dir 60 | CHECKPOINT_PATH = FLAGS.checkpoint_path 61 | assert(CHECKPOINT_PATH is not None) 62 | FLAGS.DUMP_DIR = DUMP_DIR 63 | 64 | 65 | # Prepare DUMP_DIR 66 | if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) 67 | DUMP_FOUT = open(os.path.join(DUMP_DIR, 'log_eval.txt'), 'w') 68 | DUMP_FOUT.write(str(FLAGS)+'\n') 69 | def log_string(out_str): 70 | DUMP_FOUT.write(out_str+'\n') 71 | DUMP_FOUT.flush() 72 | print(out_str) 73 | 74 | # Init datasets and dataloaders 75 | def my_worker_init_fn(worker_id): 76 | np.random.seed(np.random.get_state()[1][0] + worker_id) 77 | 78 | if FLAGS.dataset == 'sunrgbd': 79 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 80 | from sunrgbd_detection_dataset_hd import SunrgbdDetectionVotesDataset, MAX_NUM_OBJ 81 | from model_util_sunrgbd import SunrgbdDatasetConfig 82 | DATASET_CONFIG = SunrgbdDatasetConfig() 83 | TEST_DATASET = SunrgbdDetectionVotesDataset(FLAGS.data_path, 'val', num_points=NUM_POINT, 84 | augment=False, use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 85 | use_v1=(not FLAGS.use_sunrgbd_v2)) 86 | elif FLAGS.dataset == 'scannet': 87 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 88 | from scannet_detection_dataset_hd import ScannetDetectionDataset, MAX_NUM_OBJ 89 | from model_util_scannet import ScannetDatasetConfig 90 | DATASET_CONFIG = ScannetDatasetConfig() 91 | TEST_DATASET = ScannetDetectionDataset(FLAGS.data_path, 'val', num_points=NUM_POINT, 92 | augment=False, use_angle=False, 93 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height)) 94 | else: 95 | print('Unknown dataset %s. Exiting...'%(FLAGS.dataset)) 96 | exit(-1) 97 | print(len(TEST_DATASET)) 98 | TEST_DATALOADER = DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, 99 | shuffle=FLAGS.shuffle_dataset, num_workers=4, worker_init_fn=my_worker_init_fn) 100 | 101 | # Init the model and optimzier 102 | MODEL = importlib.import_module(FLAGS.model) # import network module 103 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 104 | num_input_channel = int(FLAGS.use_color)*3 + int(not FLAGS.no_height)*1 105 | 106 | Detector = MODEL.HDNet 107 | 108 | net = Detector(num_class=DATASET_CONFIG.num_class, 109 | num_heading_bin=DATASET_CONFIG.num_heading_bin, 110 | num_size_cluster=DATASET_CONFIG.num_size_cluster, 111 | mean_size_arr=DATASET_CONFIG.mean_size_arr, 112 | num_proposal=FLAGS.num_target, 113 | input_feature_dim=num_input_channel, 114 | vote_factor=FLAGS.vote_factor, 115 | sampling=FLAGS.cluster_sampling) 116 | 117 | if torch.cuda.device_count() > 1: 118 | log_string("Let's use %d GPUs!" % (torch.cuda.device_count())) 119 | # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs 120 | net = nn.DataParallel(net) 121 | net.to(device) 122 | criterion = MODEL.get_loss 123 | 124 | # Load the Adam optimizer 125 | optimizer = optim.Adam(net.parameters(), lr=0.001) 126 | 127 | # Load checkpoint if there is any 128 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 129 | checkpoint = torch.load(CHECKPOINT_PATH) 130 | checkpoint_multigpu = dict() 131 | if torch.cuda.device_count() > 1: 132 | for name, param in checkpoint['model_state_dict'].items(): 133 | checkpoint_multigpu.update({'module.' + name: param}) 134 | net.load_state_dict(checkpoint_multigpu) 135 | else: 136 | net.load_state_dict(checkpoint['model_state_dict']) 137 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 138 | epoch = checkpoint['epoch'] 139 | log_string("Loaded checkpoint %s (epoch: %d)"%(CHECKPOINT_PATH, epoch)) 140 | 141 | # Used for AP calculation 142 | CONFIG_DICT = {'remove_empty_box':False, 'use_3d_nms':True, 143 | 'nms_iou':0.25, 'use_old_type_nms':False, 'cls_nms':True, 144 | 'per_class_proposal': True, 'conf_thresh':0.05, 145 | 'dataset_config':DATASET_CONFIG} 146 | 147 | CONFIG_DICT_L = {'remove_empty_box':False, 'use_3d_nms':True, 148 | 'nms_iou':0.5, 'use_old_type_nms':False, 'cls_nms':True, 149 | 'per_class_proposal': True, 'conf_thresh':0.05, 150 | 'dataset_config':DATASET_CONFIG} 151 | 152 | # ------------------------------------------------------------------------- GLOBAL CONFIG END 153 | 154 | def evaluate_one_epoch(): 155 | stat_dict = {} 156 | 157 | ap_calculator = APCalculator(ap_iou_thresh=FLAGS.ap_iou_thresh, 158 | class2type_map=DATASET_CONFIG.class2type) 159 | ap_calculator_l = APCalculator(ap_iou_thresh=FLAGS.ap_iou_thresh*2, 160 | class2type_map=DATASET_CONFIG.class2type) 161 | 162 | net.eval() # set model to eval mode (for bn and dp) 163 | for batch_idx, batch_data_label in enumerate(TEST_DATALOADER): 164 | end_points = {} 165 | if batch_idx % 10 == 0: 166 | print('Eval batch: %d'%(batch_idx)) 167 | for key in batch_data_label: 168 | batch_data_label[key] = batch_data_label[key].to(device) 169 | 170 | # Forward pass 171 | inputs = {'point_clouds': batch_data_label['point_clouds']} 172 | with torch.no_grad(): 173 | end_points = net(inputs, end_points) 174 | 175 | # Compute loss 176 | for key in batch_data_label: 177 | end_points[key] = batch_data_label[key] 178 | loss, end_points = criterion(inputs, end_points, DATASET_CONFIG) 179 | 180 | # Accumulate statistics and print out 181 | for key in end_points: 182 | if 'loss' in key or 'acc' in key or 'ratio' in key: 183 | if key not in stat_dict: stat_dict[key] = 0 184 | stat_dict[key] += end_points[key].item() 185 | 186 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT, opt_ang=(FLAGS.dataset == 'sunrgbd')) 187 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 188 | ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls) 189 | 190 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT_L, opt_ang=(FLAGS.dataset == 'sunrgbd')) 191 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT_L) 192 | ap_calculator_l.step(batch_pred_map_cls, batch_gt_map_cls) 193 | 194 | if FLAGS.dump_results: 195 | dump_results(end_points, DUMP_DIR+'/result/', DATASET_CONFIG, TEST_DATASET, opt_ang=(FLAGS.dataset == 'sunrgbd')) 196 | 197 | 198 | # Log statistics 199 | for key in sorted(stat_dict.keys()): 200 | log_string('eval mean %s: %f'%(key, stat_dict[key]/(float(batch_idx+1)))) 201 | 202 | metrics_dict = ap_calculator.compute_metrics() 203 | for key in metrics_dict: 204 | log_string('iou = 0.25, eval %s: %f'%(key, metrics_dict[key])) 205 | metrics_dict = ap_calculator_l.compute_metrics() 206 | for key in metrics_dict: 207 | log_string('iou = 0.5, eval %s: %f'%(key, metrics_dict[key])) 208 | 209 | mean_loss = stat_dict['loss']/float(batch_idx+1) 210 | return mean_loss 211 | 212 | 213 | def eval(): 214 | log_string(str(datetime.now())) 215 | # Reset numpy seed. 216 | # REF: https://github.com/pytorch/pytorch/issues/5059 217 | np.random.seed() 218 | loss = evaluate_one_epoch() 219 | 220 | if __name__=='__main__': 221 | eval() 222 | -------------------------------------------------------------------------------- /models/ap_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Helper functions and class to calculate Average Precisions for 3D object detection. 7 | """ 8 | import os 9 | import sys 10 | import numpy as np 11 | import torch 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 15 | from eval_det import eval_det_cls, eval_det_multiprocessing 16 | from eval_det import get_iou_obb 17 | from nms import nms_2d_faster, nms_3d_faster, nms_3d_faster_samecls 18 | from box_util import get_3d_box 19 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 20 | from sunrgbd_utils import extract_pc_in_box3d 21 | 22 | def flip_axis_to_camera(pc): 23 | ''' Flip X-right,Y-forward,Z-up to X-right,Y-down,Z-forward 24 | Input and output are both (N,3) array 25 | ''' 26 | pc2 = np.copy(pc) 27 | pc2[...,[0,1,2]] = pc2[...,[0,2,1]] # cam X,Y,Z = depth X,-Z,Y 28 | pc2[...,1] *= -1 29 | return pc2 30 | 31 | def flip_axis_to_depth(pc): 32 | pc2 = np.copy(pc) 33 | pc2[...,[0,1,2]] = pc2[...,[0,2,1]] # depth X,Y,Z = cam X,Z,-Y 34 | pc2[...,2] *= -1 35 | return pc2 36 | 37 | def softmax(x): 38 | ''' Numpy function for softmax''' 39 | shape = x.shape 40 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 41 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 42 | return probs 43 | 44 | def parse_predictions(end_points, config_dict, opt_ang=False, opt_sem=False): 45 | """ Parse predictions to OBB parameters and suppress overlapping boxes 46 | 47 | Args: 48 | end_points: dict 49 | {point_clouds, center, heading_scores, heading_residuals, 50 | size_scores, size_residuals, sem_cls_scores} 51 | config_dict: dict 52 | {dataset_config, remove_empty_box, use_3d_nms, nms_iou, 53 | use_old_type_nms, conf_thresh, per_class_proposal} 54 | 55 | Returns: 56 | batch_pred_map_cls: a list of len == batch size (BS) 57 | [pred_list_i], i = 0, 1, ..., BS-1 58 | where pred_list_i = [(pred_sem_cls, box_params, box_score)_j] 59 | where j = 0, ..., num of valid detections - 1 from sample input i 60 | """ 61 | 62 | pred_center = end_points['center'+'opt']# + end_points['center'+'opt'] # B,num_proposal,3 63 | 64 | if opt_ang: 65 | pred_heading_class = torch.argmax(end_points['heading_scores'+'center'], -1) # B,num_proposal 66 | pred_heading_residual = torch.gather(end_points['heading_residuals'+'opt'], 2, 67 | pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 68 | else: 69 | pred_heading_class = torch.argmax(end_points['heading_scores'+'center'], -1) # B,num_proposal 70 | pred_heading_residual = torch.gather(end_points['heading_residuals'+'center'], 2, 71 | pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 72 | pred_heading_residual.squeeze_(2) 73 | 74 | pred_size_class = torch.argmax(end_points['size_scores'+'center'], -1) # B,num_proposal 75 | pred_size_residual = torch.gather(end_points['size_residuals'+'opt'], 2, 76 | pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 77 | pred_size_residual.squeeze_(2) 78 | 79 | if opt_sem: 80 | pred_sem_cls = torch.argmax(end_points['sem_cls_scores'+'opt'], -1) # B,num_proposal 81 | sem_cls_probs = softmax(end_points['sem_cls_scores'+'opt'].detach().cpu().numpy()) # B,num_proposal,10 82 | else: 83 | pred_sem_cls = torch.argmax(end_points['sem_cls_scores'+'center'], -1) # B,num_proposal 84 | sem_cls_probs = softmax(end_points['sem_cls_scores'+'center'].detach().cpu().numpy()) # B,num_proposal,10 85 | pred_sem_cls_prob = np.max(sem_cls_probs,-1) # B,num_proposal 86 | 87 | num_proposal = pred_center.shape[1] 88 | # Since we operate in upright_depth coord for points, while util functions 89 | # assume upright_camera coord. 90 | bsize = pred_center.shape[0] 91 | pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3)) 92 | pred_center_upright_camera = flip_axis_to_camera(pred_center.detach().cpu().numpy()) 93 | 94 | for i in range(bsize): 95 | for j in range(num_proposal): 96 | heading_angle = config_dict['dataset_config'].class2angle(\ 97 | pred_heading_class[i,j].detach().cpu().numpy(), pred_heading_residual[i,j].detach().cpu().numpy()) 98 | box_size = config_dict['dataset_config'].class2size(\ 99 | int(pred_size_class[i,j].detach().cpu().numpy()), pred_size_residual[i,j].detach().cpu().numpy()) 100 | corners_3d_upright_camera = get_3d_box(box_size, heading_angle, pred_center_upright_camera[i,j,:]) 101 | pred_corners_3d_upright_camera[i,j] = corners_3d_upright_camera 102 | 103 | K = pred_center.shape[1] # K==num_proposal 104 | nonempty_box_mask = np.ones((bsize, K)) 105 | 106 | if config_dict['remove_empty_box']: 107 | # ------------------------------------- 108 | # Remove predicted boxes without any point within them.. 109 | batch_pc = end_points['point_clouds'].cpu().numpy()[:,:,0:3] # B,N,3 110 | for i in range(bsize): 111 | pc = batch_pc[i,:,:] # (N,3) 112 | for j in range(K): 113 | box3d = pred_corners_3d_upright_camera[i,j,:,:] # (8,3) 114 | box3d = flip_axis_to_depth(box3d) 115 | pc_in_box,inds = extract_pc_in_box3d(pc, box3d) 116 | if len(pc_in_box) < 5: 117 | nonempty_box_mask[i,j] = 0 118 | # ------------------------------------- 119 | 120 | obj_logits = end_points['objectness_scores'+'opt'].detach().cpu().numpy() 121 | obj_prob = softmax(obj_logits)[:,:,1] # (B,K) 122 | 123 | if not config_dict['use_3d_nms']: 124 | # ---------- NMS input: pred_with_prob in (B,K,7) ----------- 125 | pred_mask = np.zeros((bsize, K)) 126 | for i in range(bsize): 127 | boxes_2d_with_prob = np.zeros((K,5)) 128 | for j in range(K): 129 | boxes_2d_with_prob[j,0] = np.min(pred_corners_3d_upright_camera[i,j,:,0]) 130 | boxes_2d_with_prob[j,2] = np.max(pred_corners_3d_upright_camera[i,j,:,0]) 131 | boxes_2d_with_prob[j,1] = np.min(pred_corners_3d_upright_camera[i,j,:,2]) 132 | boxes_2d_with_prob[j,3] = np.max(pred_corners_3d_upright_camera[i,j,:,2]) 133 | boxes_2d_with_prob[j,4] = obj_prob[i,j] 134 | nonempty_box_inds = np.where(nonempty_box_mask[i,:]==1)[0] 135 | pick = nms_2d_faster(boxes_2d_with_prob[nonempty_box_mask[i,:]==1,:], 136 | config_dict['nms_iou'], config_dict['use_old_type_nms']) 137 | assert(len(pick)>0) 138 | pred_mask[i, nonempty_box_inds[pick]] = 1 139 | end_points['pred_mask'] = pred_mask 140 | # ---------- NMS output: pred_mask in (B,K) ----------- 141 | elif config_dict['use_3d_nms'] and (not config_dict['cls_nms']): 142 | # ---------- NMS input: pred_with_prob in (B,K,7) ----------- 143 | pred_mask = np.zeros((bsize, K)) 144 | for i in range(bsize): 145 | boxes_3d_with_prob = np.zeros((K,7)) 146 | for j in range(K): 147 | boxes_3d_with_prob[j,0] = np.min(pred_corners_3d_upright_camera[i,j,:,0]) 148 | boxes_3d_with_prob[j,1] = np.min(pred_corners_3d_upright_camera[i,j,:,1]) 149 | boxes_3d_with_prob[j,2] = np.min(pred_corners_3d_upright_camera[i,j,:,2]) 150 | boxes_3d_with_prob[j,3] = np.max(pred_corners_3d_upright_camera[i,j,:,0]) 151 | boxes_3d_with_prob[j,4] = np.max(pred_corners_3d_upright_camera[i,j,:,1]) 152 | boxes_3d_with_prob[j,5] = np.max(pred_corners_3d_upright_camera[i,j,:,2]) 153 | boxes_3d_with_prob[j,6] = obj_prob[i,j] 154 | nonempty_box_inds = np.where(nonempty_box_mask[i,:]==1)[0] 155 | pick = nms_3d_faster(boxes_3d_with_prob[nonempty_box_mask[i,:]==1,:], 156 | config_dict['nms_iou'], config_dict['use_old_type_nms']) 157 | assert(len(pick)>0) 158 | pred_mask[i, nonempty_box_inds[pick]] = 1 159 | end_points['pred_mask'] = pred_mask 160 | # ---------- NMS output: pred_mask in (B,K) ----------- 161 | elif config_dict['use_3d_nms'] and config_dict['cls_nms']: 162 | # ---------- NMS input: pred_with_prob in (B,K,8) ----------- 163 | pred_mask = np.zeros((bsize, K)) 164 | for i in range(bsize): 165 | boxes_3d_with_prob = np.zeros((K,8)) 166 | for j in range(K): 167 | boxes_3d_with_prob[j,0] = np.min(pred_corners_3d_upright_camera[i,j,:,0]) 168 | boxes_3d_with_prob[j,1] = np.min(pred_corners_3d_upright_camera[i,j,:,1]) 169 | boxes_3d_with_prob[j,2] = np.min(pred_corners_3d_upright_camera[i,j,:,2]) 170 | boxes_3d_with_prob[j,3] = np.max(pred_corners_3d_upright_camera[i,j,:,0]) 171 | boxes_3d_with_prob[j,4] = np.max(pred_corners_3d_upright_camera[i,j,:,1]) 172 | boxes_3d_with_prob[j,5] = np.max(pred_corners_3d_upright_camera[i,j,:,2]) 173 | boxes_3d_with_prob[j,6] = obj_prob[i,j] 174 | boxes_3d_with_prob[j,7] = pred_sem_cls[i,j] # only suppress if the two boxes are of the same class!! 175 | nonempty_box_inds = np.where(nonempty_box_mask[i,:]==1)[0] 176 | pick = nms_3d_faster_samecls(boxes_3d_with_prob[nonempty_box_mask[i,:]==1,:], 177 | config_dict['nms_iou'], config_dict['use_old_type_nms']) 178 | assert(len(pick)>0) 179 | pred_mask[i, nonempty_box_inds[pick]] = 1 180 | end_points['pred_mask'] = pred_mask 181 | # ---------- NMS output: pred_mask in (B,K) ----------- 182 | 183 | batch_pred_map_cls = [] # a list (len: batch_size) of list (len: num of predictions per sample) of tuples of pred_cls, pred_box and conf (0-1) 184 | for i in range(bsize): 185 | if config_dict['per_class_proposal']: 186 | cur_list = [] 187 | for ii in range(config_dict['dataset_config'].num_class): 188 | cur_list += [(ii, pred_corners_3d_upright_camera[i,j], sem_cls_probs[i,j,ii]*obj_prob[i,j]) \ 189 | for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']] 190 | batch_pred_map_cls.append(cur_list) 191 | else: 192 | batch_pred_map_cls.append([(pred_sem_cls[i,j].item(), pred_corners_3d_upright_camera[i,j], obj_prob[i,j]) \ 193 | for j in range(pred_center.shape[1]) if pred_mask[i,j]==1 and obj_prob[i,j]>config_dict['conf_thresh']]) 194 | end_points['batch_pred_map_cls'] = batch_pred_map_cls 195 | 196 | return batch_pred_map_cls 197 | 198 | def parse_groundtruths(end_points, config_dict): 199 | """ Parse groundtruth labels to OBB parameters. 200 | 201 | Args: 202 | end_points: dict 203 | {center_label, heading_class_label, heading_residual_label, 204 | size_class_label, size_residual_label, sem_cls_label, 205 | box_label_mask} 206 | config_dict: dict 207 | {dataset_config} 208 | 209 | Returns: 210 | batch_gt_map_cls: a list of len == batch_size (BS) 211 | [gt_list_i], i = 0, 1, ..., BS-1 212 | where gt_list_i = [(gt_sem_cls, gt_box_params)_j] 213 | where j = 0, ..., num of objects - 1 at sample input i 214 | """ 215 | center_label = end_points['center_label'] 216 | heading_class_label = end_points['heading_class_label'] 217 | heading_residual_label = end_points['heading_residual_label'] 218 | size_class_label = end_points['size_class_label'] 219 | size_residual_label = end_points['size_residual_label'] 220 | box_label_mask = end_points['box_label_mask'] 221 | sem_cls_label = end_points['sem_cls_label'] 222 | bsize = center_label.shape[0] 223 | 224 | K2 = center_label.shape[1] # K2==MAX_NUM_OBJ 225 | gt_corners_3d_upright_camera = np.zeros((bsize, K2, 8, 3)) 226 | gt_center_upright_camera = flip_axis_to_camera(center_label[:,:,0:3].detach().cpu().numpy()) 227 | for i in range(bsize): 228 | for j in range(K2): 229 | if box_label_mask[i,j] == 0: continue 230 | heading_angle = config_dict['dataset_config'].class2angle(heading_class_label[i,j].detach().cpu().numpy(), heading_residual_label[i,j].detach().cpu().numpy()) 231 | box_size = config_dict['dataset_config'].class2size(int(size_class_label[i,j].detach().cpu().numpy()), size_residual_label[i,j].detach().cpu().numpy()) 232 | corners_3d_upright_camera = get_3d_box(box_size, heading_angle, gt_center_upright_camera[i,j,:]) 233 | gt_corners_3d_upright_camera[i,j] = corners_3d_upright_camera 234 | 235 | batch_gt_map_cls = [] 236 | for i in range(bsize): 237 | batch_gt_map_cls.append([(sem_cls_label[i,j].item(), gt_corners_3d_upright_camera[i,j]) for j in range(gt_corners_3d_upright_camera.shape[1]) if box_label_mask[i,j]==1]) 238 | end_points['batch_gt_map_cls'] = batch_gt_map_cls 239 | 240 | return batch_gt_map_cls 241 | 242 | class APCalculator(object): 243 | ''' Calculating Average Precision ''' 244 | def __init__(self, ap_iou_thresh=0.25, class2type_map=None): 245 | """ 246 | Args: 247 | ap_iou_thresh: float between 0 and 1.0 248 | IoU threshold to judge whether a prediction is positive. 249 | class2type_map: [optional] dict {class_int:class_name} 250 | """ 251 | self.ap_iou_thresh = ap_iou_thresh 252 | self.class2type_map = class2type_map 253 | self.reset() 254 | 255 | def step(self, batch_pred_map_cls, batch_gt_map_cls): 256 | """ Accumulate one batch of prediction and groundtruth. 257 | 258 | Args: 259 | batch_pred_map_cls: a list of lists [[(pred_cls, pred_box_params, score),...],...] 260 | batch_gt_map_cls: a list of lists [[(gt_cls, gt_box_params),...],...] 261 | should have the same length with batch_pred_map_cls (batch_size) 262 | """ 263 | 264 | bsize = len(batch_pred_map_cls) 265 | assert(bsize == len(batch_gt_map_cls)) 266 | for i in range(bsize): 267 | self.gt_map_cls[self.scan_cnt] = batch_gt_map_cls[i] 268 | self.pred_map_cls[self.scan_cnt] = batch_pred_map_cls[i] 269 | self.scan_cnt += 1 270 | 271 | def compute_metrics(self): 272 | """ Use accumulated predictions and groundtruths to compute Average Precision. 273 | """ 274 | rec, prec, ap = eval_det_multiprocessing(self.pred_map_cls, self.gt_map_cls, ovthresh=self.ap_iou_thresh, get_iou_func=get_iou_obb) 275 | ret_dict = {} 276 | for key in sorted(ap.keys()): 277 | clsname = self.class2type_map[key] if self.class2type_map else str(key) 278 | ret_dict['%s Average Precision'%(clsname)] = ap[key] 279 | ret_dict['mAP'] = np.mean(list(ap.values())) 280 | rec_list = [] 281 | for key in sorted(ap.keys()): 282 | clsname = self.class2type_map[key] if self.class2type_map else str(key) 283 | try: 284 | ret_dict['%s Recall'%(clsname)] = rec[key][-1] 285 | rec_list.append(rec[key][-1]) 286 | except: 287 | ret_dict['%s Recall'%(clsname)] = 0 288 | rec_list.append(0) 289 | ret_dict['AR'] = np.mean(rec_list) 290 | return ret_dict 291 | 292 | def reset(self): 293 | self.gt_map_cls = {} # {scan_id: [(classname, bbox)]} 294 | self.pred_map_cls = {} # {scan_id: [(classname, bbox, score)]} 295 | self.scan_cnt = 0 296 | -------------------------------------------------------------------------------- /models/dump_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | import os 9 | import sys 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | import pc_util 14 | import scipy.io as sio 15 | import scipy 16 | 17 | DUMP_CONF_THRESH = 0.5 # Dump boxes with obj prob larger than that. 18 | 19 | def params2bbox(center, xsize, ysize, zsize, angle): 20 | ''' from bbox_center, angle and size to bbox 21 | @Args: 22 | center: (3) 23 | x/y/zsize: scalar 24 | angle: -pi ~ pi 25 | @Returns: 26 | bbox: 8 x 3, order: 27 | [[xmin, ymin, zmin], [xmin, ymin, zmax], [xmin, ymax, zmin], [xmin, ymax, zmax], 28 | [xmax, ymin, zmin], [xmax, ymin, zmax], [xmax, ymax, zmin], [xmax, ymax, zmax]] 29 | ''' 30 | vx = np.array([np.cos(angle), np.sin(angle), 0]) 31 | vy = np.array([-np.sin(angle), np.cos(angle), 0]) 32 | vx = vx * np.abs(xsize) / 2 33 | vy = vy * np.abs(ysize) / 2 34 | vz = np.array([0, 0, np.abs(zsize) / 2]) 35 | bbox = np.array([\ 36 | center - vx - vy - vz, center - vx - vy + vz, 37 | center - vx + vy - vz, center - vx + vy + vz, 38 | center + vx - vy - vz, center + vx - vy + vz, 39 | center + vx + vy - vz, center + vx + vy + vz]) 40 | return bbox 41 | 42 | def softmax(x): 43 | ''' Numpy function for softmax''' 44 | shape = x.shape 45 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 46 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 47 | return probs 48 | 49 | 50 | DUMP_CONF_THRESH = 0.5 # Dump boxes with obj prob larger than that. 51 | 52 | def softmax(x): 53 | ''' Numpy function for softmax''' 54 | shape = x.shape 55 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 56 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 57 | return probs 58 | 59 | def dump_results(end_points, dump_dir, config, dataset, opt_ang, mode='opt'): 60 | ''' 61 | similar to dump results 62 | scan_names: all scan names 63 | ''' 64 | if not os.path.exists(dump_dir): 65 | os.system('mkdir %s'%(dump_dir)) 66 | 67 | # INPUT 68 | point_clouds = end_points['point_clouds'].cpu().numpy() 69 | batch_size = point_clouds.shape[0] 70 | 71 | # NETWORK OUTPUTS 72 | seed_xyz_z = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 73 | seed_xyz_xy = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 74 | seed_xyz_line = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 75 | 76 | gt_center = end_points['center_label'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 77 | gt_num = end_points['num_instance'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 78 | scan_idxes = end_points['scan_idx'].detach().cpu().numpy() 79 | 80 | pred_center = end_points['vote_xyz'].detach().cpu().numpy() 81 | 82 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'+mode].detach().cpu().numpy() 83 | objectness_scores = end_points['objectness_scores'+mode].detach().cpu().numpy() # (B,K,2) 84 | pred_center = end_points['center'+mode].detach().cpu().numpy() # (B,K,3) 85 | 86 | pred_heading_class = torch.argmax(end_points['heading_scores'+'center'], -1) # B,num_proposal 87 | if opt_ang: 88 | pred_heading_residual = torch.gather(end_points['heading_residuals'+'opt'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 89 | else: 90 | pred_heading_residual = torch.gather(end_points['heading_residuals'+'center'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 91 | 92 | pred_heading_class = pred_heading_class.detach().cpu().numpy() # B,num_proposal 93 | pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal 94 | 95 | pred_size_class = torch.argmax(end_points['size_scores'+'center'], -1) # B,num_proposal 96 | pred_size_residual = torch.gather(end_points['size_residuals'+mode], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 97 | pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal,3 98 | pred_sem_cls = torch.argmax(end_points['sem_cls_scores'+'center'], -1) # B, num_proposal 99 | pred_sem_cls = pred_sem_cls.detach().cpu().numpy() 100 | 101 | pred_mask = end_points['pred_mask'] # B,num_proposal 102 | 103 | # LABELS 104 | gt_center = end_points['center_label'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 105 | gt_mask = end_points['box_label_mask'].cpu().numpy() # B,K2 106 | gt_heading_class = end_points['heading_class_label'].cpu().numpy() # B,K2 107 | gt_heading_residual = end_points['heading_residual_label'].cpu().numpy() # B,K2 108 | gt_size_class = end_points['size_class_label'].cpu().numpy() # B,K2 109 | gt_size_residual = end_points['size_residual_label'].cpu().numpy() # B,K2,3 110 | objectness_label = end_points['objectness_label'+mode].detach().cpu().numpy() # (B,K,) 111 | objectness_mask = end_points['objectness_mask'+mode].detach().cpu().numpy() # (B,K,) 112 | sem_cls_label = end_points['sem_cls_label'].detach().cpu().numpy() 113 | 114 | ### Boundary points 115 | boundary_gt_z = end_points['sub_point_sem_cls_label'+'_z'].detach().cpu().numpy() 116 | boundary_pred_z = end_points['pred_flag'+'_z'].detach().cpu().numpy() 117 | boundary_gt_xy = end_points['sub_point_sem_cls_label'+'_xy'].detach().cpu().numpy() 118 | boundary_pred_xy = end_points['pred_flag'+'_xy'].detach().cpu().numpy() 119 | boundary_gt_line = end_points['sub_point_sem_cls_label'+'_line'].detach().cpu().numpy() 120 | boundary_pred_line = end_points['pred_flag'+'_line'].detach().cpu().numpy() 121 | 122 | gt_center_z = end_points['surface_center_gt_z'].detach().cpu().numpy() 123 | gt_sem_z = end_points['surface_sem_gt_z'].detach().cpu().numpy() 124 | gt_mask_z = end_points['surface_mask_gt_z'].detach().cpu().numpy() 125 | 126 | gt_center_xy = end_points['surface_center_gt_xy'].detach().cpu().numpy() 127 | gt_sem_xy = end_points['surface_sem_gt_xy'].detach().cpu().numpy() 128 | gt_mask_xy = end_points['surface_mask_gt_xy'].detach().cpu().numpy() 129 | 130 | gt_center_line = end_points['surface_center_gt_line'].detach().cpu().numpy() 131 | gt_sem_line = end_points['surface_sem_gt_line'].detach().cpu().numpy() 132 | gt_mask_line = end_points['surface_mask_gt_line'].detach().cpu().numpy() 133 | 134 | pred_center_z = end_points['center_z'].detach().cpu().numpy() 135 | pred_center_xy = end_points['center_xy'].detach().cpu().numpy() 136 | pred_center_line = end_points['center_line'].detach().cpu().numpy() 137 | 138 | pred_size_z = end_points['size_residuals_z'].detach().cpu().numpy() 139 | pred_size_xy = end_points['size_residuals_xy'].detach().cpu().numpy() 140 | 141 | pred_sem_z = end_points['sem_cls_scores_z'].detach().cpu().numpy() 142 | pred_sem_xy = end_points['sem_cls_scores_xy'].detach().cpu().numpy() 143 | pred_sem_line = end_points['sem_cls_scores_line'].detach().cpu().numpy() 144 | 145 | num_proposal = pred_center.shape[1] 146 | for i in range(batch_size): 147 | idx = scan_idxes[i] 148 | scan = dataset.scan_names[idx] 149 | print('-' * 30) 150 | print(scan) 151 | print('-' * 30) 152 | 153 | box_pred_list = [] 154 | box_gt_list = [] 155 | obb_pred_list = [] 156 | obb_gt_list = [] 157 | 158 | for j in range(num_proposal): 159 | obb = config.param2obb2(pred_center[i,j,0:3], pred_heading_class[i,j], pred_heading_residual[i,j], 160 | pred_size_class[i,j], pred_size_residual[i,j]) 161 | obb_pred_list.append(np.hstack([obb, pred_sem_cls[i, j] + 1])) # ATTENTION: need to + 1 162 | box = params2bbox(obb[:3], obb[3], obb[4], obb[5], obb[6]) 163 | box_pred_list.append(box) 164 | obb_pred_mat = np.array(obb_pred_list) 165 | 166 | for j in range(gt_center.shape[1]): 167 | if gt_mask[i, j] == 0: continue 168 | obb = config.param2obb2(gt_center[i,j,0:3], gt_heading_class[i,j], gt_heading_residual[i,j], 169 | gt_size_class[i,j], gt_size_residual[i,j]) 170 | obb_gt_list.append(np.hstack([obb, sem_cls_label[i, j] + 1])) # ATTENTION: need to + 1 171 | box = params2bbox(obb[:3], obb[3], obb[4], obb[5], obb[6]) 172 | box_gt_list.append(box) 173 | obb_gt_mat = np.array(obb_gt_list) 174 | 175 | scipy.io.savemat(dump_dir + mode + scan + '_gt.mat', {'gt': obb_gt_mat}) 176 | scipy.io.savemat(dump_dir + mode + scan + '_boundary_z.mat', {'gt': boundary_gt_z[i,...], 'pred': boundary_pred_z[i,...], 'origpc': point_clouds[i,...], 'seedpc': seed_xyz_z[i,...], 'gt_center': gt_center_z[i,...], 'gt_sem': gt_sem_z[i,...], 'gt_mask': gt_mask_z[i,...], 'pred_center': pred_center_z[i,...], 'pred_sem': pred_sem_z[i,...], 'pred_size': pred_size_z[i,...]}) 177 | scipy.io.savemat(dump_dir + mode + scan + '_boundary_xy.mat', {'gt': boundary_gt_xy[i,...], 'pred': boundary_pred_xy[i,...], 'origpc': point_clouds[i,...], 'seedpc': seed_xyz_xy[i,...], 'gt_center': gt_center_xy[i,...], 'gt_sem': gt_sem_xy[i,...], 'gt_mask': gt_mask_xy[i,...], 'pred_center': pred_center_xy[i,...], 'pred_sem': pred_sem_xy[i,...], 'pred_size': pred_size_xy[i,...]}) 178 | scipy.io.savemat(dump_dir + mode + scan + '_boundary_line.mat', {'gt': boundary_gt_line[i,...], 'pred': boundary_pred_line[i,...], 'origpc': point_clouds[i,...], 'seedpc': seed_xyz_line[i,...], 'gt_center': gt_center_line[i,...], 'gt_sem': gt_sem_line[i,...], 'gt_mask': gt_mask_line[i,...], 'pred_center': pred_center_line[i,...], 'pred_sem': pred_sem_line[i,...]}) 179 | 180 | # uncomment to visualize 181 | # Dump predicted bounding boxes 182 | objectness_prob = softmax(objectness_scores[i,:,:])[:,1] # (K,) 183 | select_idx = np.logical_and(objectness_prob>DUMP_CONF_THRESH, pred_mask[i,:]==1) 184 | box_pred_nms_list = [] 185 | obb_pred_nms_list = [] 186 | for i, val in enumerate(select_idx.tolist()): 187 | if val: 188 | box_pred_nms_list.append(box_pred_list[i]) 189 | obb_pred_nms_list.append(obb_pred_list[i]) 190 | 191 | votenet_pred_nms_arr = np.array(obb_pred_nms_list) 192 | np.save(dump_dir + mode + scan + '_nms.npy', votenet_pred_nms_arr) 193 | -------------------------------------------------------------------------------- /models/hdnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import numpy as np 15 | import sys 16 | import os 17 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 18 | ROOT_DIR = os.path.dirname(BASE_DIR) 19 | sys.path.append(BASE_DIR) 20 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 21 | import pc_util 22 | 23 | from backbone_module import Pointnet2Backbone 24 | from voting_module import VotingModule 25 | 26 | from proposal_module_refine import ProposalModuleRefine 27 | from proposal_module_surface import PrimitiveModule 28 | 29 | from dump_helper import dump_results 30 | from loss_helper import get_loss 31 | 32 | class HDNet(nn.Module): 33 | r""" 34 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 35 | 36 | Parameters 37 | ---------- 38 | num_class: int 39 | Number of semantics classes to predict over -- size of softmax classifier 40 | num_heading_bin: int 41 | num_size_cluster: int 42 | input_feature_dim: (default: 0) 43 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 44 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 45 | num_proposal: int (default: 128) 46 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 47 | vote_factor: (default: 1) 48 | Number of votes generated from each seed point. 49 | """ 50 | 51 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 52 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps', with_angle=False): 53 | super().__init__() 54 | 55 | self.num_class = num_class 56 | self.num_heading_bin = num_heading_bin 57 | self.num_size_cluster = num_size_cluster 58 | self.mean_size_arr = mean_size_arr 59 | self.input_feature_dim = input_feature_dim 60 | self.num_proposal = num_proposal 61 | self.vote_factor = vote_factor 62 | self.sampling=sampling 63 | 64 | # Backbone point feature learning: 4 bb tower 65 | self.backbone_net1 = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) ### Just xyz + height 66 | self.backbone_net2 = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) ### Just xyz + height 67 | self.backbone_net3 = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) ### Just xyz + height 68 | self.backbone_net4 = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) ### Just xyz + height 69 | 70 | ### Feature concatenation 71 | self.conv_agg1 = torch.nn.Conv1d(256*4,256*2,1) 72 | self.bn_agg1 = torch.nn.BatchNorm1d(256*2) 73 | self.conv_agg2 = torch.nn.Conv1d(256*2,256,1) 74 | self.bn_agg2 = torch.nn.BatchNorm1d(256) 75 | 76 | ### Existence flag prediction 77 | self.conv_flag_z1 = torch.nn.Conv1d(256,128,1) 78 | self.bn_flag_z1 = torch.nn.BatchNorm1d(128) 79 | self.conv_flag_z2 = torch.nn.Conv1d(128,2,1) 80 | 81 | self.conv_flag_xy1 = torch.nn.Conv1d(256,128,1) 82 | self.bn_flag_xy1 = torch.nn.BatchNorm1d(128) 83 | self.conv_flag_xy2 = torch.nn.Conv1d(128,2,1) 84 | 85 | self.conv_flag_line1 = torch.nn.Conv1d(256,128,1) 86 | self.bn_flag_line1 = torch.nn.BatchNorm1d(128) 87 | self.conv_flag_line2 = torch.nn.Conv1d(128,2,1) 88 | 89 | # Hough voting and clustering 90 | self.vgen = VotingModule(self.vote_factor, 256) 91 | self.vgen_z = VotingModule(self.vote_factor, 256) 92 | self.vgen_xy = VotingModule(self.vote_factor, 256) 93 | self.vgen_line = VotingModule(self.vote_factor, 256) 94 | 95 | # Vote aggregation and detection 96 | self.pnet_z = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 97 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256, numd=2) 98 | self.pnet_xy = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 99 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256, numd=1) 100 | self.pnet_line = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 101 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256, numd=0) 102 | 103 | self.pnet_final = ProposalModuleRefine(num_class, num_heading_bin, num_size_cluster, 104 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256, with_angle=with_angle) 105 | 106 | def forward(self, inputs, end_points, mode=""): 107 | """ Forward pass of the network 108 | 109 | Args: 110 | inputs: dict 111 | {point_clouds} 112 | 113 | point_clouds: Variable(torch.cuda.FloatTensor) 114 | (B, N, 3 + input_channels) tensor 115 | Point cloud to run predicts on 116 | Each point in the point-cloud MUST 117 | be formated as (x, y, z, features...) 118 | Returns: 119 | end_points: dict 120 | """ 121 | batch_size = inputs['point_clouds'].shape[0] 122 | 123 | end_points = self.backbone_net1(inputs['point_clouds'], end_points) 124 | end_points = self.backbone_net2(inputs['point_clouds'], end_points, mode='net1') 125 | end_points = self.backbone_net3(inputs['point_clouds'], end_points, mode='net2') 126 | end_points = self.backbone_net4(inputs['point_clouds'], end_points, mode='net3') 127 | 128 | ### Extract feature here 129 | xyz = end_points['fp2_xyz'] 130 | features1 = end_points['fp2_features'] 131 | features2 = end_points['fp2_features'+'net1'] 132 | features3 = end_points['fp2_features'+'net2'] 133 | features4 = end_points['fp2_features'+'net3'] 134 | end_points['seed_inds'] = end_points['fp2_inds'] 135 | end_points['seed_xyz'] = xyz 136 | end_points['seed_features'] = features1 137 | 138 | ### Combine the feature here 139 | features_hd_discriptor = torch.cat((features1, features2, features3, features4), dim=1) 140 | features_hd_discriptor = F.relu(self.bn_agg1(self.conv_agg1(features_hd_discriptor))) 141 | features_hd_discriptor = F.relu(self.bn_agg2(self.conv_agg2(features_hd_discriptor))) 142 | 143 | end_points['hd_feature'] = features_hd_discriptor 144 | 145 | net_flag_z = F.relu(self.bn_flag_z1(self.conv_flag_z1(features_hd_discriptor))) 146 | net_flag_z = self.conv_flag_z2(net_flag_z) 147 | end_points["pred_flag_z"] = net_flag_z 148 | 149 | net_flag_xy = F.relu(self.bn_flag_xy1(self.conv_flag_xy1(features_hd_discriptor))) 150 | net_flag_xy = self.conv_flag_xy2(net_flag_xy) 151 | end_points["pred_flag_xy"] = net_flag_xy 152 | 153 | net_flag_line = F.relu(self.bn_flag_line1(self.conv_flag_line1(features_hd_discriptor))) 154 | net_flag_line = self.conv_flag_line2(net_flag_line) 155 | end_points["pred_flag_line"] = net_flag_line 156 | 157 | proposal_xyz, proposal_features, center_offset, center_residual = self.vgen(xyz, features_hd_discriptor) 158 | proposal_features_norm = torch.norm(proposal_features, p=2, dim=1) 159 | proposal_features = proposal_features.div(proposal_features_norm.unsqueeze(1)) 160 | end_points['vote_xyz'] = proposal_xyz 161 | end_points['vote_features'] = proposal_features 162 | 163 | voted_z, voted_z_feature, z_offset, z_residual = self.vgen_z(xyz, features_hd_discriptor) 164 | voted_z_feature_norm = torch.norm(voted_z_feature, p=2, dim=1) 165 | voted_z_feature = voted_z_feature.div(voted_z_feature_norm.unsqueeze(1)) 166 | end_points['vote_z'] = voted_z 167 | end_points['vote_z_feature'] = voted_z_feature 168 | 169 | voted_xy, voted_xy_feature, xy_offset, xy_residual = self.vgen_xy(xyz, features_hd_discriptor) 170 | voted_xy_feature_norm = torch.norm(voted_xy_feature, p=2, dim=1) 171 | voted_xy_feature = voted_xy_feature.div(voted_xy_feature_norm.unsqueeze(1)) 172 | end_points['vote_xy'] = voted_xy 173 | end_points['vote_xy_feature'] = voted_xy_feature 174 | 175 | voted_line, voted_line_feature, line_offset, line_residual = self.vgen_line(xyz, features_hd_discriptor) 176 | voted_line_feature_norm = torch.norm(voted_line_feature, p=2, dim=1) 177 | voted_line_feature = voted_line_feature.div(voted_line_feature_norm.unsqueeze(1)) 178 | end_points['vote_line'] = voted_line 179 | end_points['vote_line_feature'] = voted_line_feature 180 | 181 | center_z, feature_z, end_points = self.pnet_z(voted_z, voted_z_feature, end_points, mode='_z') 182 | center_xy, feature_xy, end_points = self.pnet_xy(voted_xy, voted_xy_feature, end_points, mode='_xy') 183 | center_line, feature_line, end_points = self.pnet_line(voted_line, voted_line_feature, end_points, mode='_line') 184 | 185 | end_points = self.pnet_final(proposal_xyz, proposal_features, center_z, feature_z, center_xy, feature_xy, center_line, feature_line, end_points) 186 | return end_points 187 | 188 | -------------------------------------------------------------------------------- /models/hdnet_1bb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import numpy as np 15 | import sys 16 | import os 17 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 18 | ROOT_DIR = os.path.dirname(BASE_DIR) 19 | sys.path.append(BASE_DIR) 20 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 21 | import pc_util 22 | 23 | from backbone_module_scale import Pointnet2Backbone 24 | from voting_module import VotingModule 25 | 26 | from proposal_module_refine import ProposalModuleRefine 27 | from proposal_module_surface import PrimitiveModule 28 | 29 | from dump_helper import dump_results 30 | from loss_helper import get_loss 31 | 32 | class HDNet_1bb(nn.Module): 33 | r""" 34 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 35 | 36 | Parameters 37 | ---------- 38 | num_class: int 39 | Number of semantics classes to predict over -- size of softmax classifier 40 | num_heading_bin: int 41 | num_size_cluster: int 42 | input_feature_dim: (default: 0) 43 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 44 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 45 | num_proposal: int (default: 128) 46 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 47 | vote_factor: (default: 1) 48 | Number of votes generated from each seed point. 49 | """ 50 | 51 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 52 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps', with_angle=False, scale=1): 53 | super().__init__() 54 | 55 | self.num_class = num_class 56 | self.num_heading_bin = num_heading_bin 57 | self.num_size_cluster = num_size_cluster 58 | self.mean_size_arr = mean_size_arr 59 | self.input_feature_dim = input_feature_dim 60 | self.num_proposal = num_proposal 61 | self.vote_factor = vote_factor 62 | self.sampling=sampling 63 | 64 | # Backbone point feature learning: 4 bb tower 65 | self.backbone_net1 = Pointnet2Backbone(input_feature_dim=self.input_feature_dim, scale=scale) ### Just xyz + height 66 | scale = max(scale, 2) 67 | 68 | ### Existence flag prediction 69 | self.conv_flag_z1 = torch.nn.Conv1d(256*scale,128,1) 70 | self.bn_flag_z1 = torch.nn.BatchNorm1d(128) 71 | self.conv_flag_z2 = torch.nn.Conv1d(128,2,1) 72 | 73 | self.conv_flag_xy1 = torch.nn.Conv1d(256*scale,128,1) 74 | self.bn_flag_xy1 = torch.nn.BatchNorm1d(128) 75 | self.conv_flag_xy2 = torch.nn.Conv1d(128,2,1) 76 | 77 | self.conv_flag_line1 = torch.nn.Conv1d(256*scale,128,1) 78 | self.bn_flag_line1 = torch.nn.BatchNorm1d(128) 79 | self.conv_flag_line2 = torch.nn.Conv1d(128,2,1) 80 | 81 | # Hough voting and clustering 82 | self.vgen = VotingModule(self.vote_factor, 256*scale) 83 | self.vgen_z = VotingModule(self.vote_factor, 256*scale) 84 | self.vgen_xy = VotingModule(self.vote_factor, 256*scale) 85 | self.vgen_line = VotingModule(self.vote_factor, 256*scale) 86 | 87 | # Vote aggregation and detection 88 | self.pnet_z = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 89 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256*scale, numd=2) 90 | self.pnet_xy = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 91 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256*scale, numd=1) 92 | self.pnet_line = PrimitiveModule(num_class, num_heading_bin, num_size_cluster, 93 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256*scale, numd=0) 94 | 95 | self.pnet_final = ProposalModuleRefine(num_class, num_heading_bin, num_size_cluster, 96 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256*scale, with_angle=with_angle) 97 | 98 | def forward(self, inputs, end_points, mode=""): 99 | """ Forward pass of the network 100 | 101 | Args: 102 | inputs: dict 103 | {point_clouds} 104 | 105 | point_clouds: Variable(torch.cuda.FloatTensor) 106 | (B, N, 3 + input_channels) tensor 107 | Point cloud to run predicts on 108 | Each point in the point-cloud MUST 109 | be formated as (x, y, z, features...) 110 | Returns: 111 | end_points: dict 112 | """ 113 | batch_size = inputs['point_clouds'].shape[0] 114 | 115 | end_points = self.backbone_net1(inputs['point_clouds'], end_points) 116 | 117 | ### Extract feature here 118 | xyz = end_points['fp2_xyz'] 119 | features1 = end_points['fp2_features'] 120 | end_points['seed_inds'] = end_points['fp2_inds'] 121 | end_points['seed_xyz'] = xyz 122 | end_points['seed_features'] = features1 123 | 124 | ### Combine the feature here 125 | features_hd_discriptor = features1 126 | 127 | end_points['hd_feature'] = features_hd_discriptor 128 | 129 | net_flag_z = F.relu(self.bn_flag_z1(self.conv_flag_z1(features_hd_discriptor))) 130 | net_flag_z = self.conv_flag_z2(net_flag_z) 131 | end_points["pred_flag_z"] = net_flag_z 132 | 133 | net_flag_xy = F.relu(self.bn_flag_xy1(self.conv_flag_xy1(features_hd_discriptor))) 134 | net_flag_xy = self.conv_flag_xy2(net_flag_xy) 135 | end_points["pred_flag_xy"] = net_flag_xy 136 | 137 | net_flag_line = F.relu(self.bn_flag_line1(self.conv_flag_line1(features_hd_discriptor))) 138 | net_flag_line = self.conv_flag_line2(net_flag_line) 139 | end_points["pred_flag_line"] = net_flag_line 140 | 141 | proposal_xyz, proposal_features, center_offset, center_residual = self.vgen(xyz, features_hd_discriptor) 142 | proposal_features_norm = torch.norm(proposal_features, p=2, dim=1) 143 | proposal_features = proposal_features.div(proposal_features_norm.unsqueeze(1)) 144 | end_points['vote_xyz'] = proposal_xyz 145 | end_points['vote_features'] = proposal_features 146 | 147 | voted_z, voted_z_feature, z_offset, z_residual = self.vgen_z(xyz, features_hd_discriptor) 148 | voted_z_feature_norm = torch.norm(voted_z_feature, p=2, dim=1) 149 | voted_z_feature = voted_z_feature.div(voted_z_feature_norm.unsqueeze(1)) 150 | end_points['vote_z'] = voted_z 151 | end_points['vote_z_feature'] = voted_z_feature 152 | 153 | voted_xy, voted_xy_feature, xy_offset, xy_residual = self.vgen_xy(xyz, features_hd_discriptor) 154 | voted_xy_feature_norm = torch.norm(voted_xy_feature, p=2, dim=1) 155 | voted_xy_feature = voted_xy_feature.div(voted_xy_feature_norm.unsqueeze(1)) 156 | end_points['vote_xy'] = voted_xy 157 | end_points['vote_xy_feature'] = voted_xy_feature 158 | 159 | voted_line, voted_line_feature, line_offset, line_residual = self.vgen_line(xyz, features_hd_discriptor) 160 | voted_line_feature_norm = torch.norm(voted_line_feature, p=2, dim=1) 161 | voted_line_feature = voted_line_feature.div(voted_line_feature_norm.unsqueeze(1)) 162 | end_points['vote_line'] = voted_line 163 | end_points['vote_line_feature'] = voted_line_feature 164 | 165 | center_z, feature_z, end_points = self.pnet_z(voted_z, voted_z_feature, end_points, mode='_z') 166 | center_xy, feature_xy, end_points = self.pnet_xy(voted_xy, voted_xy_feature, end_points, mode='_xy') 167 | center_line, feature_line, end_points = self.pnet_line(voted_line, voted_line_feature, end_points, mode='_line') 168 | 169 | end_points = self.pnet_final(proposal_xyz, proposal_features, center_z, feature_z, center_xy, feature_xy, center_line, feature_line, end_points) 170 | return end_points 171 | 172 | -------------------------------------------------------------------------------- /models/proposal_module_surface.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import os 11 | import sys 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 15 | from pointnet2_modules import PointnetSAModuleVotes 16 | import pointnet2_utils 17 | 18 | def decode_scores(net, end_points, num_class, mode=''): 19 | net_transposed = net.transpose(2,1) # (batch_size, 1024, ..) 20 | batch_size = net_transposed.shape[0] 21 | num_proposal = net_transposed.shape[1] 22 | 23 | base_xyz = end_points['aggregated_vote_xyz'+mode] # (batch_size, num_proposal, 3) 24 | center = base_xyz + net_transposed[:,:,0:3] # (batch_size, num_proposal, 3) 25 | end_points['center'+mode] = center 26 | 27 | if mode == '_z': 28 | end_points['size_residuals'+mode] = net_transposed[:,:,3:5] 29 | sem_cls_scores = net_transposed[:,:,5:] # Bxnum_proposalx10 30 | end_points['sem_cls_scores'+mode] = sem_cls_scores 31 | elif mode == '_xy': 32 | end_points['size_residuals'+mode] = net_transposed[:,:,3:4] 33 | sem_cls_scores = net_transposed[:,:,4:] # Bxnum_proposalx10 34 | end_points['sem_cls_scores'+mode] = sem_cls_scores 35 | else: 36 | sem_cls_scores = net_transposed[:,:,3:] # Bxnum_proposalx10 37 | end_points['sem_cls_scores'+mode] = sem_cls_scores 38 | return center, end_points 39 | 40 | 41 | class PrimitiveModule(nn.Module): 42 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256, numd=1): 43 | super().__init__() 44 | 45 | self.num_class = num_class 46 | self.num_heading_bin = num_heading_bin 47 | self.num_size_cluster = num_size_cluster 48 | self.mean_size_arr = mean_size_arr 49 | self.num_proposal = num_proposal 50 | self.sampling = sampling 51 | self.seed_feat_dim = seed_feat_dim 52 | 53 | # Vote clustering 54 | self.vote_aggregation = PointnetSAModuleVotes( 55 | npoint=self.num_proposal, 56 | radius=0.3, 57 | nsample=16, 58 | mlp=[self.seed_feat_dim, 128, 128, 128], 59 | use_xyz=True, 60 | normalize_xyz=True, 61 | same_idx=True 62 | ) 63 | 64 | # Object proposal/detection 65 | # Objectness scores (2), center residual (3), 66 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4) 67 | self.conv1 = torch.nn.Conv1d(128,128,1) 68 | self.conv2 = torch.nn.Conv1d(128,128,1) 69 | self.conv3 = torch.nn.Conv1d(128,3+numd+self.num_class,1) 70 | self.bn1 = torch.nn.BatchNorm1d(128) 71 | self.bn2 = torch.nn.BatchNorm1d(128) 72 | 73 | def forward(self, xyz, features, end_points, mode=''): 74 | """ 75 | Args: 76 | xyz: (B,K,3) 77 | features: (B,C,K) 78 | Returns: 79 | scores: (B,num_proposal,2+3+NH*2+NS*4) 80 | """ 81 | if self.sampling == 'vote_fps': 82 | # Farthest point sampling (FPS) on votes 83 | original_feature = features 84 | xyz, features, fps_inds = self.vote_aggregation(xyz, features) 85 | #original_feature = torch.gather(original_features, 2, fps_inds.unsqueeze(1).repeat(1,256,1).detach().long()).contiguous() 86 | sample_inds = fps_inds 87 | elif self.sampling == 'seed_fps': 88 | # FPS on seed and choose the votes corresponding to the seeds 89 | # This gets us a slightly better coverage of *object* votes than vote_fps (which tends to get more cluster votes) 90 | sample_inds = pointnet2_utils.furthest_point_sample(end_points['seed_xyz'], self.num_proposal) 91 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 92 | elif self.sampling == 'random': 93 | # Random sampling from the votes 94 | num_seed = end_points['seed_xyz'].shape[1] 95 | sample_inds = torch.randint(0, num_seed, (batch_size, self.num_proposal), dtype=torch.int).cuda() 96 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 97 | else: 98 | log_string('Unknown sampling strategy: %s. Exiting!'%(self.sampling)) 99 | exit() 100 | end_points['aggregated_vote_xyz'+mode] = xyz # (batch_size, num_proposal, 3) 101 | end_points['aggregated_vote_inds'+mode] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal 102 | end_points['aggregated_feature'+mode] = features 103 | 104 | # --------- PROPOSAL GENERATION --------- 105 | net = F.relu(self.bn1(self.conv1(features))) 106 | last_net = F.relu(self.bn2(self.conv2(net))) 107 | net = self.conv3(last_net) # (batch_size, 2+3+num_heading_bin*2+num_size_cluster*4, num_proposal) 108 | 109 | newcenter, end_points = decode_scores(net, end_points, self.num_class, mode=mode) 110 | return newcenter.contiguous(), features.contiguous(), end_points 111 | 112 | 113 | -------------------------------------------------------------------------------- /models/voting_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Votes generation from seed point features. 19 | 20 | Args: 21 | vote_facotr: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 35 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 36 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 37 | 38 | def forward(self, seed_xyz, seed_features): 39 | """ Forward pass. 40 | 41 | Arguments: 42 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 43 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 44 | Returns: 45 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 46 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 47 | """ 48 | batch_size = seed_xyz.shape[0] 49 | num_seed = seed_xyz.shape[1] 50 | num_vote = num_seed*self.vote_factor 51 | net = F.relu(self.bn1(self.conv1(seed_features))) 52 | net = F.relu(self.bn2(self.conv2(net))) 53 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 54 | 55 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 56 | offset = net[:,:,:,:3] 57 | vote_xyz = seed_xyz.unsqueeze(2) + offset 58 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 59 | 60 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 61 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 62 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 63 | vote_features = vote_features.transpose(2,1).contiguous() 64 | 65 | return vote_xyz, vote_features, offset.squeeze(2), residual_features 66 | 67 | if __name__=='__main__': 68 | net = VotingModule(2, 256).cuda() 69 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 70 | print('xyz', xyz.shape) 71 | print('features', features.shape) 72 | 73 | -------------------------------------------------------------------------------- /overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zaiweizhang/H3DNet/81bd6af37cb131fd9e81774f52f29a0f3b0a0f43/overview.jpg -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | #include 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | AT_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | AT_CHECK(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | AT_CHECK(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.type().is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data(), 31 | xyz.data(), idx.data()); 32 | } else { 33 | AT_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.type().is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.type().is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_CHECK(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 43 | CHECK_CONTIGUOUS(grad_out); 44 | CHECK_CONTIGUOUS(idx); 45 | CHECK_IS_FLOAT(grad_out); 46 | CHECK_IS_INT(idx); 47 | 48 | if (grad_out.type().is_cuda()) { 49 | CHECK_CUDA(idx); 50 | } 51 | 52 | at::Tensor output = 53 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 54 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 55 | 56 | if (grad_out.type().is_cuda()) { 57 | group_points_grad_kernel_wrapper( 58 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 59 | grad_out.data(), idx.data(), output.data()); 60 | } else { 61 | AT_CHECK(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.type().is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.type().is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data(), knows.data(), 39 | dist2.data(), idx.data()); 40 | } else { 41 | AT_CHECK(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.type().is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.type().is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data(), idx.data(), weight.data(), 69 | output.data()); 70 | } else { 71 | AT_CHECK(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.type().is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.type().is_cuda()) { 95 | three_interpolate_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data(), idx.data(), weight.data(), 98 | output.data()); 99 | } else { 100 | AT_CHECK(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: unknown(b, n, 3) known(b, m, 3) 13 | // output: dist2(b, n, 3), idx(b, n, 3) 14 | __global__ void three_nn_kernel(int b, int n, int m, 15 | const float *__restrict__ unknown, 16 | const float *__restrict__ known, 17 | float *__restrict__ dist2, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | unknown += batch_index * n * 3; 21 | known += batch_index * m * 3; 22 | dist2 += batch_index * n * 3; 23 | idx += batch_index * n * 3; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | for (int j = index; j < n; j += stride) { 28 | float ux = unknown[j * 3 + 0]; 29 | float uy = unknown[j * 3 + 1]; 30 | float uz = unknown[j * 3 + 2]; 31 | 32 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 33 | int besti1 = 0, besti2 = 0, besti3 = 0; 34 | for (int k = 0; k < m; ++k) { 35 | float x = known[k * 3 + 0]; 36 | float y = known[k * 3 + 1]; 37 | float z = known[k * 3 + 2]; 38 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 39 | if (d < best1) { 40 | best3 = best2; 41 | besti3 = besti2; 42 | best2 = best1; 43 | besti2 = besti1; 44 | best1 = d; 45 | besti1 = k; 46 | } else if (d < best2) { 47 | best3 = best2; 48 | besti3 = besti2; 49 | best2 = d; 50 | besti2 = k; 51 | } else if (d < best3) { 52 | best3 = d; 53 | besti3 = k; 54 | } 55 | } 56 | dist2[j * 3 + 0] = best1; 57 | dist2[j * 3 + 1] = best2; 58 | dist2[j * 3 + 2] = best3; 59 | 60 | idx[j * 3 + 0] = besti1; 61 | idx[j * 3 + 1] = besti2; 62 | idx[j * 3 + 2] = besti3; 63 | } 64 | } 65 | 66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 67 | const float *known, float *dist2, int *idx) { 68 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 69 | three_nn_kernel<<>>(b, n, m, unknown, known, 70 | dist2, idx); 71 | 72 | CUDA_CHECK_ERRORS(); 73 | } 74 | 75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 76 | // output: out(b, c, n) 77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 78 | const float *__restrict__ points, 79 | const int *__restrict__ idx, 80 | const float *__restrict__ weight, 81 | float *__restrict__ out) { 82 | int batch_index = blockIdx.x; 83 | points += batch_index * m * c; 84 | 85 | idx += batch_index * n * 3; 86 | weight += batch_index * n * 3; 87 | 88 | out += batch_index * n * c; 89 | 90 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 91 | const int stride = blockDim.y * blockDim.x; 92 | for (int i = index; i < c * n; i += stride) { 93 | const int l = i / n; 94 | const int j = i % n; 95 | float w1 = weight[j * 3 + 0]; 96 | float w2 = weight[j * 3 + 1]; 97 | float w3 = weight[j * 3 + 2]; 98 | 99 | int i1 = idx[j * 3 + 0]; 100 | int i2 = idx[j * 3 + 1]; 101 | int i3 = idx[j * 3 + 2]; 102 | 103 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 104 | points[l * m + i3] * w3; 105 | } 106 | } 107 | 108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 109 | const float *points, const int *idx, 110 | const float *weight, float *out) { 111 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 112 | three_interpolate_kernel<<>>( 113 | b, c, m, n, points, idx, weight, out); 114 | 115 | CUDA_CHECK_ERRORS(); 116 | } 117 | 118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 119 | // output: grad_points(b, c, m) 120 | 121 | __global__ void three_interpolate_grad_kernel( 122 | int b, int c, int n, int m, const float *__restrict__ grad_out, 123 | const int *__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.type().is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.type().is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data(), 37 | idx.data(), output.data()); 38 | } else { 39 | AT_CHECK(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.type().is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.type().is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data(), 63 | idx.data(), output.data()); 64 | } else { 65 | AT_CHECK(false, "CPU not supported"); 66 | } 67 | 68 | return output; 69 | } 70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 71 | CHECK_CONTIGUOUS(points); 72 | CHECK_IS_FLOAT(points); 73 | 74 | at::Tensor output = 75 | torch::zeros({points.size(0), nsamples}, 76 | at::device(points.device()).dtype(at::ScalarType::Int)); 77 | 78 | at::Tensor tmp = 79 | torch::full({points.size(0), points.size(1)}, 1e10, 80 | at::device(points.device()).dtype(at::ScalarType::Float)); 81 | 82 | if (points.type().is_cuda()) { 83 | furthest_point_sampling_kernel_wrapper( 84 | points.size(0), points.size(1), nsamples, points.data(), 85 | tmp.data(), output.data()); 86 | } else { 87 | AT_CHECK(false, "CPU not supported"); 88 | } 89 | 90 | return output; 91 | } 92 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, m) 12 | // output: out(b, c, m) 13 | __global__ void gather_points_kernel(int b, int c, int n, int m, 14 | const float *__restrict__ points, 15 | const int *__restrict__ idx, 16 | float *__restrict__ out) { 17 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 18 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 19 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 20 | int a = idx[i * m + j]; 21 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 28 | const float *points, const int *idx, 29 | float *out) { 30 | gather_points_kernel<<>>(b, c, n, npoints, 32 | points, idx, out); 33 | 34 | CUDA_CHECK_ERRORS(); 35 | } 36 | 37 | // input: grad_out(b, c, m) idx(b, m) 38 | // output: grad_points(b, c, n) 39 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 40 | const float *__restrict__ grad_out, 41 | const int *__restrict__ idx, 42 | float *__restrict__ grad_points) { 43 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 44 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 45 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 46 | int a = idx[i * m + j]; 47 | atomicAdd(grad_points + (i * c + l) * n + a, 48 | grad_out[(i * c + l) * m + j]); 49 | } 50 | } 51 | } 52 | } 53 | 54 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 55 | const float *grad_out, const int *idx, 56 | float *grad_points) { 57 | gather_points_grad_kernel<<>>( 59 | b, c, n, npoints, grad_out, idx, grad_points); 60 | 61 | CUDA_CHECK_ERRORS(); 62 | } 63 | 64 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 65 | int idx1, int idx2) { 66 | const float v1 = dists[idx1], v2 = dists[idx2]; 67 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 68 | dists[idx1] = max(v1, v2); 69 | dists_i[idx1] = v2 > v1 ? i2 : i1; 70 | } 71 | 72 | // Input dataset: (b, n, 3), tmp: (b, n) 73 | // Ouput idxs (b, m) 74 | template 75 | __global__ void furthest_point_sampling_kernel( 76 | int b, int n, int m, const float *__restrict__ dataset, 77 | float *__restrict__ temp, int *__restrict__ idxs) { 78 | if (m <= 0) return; 79 | __shared__ float dists[block_size]; 80 | __shared__ int dists_i[block_size]; 81 | 82 | int batch_index = blockIdx.x; 83 | dataset += batch_index * n * 3; 84 | temp += batch_index * n; 85 | idxs += batch_index * m; 86 | 87 | int tid = threadIdx.x; 88 | const int stride = block_size; 89 | 90 | int old = 0; 91 | if (threadIdx.x == 0) idxs[0] = old; 92 | 93 | __syncthreads(); 94 | for (int j = 1; j < m; j++) { 95 | int besti = 0; 96 | float best = -1; 97 | float x1 = dataset[old * 3 + 0]; 98 | float y1 = dataset[old * 3 + 1]; 99 | float z1 = dataset[old * 3 + 2]; 100 | for (int k = tid; k < n; k += stride) { 101 | float x2, y2, z2; 102 | x2 = dataset[k * 3 + 0]; 103 | y2 = dataset[k * 3 + 1]; 104 | z2 = dataset[k * 3 + 2]; 105 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 106 | if (mag <= 1e-3) continue; 107 | 108 | float d = 109 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 110 | 111 | float d2 = min(d, temp[k]); 112 | temp[k] = d2; 113 | besti = d2 > best ? k : besti; 114 | best = d2 > best ? d2 : best; 115 | } 116 | dists[tid] = best; 117 | dists_i[tid] = besti; 118 | __syncthreads(); 119 | 120 | if (block_size >= 512) { 121 | if (tid < 256) { 122 | __update(dists, dists_i, tid, tid + 256); 123 | } 124 | __syncthreads(); 125 | } 126 | if (block_size >= 256) { 127 | if (tid < 128) { 128 | __update(dists, dists_i, tid, tid + 128); 129 | } 130 | __syncthreads(); 131 | } 132 | if (block_size >= 128) { 133 | if (tid < 64) { 134 | __update(dists, dists_i, tid, tid + 64); 135 | } 136 | __syncthreads(); 137 | } 138 | if (block_size >= 64) { 139 | if (tid < 32) { 140 | __update(dists, dists_i, tid, tid + 32); 141 | } 142 | __syncthreads(); 143 | } 144 | if (block_size >= 32) { 145 | if (tid < 16) { 146 | __update(dists, dists_i, tid, tid + 16); 147 | } 148 | __syncthreads(); 149 | } 150 | if (block_size >= 16) { 151 | if (tid < 8) { 152 | __update(dists, dists_i, tid, tid + 8); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 8) { 157 | if (tid < 4) { 158 | __update(dists, dists_i, tid, tid + 4); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 4) { 163 | if (tid < 2) { 164 | __update(dists, dists_i, tid, tid + 2); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 2) { 169 | if (tid < 1) { 170 | __update(dists, dists_i, tid, tid + 1); 171 | } 172 | __syncthreads(); 173 | } 174 | 175 | old = dists_i[0]; 176 | if (tid == 0) idxs[j] = old; 177 | } 178 | } 179 | 180 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 181 | const float *dataset, float *temp, 182 | int *idxs) { 183 | unsigned int n_threads = opt_n_threads(n); 184 | 185 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 186 | 187 | switch (n_threads) { 188 | case 512: 189 | furthest_point_sampling_kernel<512> 190 | <<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 256: 193 | furthest_point_sampling_kernel<256> 194 | <<>>(b, n, m, dataset, temp, idxs); 195 | break; 196 | case 128: 197 | furthest_point_sampling_kernel<128> 198 | <<>>(b, n, m, dataset, temp, idxs); 199 | break; 200 | case 64: 201 | furthest_point_sampling_kernel<64> 202 | <<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 32: 205 | furthest_point_sampling_kernel<32> 206 | <<>>(b, n, m, dataset, temp, idxs); 207 | break; 208 | case 16: 209 | furthest_point_sampling_kernel<16> 210 | <<>>(b, n, m, dataset, temp, idxs); 211 | break; 212 | case 8: 213 | furthest_point_sampling_kernel<8> 214 | <<>>(b, n, m, dataset, temp, idxs); 215 | break; 216 | case 4: 217 | furthest_point_sampling_kernel<4> 218 | <<>>(b, n, m, dataset, temp, idxs); 219 | break; 220 | case 2: 221 | furthest_point_sampling_kernel<2> 222 | <<>>(b, n, m, dataset, temp, idxs); 223 | break; 224 | case 1: 225 | furthest_point_sampling_kernel<1> 226 | <<>>(b, n, m, dataset, temp, idxs); 227 | break; 228 | default: 229 | furthest_point_sampling_kernel<512> 230 | <<>>(b, n, m, dataset, temp, idxs); 231 | } 232 | 233 | CUDA_CHECK_ERRORS(); 234 | } 235 | -------------------------------------------------------------------------------- /pointnet2/pointnet2_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Modified based on: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | from __future__ import ( 8 | division, 9 | absolute_import, 10 | with_statement, 11 | print_function, 12 | unicode_literals, 13 | ) 14 | import torch 15 | from torch.autograd import Function 16 | import torch.nn as nn 17 | import pytorch_utils as pt_utils 18 | import sys 19 | 20 | try: 21 | import builtins 22 | except: 23 | import __builtin__ as builtins 24 | 25 | try: 26 | import pointnet2._ext as _ext 27 | except ImportError: 28 | if not getattr(builtins, "__POINTNET2_SETUP__", False): 29 | raise ImportError( 30 | "Could not import _ext module.\n" 31 | "Please see the setup instructions in the README: " 32 | "https://github.com/erikwijmans/Pointnet2_PyTorch/blob/master/README.rst" 33 | ) 34 | 35 | if False: 36 | # Workaround for type hints without depending on the `typing` module 37 | from typing import * 38 | 39 | 40 | class RandomDropout(nn.Module): 41 | def __init__(self, p=0.5, inplace=False): 42 | super(RandomDropout, self).__init__() 43 | self.p = p 44 | self.inplace = inplace 45 | 46 | def forward(self, X): 47 | theta = torch.Tensor(1).uniform_(0, self.p)[0] 48 | return pt_utils.feature_dropout_no_scaling(X, theta, self.train, self.inplace) 49 | 50 | 51 | class FurthestPointSampling(Function): 52 | @staticmethod 53 | def forward(ctx, xyz, npoint): 54 | # type: (Any, torch.Tensor, int) -> torch.Tensor 55 | r""" 56 | Uses iterative furthest point sampling to select a set of npoint features that have the largest 57 | minimum distance 58 | 59 | Parameters 60 | ---------- 61 | xyz : torch.Tensor 62 | (B, N, 3) tensor where N > npoint 63 | npoint : int32 64 | number of features in the sampled set 65 | 66 | Returns 67 | ------- 68 | torch.Tensor 69 | (B, npoint) tensor containing the set 70 | """ 71 | return _ext.furthest_point_sampling(xyz, npoint) 72 | 73 | @staticmethod 74 | def backward(xyz, a=None): 75 | return None, None 76 | 77 | 78 | furthest_point_sample = FurthestPointSampling.apply 79 | 80 | 81 | class GatherOperation(Function): 82 | @staticmethod 83 | def forward(ctx, features, idx): 84 | # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor 85 | r""" 86 | 87 | Parameters 88 | ---------- 89 | features : torch.Tensor 90 | (B, C, N) tensor 91 | 92 | idx : torch.Tensor 93 | (B, npoint) tensor of the features to gather 94 | 95 | Returns 96 | ------- 97 | torch.Tensor 98 | (B, C, npoint) tensor 99 | """ 100 | 101 | _, C, N = features.size() 102 | 103 | ctx.for_backwards = (idx, C, N) 104 | 105 | return _ext.gather_points(features, idx) 106 | 107 | @staticmethod 108 | def backward(ctx, grad_out): 109 | idx, C, N = ctx.for_backwards 110 | 111 | grad_features = _ext.gather_points_grad(grad_out.contiguous(), idx, N) 112 | return grad_features, None 113 | 114 | 115 | gather_operation = GatherOperation.apply 116 | 117 | 118 | class ThreeNN(Function): 119 | @staticmethod 120 | def forward(ctx, unknown, known): 121 | # type: (Any, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor] 122 | r""" 123 | Find the three nearest neighbors of unknown in known 124 | Parameters 125 | ---------- 126 | unknown : torch.Tensor 127 | (B, n, 3) tensor of known features 128 | known : torch.Tensor 129 | (B, m, 3) tensor of unknown features 130 | 131 | Returns 132 | ------- 133 | dist : torch.Tensor 134 | (B, n, 3) l2 distance to the three nearest neighbors 135 | idx : torch.Tensor 136 | (B, n, 3) index of 3 nearest neighbors 137 | """ 138 | dist2, idx = _ext.three_nn(unknown, known) 139 | 140 | return torch.sqrt(dist2), idx 141 | 142 | @staticmethod 143 | def backward(ctx, a=None, b=None): 144 | return None, None 145 | 146 | 147 | three_nn = ThreeNN.apply 148 | 149 | 150 | class ThreeInterpolate(Function): 151 | @staticmethod 152 | def forward(ctx, features, idx, weight): 153 | # type(Any, torch.Tensor, torch.Tensor, torch.Tensor) -> Torch.Tensor 154 | r""" 155 | Performs weight linear interpolation on 3 features 156 | Parameters 157 | ---------- 158 | features : torch.Tensor 159 | (B, c, m) Features descriptors to be interpolated from 160 | idx : torch.Tensor 161 | (B, n, 3) three nearest neighbors of the target features in features 162 | weight : torch.Tensor 163 | (B, n, 3) weights 164 | 165 | Returns 166 | ------- 167 | torch.Tensor 168 | (B, c, n) tensor of the interpolated features 169 | """ 170 | B, c, m = features.size() 171 | n = idx.size(1) 172 | 173 | ctx.three_interpolate_for_backward = (idx, weight, m) 174 | 175 | return _ext.three_interpolate(features, idx, weight) 176 | 177 | @staticmethod 178 | def backward(ctx, grad_out): 179 | # type: (Any, torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor] 180 | r""" 181 | Parameters 182 | ---------- 183 | grad_out : torch.Tensor 184 | (B, c, n) tensor with gradients of ouputs 185 | 186 | Returns 187 | ------- 188 | grad_features : torch.Tensor 189 | (B, c, m) tensor with gradients of features 190 | 191 | None 192 | 193 | None 194 | """ 195 | idx, weight, m = ctx.three_interpolate_for_backward 196 | 197 | grad_features = _ext.three_interpolate_grad( 198 | grad_out.contiguous(), idx, weight, m 199 | ) 200 | 201 | return grad_features, None, None 202 | 203 | 204 | three_interpolate = ThreeInterpolate.apply 205 | 206 | 207 | class GroupingOperation(Function): 208 | @staticmethod 209 | def forward(ctx, features, idx): 210 | # type: (Any, torch.Tensor, torch.Tensor) -> torch.Tensor 211 | r""" 212 | 213 | Parameters 214 | ---------- 215 | features : torch.Tensor 216 | (B, C, N) tensor of features to group 217 | idx : torch.Tensor 218 | (B, npoint, nsample) tensor containing the indicies of features to group with 219 | 220 | Returns 221 | ------- 222 | torch.Tensor 223 | (B, C, npoint, nsample) tensor 224 | """ 225 | B, nfeatures, nsample = idx.size() 226 | _, C, N = features.size() 227 | 228 | ctx.for_backwards = (idx, N) 229 | 230 | return _ext.group_points(features, idx) 231 | 232 | @staticmethod 233 | def backward(ctx, grad_out): 234 | # type: (Any, torch.tensor) -> Tuple[torch.Tensor, torch.Tensor] 235 | r""" 236 | 237 | Parameters 238 | ---------- 239 | grad_out : torch.Tensor 240 | (B, C, npoint, nsample) tensor of the gradients of the output from forward 241 | 242 | Returns 243 | ------- 244 | torch.Tensor 245 | (B, C, N) gradient of the features 246 | None 247 | """ 248 | idx, N = ctx.for_backwards 249 | 250 | grad_features = _ext.group_points_grad(grad_out.contiguous(), idx, N) 251 | 252 | return grad_features, None 253 | 254 | 255 | grouping_operation = GroupingOperation.apply 256 | 257 | 258 | class BallQuery(Function): 259 | @staticmethod 260 | def forward(ctx, radius, nsample, xyz, new_xyz): 261 | # type: (Any, float, int, torch.Tensor, torch.Tensor) -> torch.Tensor 262 | r""" 263 | 264 | Parameters 265 | ---------- 266 | radius : float 267 | radius of the balls 268 | nsample : int 269 | maximum number of features in the balls 270 | xyz : torch.Tensor 271 | (B, N, 3) xyz coordinates of the features 272 | new_xyz : torch.Tensor 273 | (B, npoint, 3) centers of the ball query 274 | 275 | Returns 276 | ------- 277 | torch.Tensor 278 | (B, npoint, nsample) tensor with the indicies of the features that form the query balls 279 | """ 280 | return _ext.ball_query(new_xyz, xyz, radius, nsample) 281 | 282 | @staticmethod 283 | def backward(ctx, a=None): 284 | return None, None, None, None 285 | 286 | 287 | ball_query = BallQuery.apply 288 | 289 | 290 | class QueryAndGroup(nn.Module): 291 | r""" 292 | Groups with a ball query of radius 293 | 294 | Parameters 295 | --------- 296 | radius : float32 297 | Radius of ball 298 | nsample : int32 299 | Maximum number of features to gather in the ball 300 | """ 301 | 302 | def __init__(self, radius, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, sample_uniformly=False, ret_unique_cnt=False, use_feature=False, ret_idx=False): 303 | # type: (QueryAndGroup, float, int, bool) -> None 304 | super(QueryAndGroup, self).__init__() 305 | self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz 306 | self.ret_grouped_xyz = ret_grouped_xyz 307 | self.normalize_xyz = normalize_xyz 308 | self.sample_uniformly = sample_uniformly 309 | self.ret_unique_cnt = ret_unique_cnt 310 | self.ret_idx = ret_idx 311 | self.use_feature = use_feature 312 | if self.ret_unique_cnt: 313 | assert(self.sample_uniformly) 314 | 315 | def forward(self, xyz, new_xyz, features=None): 316 | # type: (QueryAndGroup, torch.Tensor. torch.Tensor, torch.Tensor) -> Tuple[Torch.Tensor] 317 | r""" 318 | Parameters 319 | ---------- 320 | xyz : torch.Tensor 321 | xyz coordinates of the features (B, N, 3) 322 | new_xyz : torch.Tensor 323 | centriods (B, npoint, 3) 324 | features : torch.Tensor 325 | Descriptors of the features (B, C, N) 326 | 327 | Returns 328 | ------- 329 | new_features : torch.Tensor 330 | (B, 3 + C, npoint, nsample) tensor 331 | """ 332 | idx = ball_query(self.radius, self.nsample, xyz, new_xyz) 333 | 334 | if self.sample_uniformly: 335 | unique_cnt = torch.zeros((idx.shape[0], idx.shape[1])) 336 | for i_batch in range(idx.shape[0]): 337 | for i_region in range(idx.shape[1]): 338 | unique_ind = torch.unique(idx[i_batch, i_region, :]) 339 | num_unique = unique_ind.shape[0] 340 | unique_cnt[i_batch, i_region] = num_unique 341 | sample_ind = torch.randint(0, num_unique, (self.nsample - num_unique,), dtype=torch.long) 342 | all_ind = torch.cat((unique_ind, unique_ind[sample_ind])) 343 | idx[i_batch, i_region, :] = all_ind 344 | 345 | 346 | xyz_trans = xyz.transpose(1, 2).contiguous() 347 | grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) 348 | grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1) 349 | if self.normalize_xyz: 350 | grouped_xyz /= self.radius 351 | 352 | if features is not None: 353 | grouped_features = grouping_operation(features, idx) 354 | if self.use_xyz: 355 | new_features = torch.cat( 356 | [grouped_xyz, grouped_features], dim=1 357 | ) # (B, C + 3, npoint, nsample) 358 | else: 359 | new_features = grouped_features 360 | if self.use_feature: 361 | orig_features = features.unsqueeze(-1).repeat(1,1,1,self.nsample) 362 | new_features = torch.cat( 363 | [orig_features, new_features], dim=1 364 | ) # (B, C + 3, npoint, nsample) 365 | else: 366 | assert ( 367 | self.use_xyz 368 | ), "Cannot have not features and not use xyz as a feature!" 369 | new_features = grouped_xyz 370 | 371 | ret = [new_features] 372 | if self.ret_grouped_xyz: 373 | ret.append(grouped_xyz) 374 | if self.ret_unique_cnt: 375 | ret.append(unique_cnt) 376 | if self.ret_idx: 377 | ret.append(idx) 378 | if len(ret) == 1: 379 | return ret[0] 380 | else: 381 | return tuple(ret) 382 | 383 | class PairwiseGroup(nn.Module): 384 | r""" 385 | Groups with a ball query of radius 386 | 387 | Parameters 388 | --------- 389 | radius : float32 390 | Radius of ball 391 | nsample : int32 392 | Maximum number of features to gather in the ball 393 | """ 394 | 395 | def __init__(self, radius, nsample, use_xyz=True, ret_grouped_xyz=False, normalize_xyz=False, sample_uniformly=False, ret_unique_cnt=False, use_feature=False): 396 | # type: (QueryAndGroup, float, int, bool) -> None 397 | super(PairwiseGroup, self).__init__() 398 | self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz 399 | self.ret_grouped_xyz = ret_grouped_xyz 400 | self.normalize_xyz = normalize_xyz 401 | self.sample_uniformly = sample_uniformly 402 | self.ret_unique_cnt = ret_unique_cnt 403 | self.use_feature = use_feature 404 | if self.ret_unique_cnt: 405 | assert(self.sample_uniformly) 406 | 407 | def forward(self, xyz, new_xyz, features=None): 408 | # type: (QueryAndGroup, torch.Tensor. torch.Tensor, torch.Tensor) -> Tuple[Torch.Tensor] 409 | r""" 410 | Parameters 411 | ---------- 412 | xyz : torch.Tensor 413 | xyz coordinates of the features (B, N, 3) 414 | new_xyz : torch.Tensor 415 | centriods (B, npoint, 3) 416 | features : torch.Tensor 417 | Descriptors of the features (B, C, N) 418 | 419 | Returns 420 | ------- 421 | new_features : torch.Tensor 422 | (B, 3 + C, npoint, nsample) tensor 423 | """ 424 | xyz_trans = xyz.transpose(1, 2).contiguous() 425 | grouped_xyz = xyz_trans.unsqueeze(-1).repeat(1,1,1,self.nsample)#grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) 426 | grouped_features1 = features.unsqueeze(-1).repeat(1,1,1,self.nsample) 427 | grouped_features2 = features.unsqueeze(-2).repeat(1,1,self.nsample,1) 428 | grouped_features = torch.cat([grouped_features1, grouped_features2], dim=1) 429 | ret = [grouped_features] 430 | if self.ret_grouped_xyz: 431 | ret.append(grouped_xyz) 432 | return tuple(ret) 433 | 434 | class GroupAll(nn.Module): 435 | r""" 436 | Groups all features 437 | 438 | Parameters 439 | --------- 440 | """ 441 | 442 | def __init__(self, use_xyz=True, ret_grouped_xyz=False): 443 | # type: (GroupAll, bool) -> None 444 | super(GroupAll, self).__init__() 445 | self.use_xyz = use_xyz 446 | self.ret_grouped_xyz = ret_grouped_xyz 447 | 448 | def forward(self, xyz, new_xyz, features=None): 449 | # type: (GroupAll, torch.Tensor, torch.Tensor, torch.Tensor) -> Tuple[torch.Tensor] 450 | r""" 451 | Parameters 452 | ---------- 453 | xyz : torch.Tensor 454 | xyz coordinates of the features (B, N, 3) 455 | new_xyz : torch.Tensor 456 | Ignored 457 | features : torch.Tensor 458 | Descriptors of the features (B, C, N) 459 | 460 | Returns 461 | ------- 462 | new_features : torch.Tensor 463 | (B, C + 3, 1, N) tensor 464 | """ 465 | 466 | grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) 467 | 468 | if features is not None: 469 | grouped_features = features.unsqueeze(2) 470 | if self.use_xyz: 471 | new_features = torch.cat( 472 | [grouped_xyz, grouped_features], dim=1 473 | ) # (B, 3 + C, 1, N) 474 | else: 475 | new_features = grouped_features 476 | else: 477 | new_features = grouped_xyz 478 | 479 | if self.ret_grouped_xyz: 480 | return new_features, grouped_xyz 481 | else: 482 | return new_features 483 | -------------------------------------------------------------------------------- /pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | class SharedMLP(nn.Sequential): 12 | 13 | def __init__( 14 | self, 15 | args: List[int], 16 | *, 17 | bn: bool = False, 18 | activation=nn.ReLU(inplace=True), 19 | preact: bool = False, 20 | first: bool = False, 21 | name: str = "" 22 | ): 23 | super().__init__() 24 | 25 | for i in range(len(args) - 1): 26 | self.add_module( 27 | name + 'layer{}'.format(i), 28 | Conv2d( 29 | args[i], 30 | args[i + 1], 31 | bn=(not first or not preact or (i != 0)) and bn, 32 | activation=activation 33 | if (not first or not preact or (i != 0)) else None, 34 | preact=preact 35 | ) 36 | ) 37 | 38 | class SplitMLP(nn.Sequential): 39 | 40 | def __init__( 41 | self, 42 | args: List[int], 43 | *, 44 | split: int = 18, 45 | bn: bool = False, 46 | activation=nn.ReLU(inplace=True), 47 | preact: bool = False, 48 | first: bool = False, 49 | name: str = "" 50 | ): 51 | super().__init__() 52 | for j in range(split): 53 | for i in range(len(args) - 1): 54 | self.add_module( 55 | name + 'layer{}'.format(i), 56 | Conv2d( 57 | args[i], 58 | args[i + 1], 59 | bn=(not first or not preact or (i != 0)) and bn, 60 | activation=activation 61 | if (not first or not preact or (i != 0)) else None, 62 | preact=preact 63 | ) 64 | ) 65 | 66 | class _BNBase(nn.Sequential): 67 | 68 | def __init__(self, in_size, batch_norm=None, name=""): 69 | super().__init__() 70 | self.add_module(name + "bn", batch_norm(in_size)) 71 | 72 | nn.init.constant_(self[0].weight, 1.0) 73 | nn.init.constant_(self[0].bias, 0) 74 | 75 | 76 | class BatchNorm1d(_BNBase): 77 | 78 | def __init__(self, in_size: int, *, name: str = ""): 79 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 80 | 81 | 82 | class BatchNorm2d(_BNBase): 83 | 84 | def __init__(self, in_size: int, name: str = ""): 85 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 86 | 87 | 88 | class BatchNorm3d(_BNBase): 89 | 90 | def __init__(self, in_size: int, name: str = ""): 91 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 92 | 93 | 94 | class _ConvBase(nn.Sequential): 95 | 96 | def __init__( 97 | self, 98 | in_size, 99 | out_size, 100 | kernel_size, 101 | stride, 102 | padding, 103 | activation, 104 | bn, 105 | init, 106 | conv=None, 107 | batch_norm=None, 108 | bias=True, 109 | preact=False, 110 | name="" 111 | ): 112 | super().__init__() 113 | 114 | bias = bias and (not bn) 115 | conv_unit = conv( 116 | in_size, 117 | out_size, 118 | kernel_size=kernel_size, 119 | stride=stride, 120 | padding=padding, 121 | bias=bias 122 | ) 123 | init(conv_unit.weight) 124 | if bias: 125 | nn.init.constant_(conv_unit.bias, 0) 126 | 127 | if bn: 128 | if not preact: 129 | bn_unit = batch_norm(out_size) 130 | else: 131 | bn_unit = batch_norm(in_size) 132 | 133 | if preact: 134 | if bn: 135 | self.add_module(name + 'bn', bn_unit) 136 | 137 | if activation is not None: 138 | self.add_module(name + 'activation', activation) 139 | 140 | self.add_module(name + 'conv', conv_unit) 141 | 142 | if not preact: 143 | if bn: 144 | self.add_module(name + 'bn', bn_unit) 145 | 146 | if activation is not None: 147 | self.add_module(name + 'activation', activation) 148 | 149 | 150 | class Conv1d(_ConvBase): 151 | 152 | def __init__( 153 | self, 154 | in_size: int, 155 | out_size: int, 156 | *, 157 | kernel_size: int = 1, 158 | stride: int = 1, 159 | padding: int = 0, 160 | activation=nn.ReLU(inplace=True), 161 | bn: bool = False, 162 | init=nn.init.kaiming_normal_, 163 | bias: bool = True, 164 | preact: bool = False, 165 | name: str = "" 166 | ): 167 | super().__init__( 168 | in_size, 169 | out_size, 170 | kernel_size, 171 | stride, 172 | padding, 173 | activation, 174 | bn, 175 | init, 176 | conv=nn.Conv1d, 177 | batch_norm=BatchNorm1d, 178 | bias=bias, 179 | preact=preact, 180 | name=name 181 | ) 182 | 183 | 184 | class Conv2d(_ConvBase): 185 | 186 | def __init__( 187 | self, 188 | in_size: int, 189 | out_size: int, 190 | *, 191 | kernel_size: Tuple[int, int] = (1, 1), 192 | stride: Tuple[int, int] = (1, 1), 193 | padding: Tuple[int, int] = (0, 0), 194 | activation=nn.ReLU(inplace=True), 195 | bn: bool = False, 196 | init=nn.init.kaiming_normal_, 197 | bias: bool = True, 198 | preact: bool = False, 199 | name: str = "" 200 | ): 201 | super().__init__( 202 | in_size, 203 | out_size, 204 | kernel_size, 205 | stride, 206 | padding, 207 | activation, 208 | bn, 209 | init, 210 | conv=nn.Conv2d, 211 | batch_norm=BatchNorm2d, 212 | bias=bias, 213 | preact=preact, 214 | name=name 215 | ) 216 | 217 | 218 | class Conv3d(_ConvBase): 219 | 220 | def __init__( 221 | self, 222 | in_size: int, 223 | out_size: int, 224 | *, 225 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 226 | stride: Tuple[int, int, int] = (1, 1, 1), 227 | padding: Tuple[int, int, int] = (0, 0, 0), 228 | activation=nn.ReLU(inplace=True), 229 | bn: bool = False, 230 | init=nn.init.kaiming_normal_, 231 | bias: bool = True, 232 | preact: bool = False, 233 | name: str = "" 234 | ): 235 | super().__init__( 236 | in_size, 237 | out_size, 238 | kernel_size, 239 | stride, 240 | padding, 241 | activation, 242 | bn, 243 | init, 244 | conv=nn.Conv3d, 245 | batch_norm=BatchNorm3d, 246 | bias=bias, 247 | preact=preact, 248 | name=name 249 | ) 250 | 251 | 252 | class FC(nn.Sequential): 253 | 254 | def __init__( 255 | self, 256 | in_size: int, 257 | out_size: int, 258 | *, 259 | activation=nn.ReLU(inplace=True), 260 | bn: bool = False, 261 | init=None, 262 | preact: bool = False, 263 | name: str = "" 264 | ): 265 | super().__init__() 266 | 267 | fc = nn.Linear(in_size, out_size, bias=not bn) 268 | if init is not None: 269 | init(fc.weight) 270 | if not bn: 271 | nn.init.constant_(fc.bias, 0) 272 | 273 | if preact: 274 | if bn: 275 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 276 | 277 | if activation is not None: 278 | self.add_module(name + 'activation', activation) 279 | 280 | self.add_module(name + 'fc', fc) 281 | 282 | if not preact: 283 | if bn: 284 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 285 | 286 | if activation is not None: 287 | self.add_module(name + 'activation', activation) 288 | 289 | def set_bn_momentum_default(bn_momentum): 290 | 291 | def fn(m): 292 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 293 | m.momentum = bn_momentum 294 | 295 | return fn 296 | 297 | 298 | class BNMomentumScheduler(object): 299 | 300 | def __init__( 301 | self, model, bn_lambda, last_epoch=-1, 302 | setter=set_bn_momentum_default 303 | ): 304 | if not isinstance(model, nn.Module): 305 | raise RuntimeError( 306 | "Class '{}' is not a PyTorch nn Module".format( 307 | type(model).__name__ 308 | ) 309 | ) 310 | 311 | self.model = model 312 | self.setter = setter 313 | self.lmbd = bn_lambda 314 | 315 | self.step(last_epoch + 1) 316 | self.last_epoch = last_epoch 317 | 318 | def step(self, epoch=None): 319 | if epoch is None: 320 | epoch = self.last_epoch + 1 321 | 322 | self.last_epoch = epoch 323 | self.model.apply(self.setter(self.lmbd(epoch))) 324 | 325 | 326 | -------------------------------------------------------------------------------- /pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | 10 | _ext_src_root = "_ext_src" 11 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 12 | "{}/src/*.cu".format(_ext_src_root) 13 | ) 14 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 15 | 16 | setup( 17 | name='pointnet2', 18 | ext_modules=[ 19 | CUDAExtension( 20 | name='pointnet2._ext', 21 | sources=_ext_sources, 22 | extra_compile_args={ 23 | "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 24 | "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 25 | }, 26 | ) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zaiweizhang/H3DNet/81bd6af37cb131fd9e81774f52f29a0f3b0a0f43/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /scannet/meta_data/scannet_means_v2.npz.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zaiweizhang/H3DNet/81bd6af37cb131fd9e81774f52f29a0f3b0a0f43/scannet/meta_data/scannet_means_v2.npz.npy -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | from box_util import get_3d_box 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.dataset = 'scannet' 18 | self.num_class = 18 19 | self.num_heading_bin = 24 # angle: -pi/2~pi/2, so divide 0~2*pi into 24 bin 20 | self.num_size_cluster = 18 21 | 22 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 23 | #self.type2class = {'wall':0, 'floor':1, 'cabinet':2, 'bed':3, 'chair':4, 'sofa':5, 'table':6, 'door':7,'window':8,'bookshelf':9,'picture':10, 'counter':11, 'blinds':12, 'desk':13, 'shelves':14, 'curtain':15, 'dresser':16, 'pillow':17, 'mirror':18, 'floormat':19, 'clothes':20, 'ceiling':21, 'books':22, 'refrigerator':23, 'television':24, 'paper':25, 'towel':26, 'showercurtrain':27, 'box':28, 'whiteboard':29, 'person':30, 'nightstand':31, 'toilet':32, 'sink':33, 'lamp':34, 'bathtub':35, 'bag':36} 24 | self.type2class_room = {'other':0, 'wall':1, 'floor':2} 25 | self.class2type = {self.type2class[t]:t for t in self.type2class} 26 | self.class2type_room = {self.type2class_room[t]:t for t in self.type2class_room} 27 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 28 | #self.nyu40ids = np.array([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36]) 29 | self.nyu40ids_room = np.array([1,2]) 30 | 31 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 32 | self.nyu40id2class_sem = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 33 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 34 | #self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means_v2.npz.npy'))[:self.num_class,:] 35 | self.type_mean_size = {} 36 | for i in range(self.num_size_cluster): 37 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 38 | 39 | 40 | def class2angle(self, pred_cls, residual, to_label_format=True): 41 | return 0 42 | 43 | ''' 44 | def angle2class(self, angle): 45 | # assert(False) 46 | num_class = self.num_heading_bin 47 | angle = angle%(2*np.pi) 48 | assert(angle>=0 and angle<=2*np.pi) 49 | angle_per_class = 2*np.pi/float(num_class) 50 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 51 | class_id = int(shifted_angle/angle_per_class) 52 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 53 | return class_id, residual_angle 54 | def class2angle(self, pred_cls, residual, to_label_format=True): 55 | num_class = self.num_heading_bin 56 | angle_per_class = 2*np.pi/float(num_class) 57 | angle_center = pred_cls * angle_per_class 58 | angle = angle_center + residual 59 | if to_label_format and angle>np.pi: 60 | angle = angle - 2*np.pi 61 | return angle 62 | ''' 63 | def angle2class2(self, angle): 64 | ''' modify according to sunrgbd 65 | scannet_angle: angle: -pi/2 ~ pi/2 66 | 1: angle += pi/2 -> 0~pi 67 | 2: class*(2pi/N) + number = angle + pi/2 68 | ''' 69 | class_id, residual_angle = self.angle2class(angle + np.pi / 2) 70 | return class_id, residual_angle 71 | 72 | def class2angle2(self, pred_cls, residual, to_label_format=True): 73 | angle = self.class2angle(pred_cls, residual) 74 | angle = angle - np.pi / 2 75 | return angle 76 | 77 | def size2class(self, size, type_name): 78 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 79 | size_class = self.type2class[type_name] 80 | size_residual = size - self.type_mean_size[type_name] 81 | return size_class, size_residual 82 | 83 | def class2size(self, pred_cls, residual): 84 | ''' Inverse function to size2class ''' 85 | return self.mean_size_arr[pred_cls, :] + residual 86 | 87 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 88 | heading_angle = self.class2angle(heading_class, heading_residual) 89 | box_size = self.class2size(int(size_class), size_residual) 90 | obb = np.zeros((7,)) 91 | obb[0:3] = center 92 | obb[3:6] = box_size 93 | obb[6] = heading_angle*-1 94 | return obb 95 | 96 | def param2obb2(self, center, heading_class, heading_residual, size_class, size_residual): 97 | heading_angle = self.class2angle(heading_class, heading_residual) 98 | box_size = self.class2size(int(size_class), size_residual) 99 | obb = np.zeros((7,)) 100 | obb[0:3] = center 101 | obb[3:6] = box_size 102 | obb[6] = heading_angle 103 | return obb 104 | 105 | def rotate_aligned_boxes(input_boxes, rot_mat): 106 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 107 | new_centers = np.dot(centers, np.transpose(rot_mat)) 108 | 109 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 110 | new_x = np.zeros((dx.shape[0], 4)) 111 | new_y = np.zeros((dx.shape[0], 4)) 112 | 113 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 114 | crnrs = np.zeros((dx.shape[0], 3)) 115 | crnrs[:,0] = crnr[0]*dx 116 | crnrs[:,1] = crnr[1]*dy 117 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 118 | new_x[:,i] = crnrs[:,0] 119 | new_y[:,i] = crnrs[:,1] 120 | 121 | 122 | new_dx = 2.0*np.max(new_x, 1) 123 | new_dy = 2.0*np.max(new_y, 1) 124 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 125 | 126 | return np.concatenate([new_centers, new_lengths], axis=1) 127 | -------------------------------------------------------------------------------- /sunrgbd/model_util_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | class SunrgbdDatasetConfig(object): 15 | def __init__(self): 16 | self.dataset = 'sunrgbd' 17 | self.num_class = 10 18 | self.num_heading_bin = 12 19 | self.num_size_cluster = 10 20 | 21 | self.type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 22 | 23 | self.class37_2_class10_multi_map = {4:[0], 7:[1, 5], 6:[2, 3], 5:[2, 3], 33:[4], 14:[1, 5], 17:[6], 32:[7], 10:[8], 36:[9]} 24 | self.class37_2_class10_multi = {} 25 | for i in range(38): 26 | if i in self.class37_2_class10_multi_map: 27 | self.class37_2_class10_multi.update({i: self.class37_2_class10_multi_map[i]}) 28 | else: 29 | self.class37_2_class10_multi.update({i: [-1]}) 30 | 31 | self.class37_2_class10_map = {4:0, 7:1, 6:2, 5:3, 33:4, 14:5, 17:6, 32:7, 10:8, 36:9} 32 | self.class37_2_class10 = {} 33 | for i in range(38): 34 | if i in self.class37_2_class10_map: 35 | self.class37_2_class10.update({i: self.class37_2_class10_map[i]}) 36 | else: 37 | self.class37_2_class10.update({i: -1}) 38 | 39 | self.class2type = {self.type2class[t]:t for t in self.type2class} 40 | self.type2onehotclass={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 41 | self.type_mean_size = {'bathtub': np.array([0.765840,1.398258,0.472728]), 42 | 'bed': np.array([2.114256,1.620300,0.927272]), 43 | 'bookshelf': np.array([0.404671,1.071108,1.688889]), 44 | 'chair': np.array([0.591958,0.552978,0.827272]), 45 | 'desk': np.array([0.695190,1.346299,0.736364]), 46 | 'dresser': np.array([0.528526,1.002642,1.172878]), 47 | 'night_stand': np.array([0.500618,0.632163,0.683424]), 48 | 'sofa': np.array([0.923508,1.867419,0.845495]), 49 | 'table': np.array([0.791118,1.279516,0.718182]), 50 | 'toilet': np.array([0.699104,0.454178,0.756250])} 51 | 52 | self.mean_size_arr = np.zeros((self.num_size_cluster, 3)) 53 | for i in range(self.num_size_cluster): 54 | self.mean_size_arr[i,:] = self.type_mean_size[self.class2type[i]] 55 | 56 | def size2class(self, size, type_name): 57 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 58 | size_class = self.type2class[type_name] 59 | size_residual = size - self.type_mean_size[type_name] 60 | return size_class, size_residual 61 | 62 | def class2size(self, pred_cls, residual): 63 | ''' Inverse function to size2class ''' 64 | mean_size = self.type_mean_size[self.class2type[pred_cls]] 65 | return mean_size + residual 66 | 67 | def angle2class(self, angle): 68 | ''' Convert continuous angle to discrete class 69 | [optinal] also small regression number from 70 | class center angle to current angle. 71 | 72 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 73 | return is class of int32 of 0,1,...,N-1 and a number such that 74 | class*(2pi/N) + number = angle 75 | ''' 76 | num_class = self.num_heading_bin 77 | angle = angle%(2*np.pi) 78 | assert(angle>=0 and angle<=2*np.pi) 79 | angle_per_class = 2*np.pi/float(num_class) 80 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 81 | class_id = int(shifted_angle/angle_per_class) 82 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 83 | return class_id, residual_angle 84 | 85 | def class2angle(self, pred_cls, residual, to_label_format=True): 86 | ''' Inverse function to angle2class ''' 87 | num_class = self.num_heading_bin 88 | angle_per_class = 2*np.pi/float(num_class) 89 | angle_center = pred_cls * angle_per_class 90 | angle = angle_center + residual 91 | if to_label_format and angle>np.pi: 92 | angle = angle - 2*np.pi 93 | return angle 94 | 95 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 96 | heading_angle = self.class2angle(heading_class, heading_residual) 97 | box_size = self.class2size(int(size_class), size_residual) 98 | obb = np.zeros((7,)) 99 | obb[0:3] = center 100 | obb[3:6] = box_size 101 | obb[6] = heading_angle*-1 102 | return obb 103 | 104 | 105 | -------------------------------------------------------------------------------- /sunrgbd/sunrgbd_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Provides Python helper function to read My SUNRGBD dataset. 7 | 8 | Author: Charles R. Qi 9 | Date: October, 2017 10 | 11 | Updated by Charles R. Qi 12 | Date: December, 2018 13 | Note: removed basis loading. 14 | ''' 15 | import numpy as np 16 | import cv2 17 | import os 18 | import scipy.io as sio # to load .mat files for depth points 19 | 20 | type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | class2type = {type2class[t]:t for t in type2class} 22 | 23 | 24 | def flip_axis_to_camera(pc): 25 | ''' Flip X-right,Y-forward,Z-up to X-right,Y-down,Z-forward 26 | Input and output are both (N,3) array 27 | ''' 28 | pc2 = np.copy(pc) 29 | pc2[:,[0,1,2]] = pc2[:,[0,2,1]] # cam X,Y,Z = depth X,-Z,Y 30 | pc2[:,1] *= -1 31 | return pc2 32 | 33 | def flip_axis_to_depth(pc): 34 | pc2 = np.copy(pc) 35 | pc2[:,[0,1,2]] = pc2[:,[0,2,1]] # depth X,Y,Z = cam X,Z,-Y 36 | pc2[:,2] *= -1 37 | return pc2 38 | 39 | 40 | class SUNObject3d(object): 41 | def __init__(self, line): 42 | data = line.split(' ') 43 | data[1:] = [float(x) for x in data[1:]] 44 | self.classname = data[0] 45 | self.xmin = data[1] 46 | self.ymin = data[2] 47 | self.xmax = data[1]+data[3] 48 | self.ymax = data[2]+data[4] 49 | self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax]) 50 | self.centroid = np.array([data[5],data[6],data[7]]) 51 | self.unused_dimension = np.array([data[8],data[9],data[10]]) 52 | self.w = data[8] 53 | self.l = data[9] 54 | self.h = data[10] 55 | self.orientation = np.zeros((3,)) 56 | self.orientation[0] = data[11] 57 | self.orientation[1] = data[12] 58 | self.heading_angle = -1 * np.arctan2(self.orientation[1], self.orientation[0]) 59 | 60 | class SUNRGBD_Calibration(object): 61 | ''' Calibration matrices and utils 62 | We define five coordinate system in SUN RGBD dataset 63 | 64 | camera coodinate: 65 | Z is forward, Y is downward, X is rightward 66 | 67 | depth coordinate: 68 | Just change axis order and flip up-down axis from camera coord 69 | 70 | upright depth coordinate: tilted depth coordinate by Rtilt such that Z is gravity direction, 71 | Z is up-axis, Y is forward, X is right-ward 72 | 73 | upright camera coordinate: 74 | Just change axis order and flip up-down axis from upright depth coordinate 75 | 76 | image coordinate: 77 | ----> x-axis (u) 78 | | 79 | v 80 | y-axis (v) 81 | 82 | depth points are stored in upright depth coordinate. 83 | labels for 3d box (basis, centroid, size) are in upright depth coordinate. 84 | 2d boxes are in image coordinate 85 | 86 | We generate frustum point cloud and 3d box in upright camera coordinate 87 | ''' 88 | 89 | def __init__(self, calib_filepath): 90 | lines = [line.rstrip() for line in open(calib_filepath)] 91 | Rtilt = np.array([float(x) for x in lines[0].split(' ')]) 92 | self.Rtilt = np.reshape(Rtilt, (3,3), order='F') 93 | K = np.array([float(x) for x in lines[1].split(' ')]) 94 | self.K = np.reshape(K, (3,3), order='F') 95 | self.f_u = self.K[0,0] 96 | self.f_v = self.K[1,1] 97 | self.c_u = self.K[0,2] 98 | self.c_v = self.K[1,2] 99 | 100 | def project_upright_depth_to_camera(self, pc): 101 | ''' project point cloud from depth coord to camera coordinate 102 | Input: (N,3) Output: (N,3) 103 | ''' 104 | # Project upright depth to depth coordinate 105 | pc2 = np.dot(np.transpose(self.Rtilt), np.transpose(pc[:,0:3])) # (3,n) 106 | return flip_axis_to_camera(np.transpose(pc2)) 107 | 108 | def project_upright_depth_to_image(self, pc): 109 | ''' Input: (N,3) Output: (N,2) UV and (N,) depth ''' 110 | pc2 = self.project_upright_depth_to_camera(pc) 111 | uv = np.dot(pc2, np.transpose(self.K)) # (n,3) 112 | uv[:,0] /= uv[:,2] 113 | uv[:,1] /= uv[:,2] 114 | return uv[:,0:2], pc2[:,2] 115 | 116 | def project_upright_depth_to_upright_camera(self, pc): 117 | return flip_axis_to_camera(pc) 118 | 119 | def project_upright_camera_to_upright_depth(self, pc): 120 | return flip_axis_to_depth(pc) 121 | 122 | def project_image_to_camera(self, uv_depth): 123 | n = uv_depth.shape[0] 124 | x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u 125 | y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v 126 | pts_3d_camera = np.zeros((n,3)) 127 | pts_3d_camera[:,0] = x 128 | pts_3d_camera[:,1] = y 129 | pts_3d_camera[:,2] = uv_depth[:,2] 130 | return pts_3d_camera 131 | 132 | def project_image_to_upright_camerea(self, uv_depth): 133 | pts_3d_camera = self.project_image_to_camera(uv_depth) 134 | pts_3d_depth = flip_axis_to_depth(pts_3d_camera) 135 | pts_3d_upright_depth = np.transpose(np.dot(self.Rtilt, np.transpose(pts_3d_depth))) 136 | return self.project_upright_depth_to_upright_camera(pts_3d_upright_depth) 137 | 138 | 139 | 140 | def rotx(t): 141 | """Rotation about the x-axis.""" 142 | c = np.cos(t) 143 | s = np.sin(t) 144 | return np.array([[1, 0, 0], 145 | [0, c, -s], 146 | [0, s, c]]) 147 | 148 | 149 | def roty(t): 150 | """Rotation about the y-axis.""" 151 | c = np.cos(t) 152 | s = np.sin(t) 153 | return np.array([[c, 0, s], 154 | [0, 1, 0], 155 | [-s, 0, c]]) 156 | 157 | 158 | def rotz(t): 159 | """Rotation about the z-axis.""" 160 | c = np.cos(t) 161 | s = np.sin(t) 162 | return np.array([[c, -s, 0], 163 | [s, c, 0], 164 | [0, 0, 1]]) 165 | 166 | 167 | def transform_from_rot_trans(R, t): 168 | """Transforation matrix from rotation matrix and translation vector.""" 169 | R = R.reshape(3, 3) 170 | t = t.reshape(3, 1) 171 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 172 | 173 | 174 | def inverse_rigid_trans(Tr): 175 | """Inverse a rigid body transform matrix (3x4 as [R|t]) 176 | [R'|-R't; 0|1] 177 | """ 178 | inv_Tr = np.zeros_like(Tr) # 3x4 179 | inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3]) 180 | inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3]) 181 | return inv_Tr 182 | 183 | def read_sunrgbd_label(label_filename): 184 | lines = [line.rstrip() for line in open(label_filename)] 185 | objects = [SUNObject3d(line) for line in lines] 186 | return objects 187 | 188 | def load_image(img_filename): 189 | return cv2.imread(img_filename) 190 | 191 | def load_depth_points(depth_filename): 192 | depth = np.loadtxt(depth_filename) 193 | return depth 194 | 195 | def load_depth_points_mat(depth_filename): 196 | depth = sio.loadmat(depth_filename)['instance'] 197 | return depth 198 | 199 | def random_shift_box2d(box2d, shift_ratio=0.1): 200 | ''' Randomly shift box center, randomly scale width and height 201 | ''' 202 | r = shift_ratio 203 | xmin,ymin,xmax,ymax = box2d 204 | h = ymax-ymin 205 | w = xmax-xmin 206 | cx = (xmin+xmax)/2.0 207 | cy = (ymin+ymax)/2.0 208 | cx2 = cx + w*r*(np.random.random()*2-1) 209 | cy2 = cy + h*r*(np.random.random()*2-1) 210 | h2 = h*(1+np.random.random()*2*r-r) # 0.9 to 1.1 211 | w2 = w*(1+np.random.random()*2*r-r) # 0.9 to 1.1 212 | return np.array([cx2-w2/2.0, cy2-h2/2.0, cx2+w2/2.0, cy2+h2/2.0]) 213 | 214 | def in_hull(p, hull): 215 | from scipy.spatial import Delaunay 216 | if not isinstance(hull,Delaunay): 217 | hull = Delaunay(hull) 218 | return hull.find_simplex(p)>=0 219 | 220 | def extract_pc_in_box3d(pc, box3d): 221 | ''' pc: (N,3), box3d: (8,3) ''' 222 | box3d_roi_inds = in_hull(pc[:,0:3], box3d) 223 | return pc[box3d_roi_inds,:], box3d_roi_inds 224 | 225 | 226 | def my_compute_box_3d(center, size, heading_angle): 227 | R = rotz(-1*heading_angle) 228 | l,w,h = size 229 | x_corners = [-l,l,l,-l,-l,l,l,-l] 230 | y_corners = [w,w,-w,-w,w,w,-w,-w] 231 | z_corners = [h,h,h,h,-h,-h,-h,-h] 232 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 233 | corners_3d[0,:] += center[0] 234 | corners_3d[1,:] += center[1] 235 | corners_3d[2,:] += center[2] 236 | return np.transpose(corners_3d) 237 | 238 | 239 | def compute_box_3d(obj, calib): 240 | ''' Takes an object and a projection matrix (P) and projects the 3d 241 | bounding box into the image plane. 242 | Returns: 243 | corners_2d: (8,2) array in image coord. 244 | corners_3d: (8,3) array in in upright depth coord. 245 | ''' 246 | center = obj.centroid 247 | 248 | # compute rotational matrix around yaw axis 249 | R = rotz(-1*obj.heading_angle) 250 | #b,a,c = dimension 251 | #print R, a,b,c 252 | 253 | # 3d bounding box dimensions 254 | l = obj.l # along heading arrow 255 | w = obj.w # perpendicular to heading arrow 256 | h = obj.h 257 | 258 | # rotate and translate 3d bounding box 259 | x_corners = [-l,l,l,-l,-l,l,l,-l] 260 | y_corners = [w,w,-w,-w,w,w,-w,-w] 261 | z_corners = [h,h,h,h,-h,-h,-h,-h] 262 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 263 | corners_3d[0,:] += center[0] 264 | corners_3d[1,:] += center[1] 265 | corners_3d[2,:] += center[2] 266 | 267 | # project the 3d bounding box into the image plane 268 | corners_2d,_ = calib.project_upright_depth_to_image(np.transpose(corners_3d)) 269 | #print 'corners_2d: ', corners_2d 270 | return corners_2d, np.transpose(corners_3d) 271 | 272 | def compute_orientation_3d(obj, calib): 273 | ''' Takes an object and a projection matrix (P) and projects the 3d 274 | object orientation vector into the image plane. 275 | Returns: 276 | orientation_2d: (2,2) array in image coord. 277 | orientation_3d: (2,3) array in depth coord. 278 | ''' 279 | 280 | # orientation in object coordinate system 281 | ori = obj.orientation 282 | orientation_3d = np.array([[0, ori[0]],[0, ori[1]],[0,0]]) 283 | center = obj.centroid 284 | orientation_3d[0,:] = orientation_3d[0,:] + center[0] 285 | orientation_3d[1,:] = orientation_3d[1,:] + center[1] 286 | orientation_3d[2,:] = orientation_3d[2,:] + center[2] 287 | 288 | # project orientation into the image plane 289 | orientation_2d,_ = calib.project_upright_depth_to_image(np.transpose(orientation_3d)) 290 | return orientation_2d, np.transpose(orientation_3d) 291 | 292 | def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2): 293 | ''' Draw 3d bounding box in image 294 | qs: (8,2) array of vertices for the 3d box in following order: 295 | 1 -------- 0 296 | /| /| 297 | 2 -------- 3 . 298 | | | | | 299 | . 5 -------- 4 300 | |/ |/ 301 | 6 -------- 7 302 | ''' 303 | qs = qs.astype(np.int32) 304 | for k in range(0,4): 305 | #http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html 306 | i,j=k,(k+1)%4 307 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) # use LINE_AA for opencv3 308 | 309 | i,j=k+4,(k+1)%4 + 4 310 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) 311 | 312 | i,j=k,k+4 313 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) 314 | return image 315 | 316 | 317 | import pickle 318 | import gzip 319 | 320 | def save_zipped_pickle(obj, filename, protocol=-1): 321 | with gzip.open(filename, 'wb') as f: 322 | pickle.dump(obj, f, protocol) 323 | 324 | def load_zipped_pickle(filename): 325 | with gzip.open(filename, 'rb') as f: 326 | loaded_object = pickle.load(f) 327 | return loaded_object 328 | -------------------------------------------------------------------------------- /utils/eval_det.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recal and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | 24 | def voc_ap(rec, prec, use_07_metric=False): 25 | """ ap = voc_ap(rec, prec, [use_07_metric]) 26 | Compute VOC AP given precision and recall. 27 | If use_07_metric is true, uses the 28 | VOC 07 11 point method (default:False). 29 | """ 30 | if use_07_metric: 31 | # 11 point metric 32 | ap = 0. 33 | for t in np.arange(0., 1.1, 0.1): 34 | if np.sum(rec >= t) == 0: 35 | p = 0 36 | else: 37 | p = np.max(prec[rec >= t]) 38 | ap = ap + p / 11. 39 | else: 40 | # correct AP calculation 41 | # first append sentinel values at the end 42 | mrec = np.concatenate(([0.], rec, [1.])) 43 | mpre = np.concatenate(([0.], prec, [0.])) 44 | 45 | # compute the precision envelope 46 | for i in range(mpre.size - 1, 0, -1): 47 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 48 | 49 | # to calculate area under PR curve, look for points 50 | # where X axis (recall) changes value 51 | i = np.where(mrec[1:] != mrec[:-1])[0] 52 | 53 | # and sum (\Delta recall) * prec 54 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 55 | return ap 56 | 57 | import os 58 | import sys 59 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 60 | from metric_util import calc_iou # axis-aligned 3D box IoU 61 | def get_iou(bb1, bb2): 62 | """ Compute IoU of two bounding boxes. 63 | ** Define your bod IoU function HERE ** 64 | """ 65 | #pass 66 | iou3d = calc_iou(bb1, bb2) 67 | return iou3d 68 | 69 | from box_util import box3d_iou 70 | def get_iou_obb(bb1,bb2): 71 | iou3d, iou2d = box3d_iou(bb1,bb2) 72 | return iou3d 73 | 74 | def get_iou_main(get_iou_func, args): 75 | return get_iou_func(*args) 76 | 77 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 78 | """ Generic functions to compute precision/recall for object detection 79 | for a single class. 80 | Input: 81 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 82 | gt: map of {img_id: [bbox]} 83 | ovthresh: scalar, iou threshold 84 | use_07_metric: bool, if True use VOC07 11 point method 85 | Output: 86 | rec: numpy array of length nd 87 | prec: numpy array of length nd 88 | ap: scalar, average precision 89 | """ 90 | 91 | # construct gt objects 92 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 93 | npos = 0 94 | for img_id in gt.keys(): 95 | bbox = np.array(gt[img_id]) 96 | det = [False] * len(bbox) 97 | npos += len(bbox) 98 | class_recs[img_id] = {'bbox': bbox, 'det': det} 99 | # pad empty list to all other imgids 100 | for img_id in pred.keys(): 101 | if img_id not in gt: 102 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 103 | 104 | # construct dets 105 | image_ids = [] 106 | confidence = [] 107 | BB = [] 108 | for img_id in pred.keys(): 109 | for box,score in pred[img_id]: 110 | image_ids.append(img_id) 111 | confidence.append(score) 112 | BB.append(box) 113 | confidence = np.array(confidence) 114 | BB = np.array(BB) # (nd,4 or 8,3 or 6) 115 | 116 | # sort by confidence 117 | sorted_ind = np.argsort(-confidence) 118 | sorted_scores = np.sort(-confidence) 119 | BB = BB[sorted_ind, ...] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | 122 | # go down dets and mark TPs and FPs 123 | nd = len(image_ids) 124 | tp = np.zeros(nd) 125 | fp = np.zeros(nd) 126 | for d in range(nd): 127 | #if d%100==0: print(d) 128 | R = class_recs[image_ids[d]] 129 | bb = BB[d,...].astype(float) 130 | ovmax = -np.inf 131 | BBGT = R['bbox'].astype(float) 132 | 133 | if BBGT.size > 0: 134 | # compute overlaps 135 | for j in range(BBGT.shape[0]): 136 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...])) 137 | if iou > ovmax: 138 | ovmax = iou 139 | jmax = j 140 | 141 | #print d, ovmax 142 | if ovmax > ovthresh: 143 | if not R['det'][jmax]: 144 | tp[d] = 1. 145 | R['det'][jmax] = 1 146 | else: 147 | fp[d] = 1. 148 | else: 149 | fp[d] = 1. 150 | 151 | # compute precision recall 152 | fp = np.cumsum(fp) 153 | tp = np.cumsum(tp) 154 | rec = tp / float(npos) 155 | #print('NPOS: ', npos) 156 | # avoid divide by zero in case the first detection matches a difficult 157 | # ground truth 158 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 159 | ap = voc_ap(rec, prec, use_07_metric) 160 | 161 | return rec, prec, ap 162 | 163 | def eval_det_cls_wrapper(arguments): 164 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 165 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 166 | return (rec, prec, ap) 167 | 168 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 169 | """ Generic functions to compute precision/recall for object detection 170 | for multiple classes. 171 | Input: 172 | pred_all: map of {img_id: [(classname, bbox, score)]} 173 | gt_all: map of {img_id: [(classname, bbox)]} 174 | ovthresh: scalar, iou threshold 175 | use_07_metric: bool, if true use VOC07 11 point method 176 | Output: 177 | rec: {classname: rec} 178 | prec: {classname: prec_all} 179 | ap: {classname: scalar} 180 | """ 181 | pred = {} # map {classname: pred} 182 | gt = {} # map {classname: gt} 183 | for img_id in pred_all.keys(): 184 | for classname, bbox, score in pred_all[img_id]: 185 | if classname not in pred: pred[classname] = {} 186 | if img_id not in pred[classname]: 187 | pred[classname][img_id] = [] 188 | if classname not in gt: gt[classname] = {} 189 | if img_id not in gt[classname]: 190 | gt[classname][img_id] = [] 191 | pred[classname][img_id].append((bbox,score)) 192 | for img_id in gt_all.keys(): 193 | for classname, bbox in gt_all[img_id]: 194 | if classname not in gt: gt[classname] = {} 195 | if img_id not in gt[classname]: 196 | gt[classname][img_id] = [] 197 | gt[classname][img_id].append(bbox) 198 | 199 | rec = {} 200 | prec = {} 201 | ap = {} 202 | for classname in gt.keys(): 203 | print('Computing AP for class: ', classname) 204 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) 205 | print(classname, ap[classname]) 206 | 207 | return rec, prec, ap 208 | 209 | from multiprocessing import Pool 210 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 211 | """ Generic functions to compute precision/recall for object detection 212 | for multiple classes. 213 | Input: 214 | pred_all: map of {img_id: [(classname, bbox, score)]} 215 | gt_all: map of {img_id: [(classname, bbox)]} 216 | ovthresh: scalar, iou threshold 217 | use_07_metric: bool, if true use VOC07 11 point method 218 | Output: 219 | rec: {classname: rec} 220 | prec: {classname: prec_all} 221 | ap: {classname: scalar} 222 | """ 223 | pred = {} # map {classname: pred} 224 | gt = {} # map {classname: gt} 225 | for img_id in pred_all.keys(): 226 | for classname, bbox, score in pred_all[img_id]: 227 | if classname not in pred: pred[classname] = {} 228 | if img_id not in pred[classname]: 229 | pred[classname][img_id] = [] 230 | if classname not in gt: gt[classname] = {} 231 | if img_id not in gt[classname]: 232 | gt[classname][img_id] = [] 233 | pred[classname][img_id].append((bbox,score)) 234 | for img_id in gt_all.keys(): 235 | for classname, bbox in gt_all[img_id]: 236 | if classname not in gt: gt[classname] = {} 237 | if img_id not in gt[classname]: 238 | gt[classname][img_id] = [] 239 | gt[classname][img_id].append(bbox) 240 | 241 | rec = {} 242 | prec = {} 243 | ap = {} 244 | p = Pool(processes=10) 245 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred]) 246 | p.close() 247 | for i, classname in enumerate(gt.keys()): 248 | if classname in pred: 249 | rec[classname], prec[classname], ap[classname] = ret_values[i] 250 | else: 251 | rec[classname] = 0 252 | prec[classname] = 0 253 | ap[classname] = 0 254 | print(classname, ap[classname]) 255 | 256 | return rec, prec, ap 257 | -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. Qi 9 | """ 10 | 11 | import os 12 | import sys 13 | import torch 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | 17 | import numpy as np 18 | 19 | # Mesh IO 20 | import trimesh 21 | 22 | 23 | # ---------------------------------------- 24 | # Precision and Recall 25 | # ---------------------------------------- 26 | 27 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 28 | ''' 29 | Args: 30 | labels: (B, N, 6) 31 | pred: (B, M, 6) 32 | iou_thresh: scalar 33 | conf_thresh: scalar 34 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 35 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 36 | Returns: 37 | TP,FP,FN,Precision,Recall 38 | ''' 39 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 40 | # of True/False array as in numpy 41 | assert(not torch.is_tensor(label_mask)) 42 | assert(not torch.is_tensor(pred_mask)) 43 | TP, FP, FN = 0, 0, 0 44 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 45 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 46 | for batch_idx in range(labels.shape[0]): 47 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :], 48 | pred[batch_idx, pred_mask[batch_idx,:]==1, :], 49 | iou_thresh, conf_thresh) 50 | TP += TP_i 51 | FP += FP_i 52 | FN += FN_i 53 | 54 | return TP, FP, FN, precision_recall(TP, FP, FN) 55 | 56 | 57 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 58 | """Compute P and R for predicted bounding boxes. Ignores classes! 59 | Args: 60 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 61 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 62 | Returns: 63 | TP, FP, FN 64 | """ 65 | 66 | 67 | # for each pred box with high conf (C), compute IoU with all gt boxes. 68 | # TP = number of times IoU > th ; FP = C - TP 69 | # FN - number of scene objects without good match 70 | 71 | gt_bboxes = labels[:, :6] 72 | 73 | num_scene_bboxes = gt_bboxes.shape[0] 74 | conf = pred[:, 6] 75 | 76 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 77 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 78 | 79 | # init an array to keep iou between generated and scene bboxes 80 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 81 | for g_idx in range(num_conf_pred_bboxes): 82 | for s_idx in range(num_scene_bboxes): 83 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :]) 84 | 85 | 86 | good_match_arr = (iou_arr >= iou_thresh) 87 | 88 | TP = good_match_arr.any(axis=1).sum() 89 | FP = num_conf_pred_bboxes - TP 90 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 91 | 92 | return TP, FP, FN 93 | 94 | 95 | def precision_recall(TP, FP, FN): 96 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0 97 | Rec = 1.0 * TP / (TP + FN) 98 | return Prec, Rec 99 | 100 | 101 | def calc_iou(box_a, box_b): 102 | """Computes IoU of two axis aligned bboxes. 103 | Args: 104 | box_a, box_b: 6D of center and lengths 105 | Returns: 106 | iou 107 | """ 108 | 109 | max_a = box_a[0:3] + box_a[3:6]/2 110 | max_b = box_b[0:3] + box_b[3:6]/2 111 | min_max = np.array([max_a, max_b]).min(0) 112 | 113 | min_a = box_a[0:3] - box_a[3:6]/2 114 | min_b = box_b[0:3] - box_b[3:6]/2 115 | max_min = np.array([min_a, min_b]).max(0) 116 | if not ((min_max > max_min).all()): 117 | return 0.0 118 | 119 | intersection = (min_max - max_min).prod() 120 | vol_a = box_a[3:6].prod() 121 | vol_b = box_b[3:6].prod() 122 | union = vol_a + vol_b - intersection 123 | return 1.0*intersection / union 124 | 125 | 126 | if __name__ == '__main__': 127 | print('running some tests') 128 | 129 | ############ 130 | ## Test IoU 131 | ############ 132 | box_a = np.array([0,0,0,1,1,1]) 133 | box_b = np.array([0,0,0,2,2,2]) 134 | expected_iou = 1.0/8 135 | pred_iou = calc_iou(box_a, box_b) 136 | assert expected_iou == pred_iou, 'function returned wrong IoU' 137 | 138 | box_a = np.array([0,0,0,1,1,1]) 139 | box_b = np.array([10,10,10,2,2,2]) 140 | expected_iou = 0.0 141 | pred_iou = calc_iou(box_a, box_b) 142 | assert expected_iou == pred_iou, 'function returned wrong IoU' 143 | 144 | print('IoU test -- PASSED') 145 | 146 | ######################### 147 | ## Test Precition Recall 148 | ######################### 149 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]]) 150 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 2 and FP == 0 and FN == 0 153 | assert precision_recall(TP, FP, FN) == (1, 1) 154 | 155 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]]) 156 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 157 | assert TP == 1 and FP == 0 and FN == 1 158 | assert precision_recall(TP, FP, FN) == (1, 0.5) 159 | 160 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]]) 161 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 162 | assert TP == 1 and FP == 1 and FN == 1 163 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 164 | 165 | # wrong box has low confidence 166 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]]) 167 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 168 | assert TP == 1 and FP == 0 and FN == 1 169 | assert precision_recall(TP, FP, FN) == (1, 0.5) 170 | 171 | print('Precition Recall test -- PASSED') 172 | 173 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | from pc_util import bbox_corner_dist_measure 8 | 9 | # boxes are axis aigned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 10 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 11 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 12 | ''' 13 | def nms_2d(boxes, overlap_threshold): 14 | x1 = boxes[:,0] 15 | y1 = boxes[:,1] 16 | x2 = boxes[:,2] 17 | y2 = boxes[:,3] 18 | score = boxes[:,4] 19 | area = (x2-x1)*(y2-y1) 20 | 21 | I = np.argsort(score) 22 | pick = [] 23 | while (I.size!=0): 24 | last = I.size 25 | i = I[-1] 26 | pick.append(i) 27 | suppress = [last-1] 28 | for pos in range(last-1): 29 | j = I[pos] 30 | xx1 = max(x1[i],x1[j]) 31 | yy1 = max(y1[i],y1[j]) 32 | xx2 = min(x2[i],x2[j]) 33 | yy2 = min(y2[i],y2[j]) 34 | w = xx2-xx1 35 | h = yy2-yy1 36 | if (w>0 and h>0): 37 | o = w*h/area[j] 38 | print('Overlap is', o) 39 | if (o>overlap_threshold): 40 | suppress.append(pos) 41 | I = np.delete(I,suppress) 42 | return pick 43 | 44 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 45 | x1 = boxes[:,0] 46 | y1 = boxes[:,1] 47 | x2 = boxes[:,2] 48 | y2 = boxes[:,3] 49 | score = boxes[:,4] 50 | area = (x2-x1)*(y2-y1) 51 | 52 | I = np.argsort(score) 53 | pick = [] 54 | while (I.size!=0): 55 | last = I.size 56 | i = I[-1] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 60 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 61 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 62 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 63 | 64 | w = np.maximum(0, xx2-xx1) 65 | h = np.maximum(0, yy2-yy1) 66 | 67 | if old_type: 68 | o = (w*h)/area[I[:last-1]] 69 | else: 70 | inter = w*h 71 | o = inter / (area[i] + area[I[:last-1]] - inter) 72 | 73 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 74 | 75 | return pick 76 | 77 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 78 | x1 = boxes[:,0] 79 | y1 = boxes[:,1] 80 | z1 = boxes[:,2] 81 | x2 = boxes[:,3] 82 | y2 = boxes[:,4] 83 | z2 = boxes[:,5] 84 | score = boxes[:,6] 85 | area = (x2-x1)*(y2-y1)*(z2-z1) 86 | 87 | I = np.argsort(score) 88 | pick = [] 89 | while (I.size!=0): 90 | last = I.size 91 | i = I[-1] 92 | pick.append(i) 93 | 94 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 95 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 96 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 97 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 98 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 99 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 100 | 101 | l = np.maximum(0, xx2-xx1) 102 | w = np.maximum(0, yy2-yy1) 103 | h = np.maximum(0, zz2-zz1) 104 | 105 | if old_type: 106 | o = (l*w*h)/area[I[:last-1]] 107 | else: 108 | inter = l*w*h 109 | o = inter / (area[i] + area[I[:last-1]] - inter) 110 | 111 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 112 | 113 | return pick 114 | 115 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 116 | x1 = boxes[:,0] 117 | y1 = boxes[:,1] 118 | z1 = boxes[:,2] 119 | x2 = boxes[:,3] 120 | y2 = boxes[:,4] 121 | z2 = boxes[:,5] 122 | score = boxes[:,6] 123 | cls = boxes[:,7] 124 | area = (x2-x1)*(y2-y1)*(z2-z1) 125 | 126 | I = np.argsort(score) 127 | pick = [] 128 | while (I.size!=0): 129 | last = I.size 130 | i = I[-1] 131 | pick.append(i) 132 | 133 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 134 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 135 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 136 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 137 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 138 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 139 | cls1 = cls[i] 140 | cls2 = cls[I[:last-1]] 141 | 142 | l = np.maximum(0, xx2-xx1) 143 | w = np.maximum(0, yy2-yy1) 144 | h = np.maximum(0, zz2-zz1) 145 | 146 | if old_type: 147 | o = (l*w*h)/area[I[:last-1]] 148 | else: 149 | inter = l*w*h 150 | o = inter / (area[i] + area[I[:last-1]] - inter) 151 | o = o * (cls1==cls2) 152 | 153 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 154 | 155 | return pick 156 | 157 | 158 | def nms_crnr_dist(boxes, conf, overlap_threshold): 159 | 160 | I = np.argsort(conf) 161 | pick = [] 162 | while (I.size!=0): 163 | last = I.size 164 | i = I[-1] 165 | pick.append(i) 166 | 167 | scores = [] 168 | for ind in I[:-1]: 169 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 170 | 171 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 172 | 173 | return pick 174 | 175 | if __name__=='__main__': 176 | a = np.random.random((100,5)) 177 | print(nms_2d(a,0.9)) 178 | print(nms_2d_faster(a,0.9)) 179 | -------------------------------------------------------------------------------- /utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. Qi 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | def huber_loss(error, delta=1.0): 16 | """ 17 | Args: 18 | error: Torch tensor (d1,d2,...,dk) 19 | Returns: 20 | loss: Torch tensor (d1,d2,...,dk) 21 | 22 | x = error = pred - gt or dist(pred,gt) 23 | 0.5 * |x|^2 if |x|<=d 24 | 0.5 * d^2 + d * (|x|-d) if |x|>d 25 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 26 | """ 27 | abs_error = torch.abs(error) 28 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 29 | quadratic = torch.clamp(abs_error, max=delta) 30 | linear = (abs_error - quadratic) 31 | loss = 0.5 * quadratic**2 + delta * linear 32 | return loss 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | def demo_nn_distance(): 64 | np.random.seed(0) 65 | pc1arr = np.random.random((1,5,3)) 66 | pc2arr = np.random.random((1,6,3)) 67 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 68 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 69 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /utils/show_results_scannet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 4 | ROOT_DIR = BASE_DIR 5 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 6 | import datetime 7 | import numpy as np 8 | import pdb 9 | import matplotlib.pyplot as pyplot 10 | import open3d as o3d 11 | from scipy.spatial.distance import directed_hausdorff 12 | import json 13 | import pickle 14 | import random 15 | import scipy.io as sio 16 | 17 | 18 | THRESH = 0 19 | THRESH2 = -0.1 20 | VAL_SCAN_NAMES = [line.rstrip() for line in open('scannet/meta_data/scannetv2_val.txt')] 21 | SCANNET_DIR = '/home/bo/data/scannet/scans/' # path of scannet dataset 22 | LABEL_MAP_FILE = 'scannet/meta_data/scannetv2-labels.combined.tsv' 23 | DONOTCARE_CLASS_IDS = np.array([]) 24 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 25 | MAX_NUM_POINT = 40000 26 | GT_PATH = '/home/bo/data/scannet/scannet_train_detection_data' # path of data dumped with scripts in scannet folder 27 | PRED_PATH = '/home/bo/data/scannet/dump/supp/result' # path of predictions 28 | mode = sys.argv[1] # gt or pred 29 | color_mapping = {3: [255,140,0], 4:[30,144,255], 5:[50,205,50], 6:[255,215,0], 7:[255,69,0], 8:[138,43,226],9:[0,255,255],10:[210,105,30],11:[255,0,255], 12:[255,255,0], 14:[255,20,147], 16:[165,42,42], 24:[100,149,237], 28:[0,128,0], 33:[255,127,80],34:[221,160,221], 36:[95,158,160], 39:[119,136,153]} 30 | 31 | def create_lineset(bbox, colors=[1, 0, 0]): 32 | ''' create bounding box 33 | ''' 34 | xmin = bbox[0] - bbox[3] / 2 35 | xmax = bbox[0] + bbox[3] / 2 36 | ymin = bbox[1] - bbox[4] / 2 37 | ymax = bbox[1] + bbox[4] / 2 38 | zmin = bbox[2] - bbox[5] / 2 39 | zmax = bbox[2] + bbox[5] / 2 40 | points = [[xmin, ymin, zmin], [xmin, ymin, zmax], [xmin, ymax, zmin], [xmin, ymax, zmax], 41 | [xmax, ymin, zmin], [xmax, ymin, zmax], [xmax, ymax, zmin], [xmax, ymax, zmax]] 42 | lines = [[0, 1], [0, 2], [2, 3], [1, 3], [0, 4], [1, 5], [3, 7], [2, 6], 43 | [4, 5], [5, 7], [6, 7], [4, 6]] 44 | line_set = o3d.geometry.LineSet() 45 | line_set.points = o3d.utility.Vector3dVector(points) 46 | line_set.lines = o3d.utility.Vector2iVector(lines) 47 | line_set.colors = o3d.utility.Vector3dVector(np.tile(colors, [12, 1])) 48 | return line_set 49 | 50 | def load_view_point(pcd, filename, window_name): 51 | if mode=='pred': 52 | left = 50 53 | top=50 54 | elif mode=='gt': 55 | left = 1000 56 | top=50 57 | else: 58 | print("model must be gt or pred") 59 | return 60 | vis = o3d.visualization.Visualizer() 61 | vis.create_window(window_name, width=880, height=680, left=left, top=top) 62 | for part in pcd: 63 | vis.add_geometry(part) 64 | ctr = vis.get_view_control() 65 | current_param = ctr.convert_to_pinhole_camera_parameters() 66 | trajectory = o3d.io.read_pinhole_camera_trajectory(filename) 67 | f = 983.80485869912241 68 | cx = current_param.intrinsic.width / 2 - 0.5 69 | cy = current_param.intrinsic.height / 2 - 0.5 70 | trajectory.parameters[0].intrinsic.set_intrinsics(current_param.intrinsic.width, current_param.intrinsic.height, f, f, cx, cy) 71 | 72 | ctr.convert_from_pinhole_camera_parameters(trajectory.parameters[0]) 73 | vis.run() 74 | vis.destroy_window() 75 | 76 | def select_bbox(bboxes): 77 | choose_ids = [] 78 | for i in range(bboxes.shape[0]): 79 | if bboxes[i,-1] in OBJ_CLASS_IDS: 80 | choose_ids.append(i) 81 | bboxes = bboxes[choose_ids] 82 | return bboxes 83 | 84 | def export_one_scan(scan_name): 85 | pt = np.load(os.path.join(GT_PATH, scan_name+'_vert.npy')) 86 | np.savetxt('tmp.xyz', pt) 87 | os.system("mv tmp.xyz tmp.xyzrgb") 88 | pcd = o3d.io.read_point_cloud('tmp.xyzrgb') 89 | 90 | gt_bbox = np.load(os.path.join(GT_PATH, scan_name+'_all_angle_40cls.npy')) 91 | gt_bbox = select_bbox(np.unique(gt_bbox,axis=0)) 92 | semantic_labels = gt_bbox[:,-1] 93 | pred_proposals = np.load(os.path.join(PRED_PATH, 'opt'+scan_name+'_nms.npy')) 94 | 95 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 96 | semantic_labels = semantic_labels[mask] 97 | 98 | bb =[] 99 | if mode=='gt': 100 | boundingboxes = gt_bbox 101 | elif mode =='pred': 102 | boundingboxes = pred_proposals 103 | else: 104 | print("model must be gt or pred") 105 | return 106 | 107 | for i in range(boundingboxes.shape[0]): 108 | if mode =='gt': 109 | c = np.array(color_mapping[int(boundingboxes[i,-1])])/255.0 110 | else: 111 | c = np.array(color_mapping[int(OBJ_CLASS_IDS[int(boundingboxes[i,-1])-1])])/255.0 112 | for _ in range(2): 113 | bb.append(create_lineset(boundingboxes[i]+0.005*(np.random.rand()-0.5)*2, colors=c)) 114 | load_view_point([pcd] + bb, './viewpoint.json', window_name=scan_name+'_'+mode) 115 | 116 | 117 | def batch_export(): 118 | for i, scan_name in enumerate(sorted(VAL_SCAN_NAMES)): 119 | #if not scan_name.endswith('_00'): 120 | # continue 121 | print('-'*20+'begin') 122 | print(datetime.datetime.now()) 123 | print(scan_name) 124 | export_one_scan(scan_name) 125 | print('-'*20+'done') 126 | 127 | if __name__=='__main__': 128 | batch_export() 129 | -------------------------------------------------------------------------------- /utils/show_results_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Batch mode in loading Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | import pdb 16 | import matplotlib.pyplot as pyplot 17 | import open3d as o3d 18 | from scipy.spatial.distance import directed_hausdorff 19 | import json 20 | import pickle 21 | import random 22 | import scipy.io as sio 23 | from pc_util import params2bbox, write_ply_rgb 24 | 25 | THRESH = 0 26 | THRESH2 = -0.1 27 | DATA_DIR = os.path.join('/home/bo/data/sunrgbd/sunrgbd_pc_bbox_votes_50k_v1_val') # path of sunrgbd dataset 28 | VAL_SCAN_NAMES = sorted(list(set([os.path.basename(x)[0:6] for x in os.listdir(DATA_DIR)]))) 29 | PRED_PATH= '/home/bo/projects/cvpr2020/detection/new/new/sunrgbd/code_sunrgbd/indoor_scene_understanding/dump_sunrgbd/result' # path of predictions 30 | 31 | DONOTCARE_CLASS_IDS = np.array([]) 32 | MAX_NUM_POINT = 40000 33 | mode = sys.argv[1] 34 | 35 | color_mapping = {1:[30,144,255], 2:[255,69,0], 3:[255,215,0], 4:[50,205,50], 5:[255,127,80], 36 | 6:[255,20,147], 7:[100,149,237], 8:[255,127,80],9:[210,105,30], 10:[221,160,221],11:[95,158, 160]} 37 | 38 | def create_lineset_old(bbox, colors=[1, 0, 0]): 39 | ''' create bounding box 40 | ''' 41 | xmin = bbox[0] - bbox[3] / 2 42 | xmax = bbox[0] + bbox[3] / 2 43 | ymin = bbox[1] - bbox[4] / 2 44 | ymax = bbox[1] + bbox[4] / 2 45 | zmin = bbox[2] - bbox[5] / 2 46 | zmax = bbox[2] + bbox[5] / 2 47 | points = [[xmin, ymin, zmin], [xmin, ymin, zmax], [xmin, ymax, zmin], [xmin, ymax, zmax], 48 | [xmax, ymin, zmin], [xmax, ymin, zmax], [xmax, ymax, zmin], [xmax, ymax, zmax]] 49 | lines = [[0, 1], [0, 2], [2, 3], [1, 3], [0, 4], [1, 5], [3, 7], [2, 6], 50 | [4, 5], [5, 7], [6, 7], [4, 6]] 51 | line_set = o3d.geometry.LineSet() 52 | line_set.points = o3d.utility.Vector3dVector(points) 53 | line_set.lines = o3d.utility.Vector2iVector(lines) 54 | line_set.colors = o3d.utility.Vector3dVector(np.tile(colors, [12, 1])) 55 | return line_set 56 | 57 | 58 | def create_lineset(bbox, colors=[1, 0, 0]): 59 | ''' create bounding box 60 | ''' 61 | points = params2bbox(bbox) 62 | lines = [[0, 1], [0, 2], [2, 3], [1, 3], [0, 4], [1, 5], [3, 7], [2, 6], 63 | [4, 5], [5, 7], [6, 7], [4, 6]] 64 | line_set = o3d.geometry.LineSet() 65 | line_set.points = o3d.utility.Vector3dVector(points) 66 | line_set.lines = o3d.utility.Vector2iVector(lines) 67 | line_set.colors = o3d.utility.Vector3dVector(np.tile(colors, [12, 1])) 68 | return line_set 69 | 70 | 71 | def load_view_point(pcd, filename, window_name): 72 | if mode=='pred': 73 | left = 50 74 | top=50 75 | elif mode=='gt': 76 | left = 1000 77 | top=730 78 | else: 79 | print("model must be gt or pred") 80 | return 81 | 82 | vis = o3d.visualization.Visualizer() 83 | vis.create_window(window_name, width=880, height=680, left=left, top=top) 84 | for part in pcd: 85 | vis.add_geometry(part) 86 | ctr = vis.get_view_control() 87 | current_param = ctr.convert_to_pinhole_camera_parameters() 88 | trajectory = o3d.io.read_pinhole_camera_trajectory(filename) 89 | f = 983.80485869912241 90 | cx = current_param.intrinsic.width / 2 - 0.5 91 | cy = current_param.intrinsic.height / 2 - 0.5 92 | trajectory.parameters[0].intrinsic.set_intrinsics(current_param.intrinsic.width, current_param.intrinsic.height, f, f, cx, cy) 93 | 94 | ctr.convert_from_pinhole_camera_parameters(trajectory.parameters[0]) 95 | vis.run() 96 | vis.destroy_window() 97 | 98 | def select_bbox(bboxes): 99 | choose_ids = [] 100 | for i in range(bboxes.shape[0]): 101 | if bboxes[i,-1] in OBJ_CLASS_IDS: 102 | choose_ids.append(i) 103 | bboxes = bboxes[choose_ids] 104 | return bboxes 105 | 106 | def export_one_scan(scan_name): 107 | pt = np.load(os.path.join(DATA_DIR, scan_name+'_pc.npz'))['pc'] 108 | np.savetxt(mode+'tmp.xyz', pt) 109 | os.system("mv {}tmp.xyz {}tmp.xyzrgb".format(mode, mode)) 110 | point_cloud = o3d.io.read_point_cloud(mode+'tmp.xyzrgb') 111 | 112 | pred_proposals = np.load(os.path.join(PRED_PATH, 'center'+scan_name+'_nms.npy')) 113 | gt_bbox = sio.loadmat(os.path.join(PRED_PATH, 'center'+scan_name+'_gt.mat'))['gt'] 114 | bb =[] 115 | if mode=='gt': 116 | boundingboxes = gt_bbox 117 | elif mode =='pred': 118 | boundingboxes = pred_proposals 119 | else: 120 | print("model must be gt or pred") 121 | return 122 | for i in range(boundingboxes.shape[0]): 123 | c = np.array(color_mapping[int(boundingboxes[i,-1])])/255.0 124 | for _ in range(2): 125 | bb.append(create_lineset(boundingboxes[i]+0.005*(np.random.rand()-0.5)*2, colors=c)) 126 | load_view_point([point_cloud] + bb, './viewpoint.json', window_name=scan_name+'_'+mode) 127 | 128 | 129 | def batch_export(): 130 | for i, scan_name in enumerate(VAL_SCAN_NAMES): 131 | if not scan_name.endswith('10'): 132 | continue 133 | print('-'*20+'begin') 134 | print(scan_name) 135 | export_one_scan(scan_name) 136 | print('-'*20+'done') 137 | 138 | if __name__=='__main__': 139 | batch_export() 140 | -------------------------------------------------------------------------------- /utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | 15 | class Logger(object): 16 | 17 | def __init__(self, log_dir): 18 | """Create a summary writer logging to log_dir.""" 19 | self.writer = tf.summary.FileWriter(log_dir) 20 | 21 | def scalar_summary(self, tag, value, step): 22 | """Log a scalar variable.""" 23 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 24 | self.writer.add_summary(summary, step) 25 | 26 | def image_summary(self, tag, images, step): 27 | """Log a list of images.""" 28 | 29 | img_summaries = [] 30 | for i, img in enumerate(images): 31 | # Write the image to a string 32 | try: 33 | s = StringIO() 34 | except: 35 | s = BytesIO() 36 | scipy.misc.toimage(img).save(s, format="png") 37 | 38 | # Create an Image object 39 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 40 | height=img.shape[0], 41 | width=img.shape[1]) 42 | # Create a Summary value 43 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 44 | 45 | # Create and write Summary 46 | summary = tf.Summary(value=img_summaries) 47 | self.writer.add_summary(summary, step) 48 | 49 | def histo_summary(self, tag, values, step, bins=1000): 50 | """Log a histogram of the tensor of values.""" 51 | 52 | # Create a histogram using numpy 53 | counts, bin_edges = np.histogram(values, bins=bins) 54 | 55 | # Fill the fields of the histogram proto 56 | hist = tf.HistogramProto() 57 | hist.min = float(np.min(values)) 58 | hist.max = float(np.max(values)) 59 | hist.num = int(np.prod(values.shape)) 60 | hist.sum = float(np.sum(values)) 61 | hist.sum_squares = float(np.sum(values**2)) 62 | 63 | # Drop the start of the first bin 64 | bin_edges = bin_edges[1:] 65 | 66 | # Add bin edges and counts 67 | for edge in bin_edges: 68 | hist.bucket_limit.append(edge) 69 | for c in counts: 70 | hist.bucket.append(c) 71 | 72 | # Create and write Summary 73 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 74 | self.writer.add_summary(summary, step) 75 | self.writer.flush() 76 | -------------------------------------------------------------------------------- /utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 7 | import os 8 | import time 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | import sys 11 | sys.path.append(BASE_DIR) 12 | import tf_logger 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, opt, name='train'): 17 | # self.opt = opt 18 | #self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 19 | #self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 20 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 21 | self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt') 22 | with open(self.log_name, "a") as log_file: 23 | now = time.strftime("%c") 24 | log_file.write('================ Training Loss (%s) ================\n' % now) 25 | 26 | # |visuals|: dictionary of images to save 27 | def log_images(self, visuals, step): 28 | for label, image_numpy in visuals.items(): 29 | self.logger.image_summary( 30 | label, [image_numpy], step) 31 | 32 | # scalars: dictionary of scalar labels and values 33 | def log_scalars(self, scalars, step): 34 | for label, val in scalars.items(): 35 | self.logger.scalar_summary(label, val, step) 36 | 37 | # scatter plots 38 | def plot_current_points(self, points, disp_offset=10): 39 | pass 40 | 41 | # scalars: same format as |scalars| of plot_current_scalars 42 | def print_current_scalars(self, epoch, i, scalars): 43 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 44 | for k, v in scalars.items(): 45 | message += '%s: %.3f ' % (k, v) 46 | 47 | print(message) 48 | with open(self.log_name, "a") as log_file: 49 | log_file.write('%s\n' % message) 50 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | def conv3x3x3(in_planes, out_planes, stride): 6 | # 3x3x3 convolution with padding 7 | return nn.Conv3d( 8 | in_planes, 9 | out_planes, 10 | kernel_size=3, 11 | stride=stride, 12 | padding=1) 13 | def upconv3x3x3(in_planes, out_planes, stride): 14 | return nn.ConvTranspose3d( 15 | in_planes, 16 | out_planes, 17 | kernel_size=3, 18 | stride=1, 19 | padding=1, 20 | output_padding=1) 21 | 22 | def conv_block_3d(in_dim, out_dim, activation): 23 | return nn.Sequential( 24 | nn.Conv3d(in_dim, out_dim, kernel_size=3, stride=1, padding=1), 25 | nn.BatchNorm3d(out_dim), 26 | activation,) 27 | 28 | 29 | def conv_trans_block_3d(in_dim, out_dim, activation, stride=2): 30 | return nn.Sequential( 31 | nn.ConvTranspose3d(in_dim, out_dim, kernel_size=3, stride=stride, padding=1, output_padding=1), 32 | nn.BatchNorm3d(out_dim), 33 | activation,) 34 | 35 | 36 | def max_pooling_3d(): 37 | return nn.MaxPool3d(kernel_size=2, stride=2, padding=0) 38 | 39 | 40 | def conv_block_2_3d(in_dim, out_dim, activation, stride=1): 41 | return nn.Sequential( 42 | conv_block_3d(in_dim, out_dim, activation), 43 | nn.Conv3d(out_dim, out_dim, kernel_size=3, stride=stride, padding=1), 44 | nn.BatchNorm3d(out_dim),) 45 | 46 | -------------------------------------------------------------------------------- /utils/viewpoint.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_name" : "PinholeCameraTrajectory", 3 | "parameters" : 4 | [ 5 | { 6 | "class_name" : "PinholeCameraParameters", 7 | "extrinsic" : 8 | [ 9 | 0.99916142714838663, 10 | -0.007048749653398266, 11 | -0.040333083531057058, 12 | 0.0, 13 | 0.020877457243770447, 14 | -0.75968410011193177, 15 | 0.64995707536433445, 16 | 0.0, 17 | -0.035221786976728557, 18 | -0.65025409123314892, 19 | -0.75889988968026456, 20 | 0.0, 21 | 0.27650272158383526, 22 | 0.43341214902144198, 23 | 12.630418838778768, 24 | 1.0 25 | ], 26 | "intrinsic" : 27 | { 28 | "height" : 1136, 29 | "intrinsic_matrix" : 30 | [ 31 | 983.80485869912241, 32 | 0.0, 33 | 0.0, 34 | 0.0, 35 | 983.80485869912241, 36 | 0.0, 37 | 959.5, 38 | 567.5, 39 | 1.0 40 | ], 41 | "width" : 1920 42 | }, 43 | "version_major" : 1, 44 | "version_minor" : 0 45 | } 46 | ], 47 | "version_major" : 1, 48 | "version_minor" : 0 49 | } --------------------------------------------------------------------------------