├── LICENSE ├── README.md ├── demo.py ├── doc ├── NIPS_new.jpeg ├── NIPS_new.jpg ├── NIPS_new.pdf ├── teaser.jpg └── tips.md ├── eval.py ├── models ├── ap_helper.py ├── backbone_module.py ├── backbone_module_SA2_denseaspp3_6.py ├── backbone_module_SA2_denseaspp3_6_12.py ├── backbone_module_enc_FP2_K8_G12_C3.py ├── boxnet.py ├── dump_helper.py ├── enc_layer.py ├── loss_helper.py ├── loss_helper_boxnet.py ├── proposal_module.py ├── votenet.py ├── votenet_SA2_denseaspp3_6.py ├── votenet_SA2_denseaspp3_6_12.py ├── votenet_enc_FP2_K8_G12_C3.py ├── votenet_enc_complex_FP2_K8_G12_C3.py └── voting_module.py ├── pointnet2 ├── _ext_src │ ├── include │ │ ├── ball_query.h │ │ ├── cuda_utils.h │ │ ├── group_points.h │ │ ├── interpolate.h │ │ ├── sampling.h │ │ └── utils.h │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── bindings.cpp │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── sampling.cpp │ │ └── sampling_gpu.cu ├── pointnet2_modules.py ├── pointnet2_test.py ├── pointnet2_utils.py ├── pytorch_utils.py └── setup.py ├── scannet ├── README.md ├── batch_load_scannet_data.py ├── data_viz.py ├── load_scannet_data.py ├── meta_data │ ├── scannet_means.npz │ ├── scannet_train.txt │ ├── scannetv2-labels.combined.tsv │ ├── scannetv2_test.txt │ ├── scannetv2_train.txt │ └── scannetv2_val.txt ├── model_util_scannet.py ├── scannet_detection_dataset.py └── scannet_utils.py ├── sunrgbd ├── README.md ├── matlab │ ├── extract_rgbd_data_v1.m │ ├── extract_rgbd_data_v2.m │ └── extract_split.m ├── model_util_sunrgbd.py ├── sunrgbd_data.py ├── sunrgbd_detection_dataset.py └── sunrgbd_utils.py ├── train.bash ├── train.py └── utils ├── box_util.py ├── eval_det.py ├── metric_util.py ├── nms.py ├── nn_distance.py ├── pc_util.py ├── tf_logger.py └── tf_visualizer.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Point Detectron 2 | Created by Xu Liu, from JD AI Research and The University of Tokyo. 
3 | 4 | ![teaser](https://github.com/AsahiLiu/PointDetectron/blob/main/doc/NIPS_new.jpeg) 5 | 6 | ## Introduction 7 | This repository is the code release for our NeurIPS 2020 paper Group Contextual Encoding for 3D Point Clouds (Online Paper [here](https://papers.nips.cc/paper/2020/hash/9b72e31dac81715466cd580a448cf823-Abstract.html)) and our 3DV 2020 paper Dense Point Diffusion for 3D Detection (arXiv report [here](https://arxiv.org/pdf/)) 8 | 9 | This repository is built on VoteNet; we augment the VoteNet model with the Group Contextual Encoding block, the Dense Point Diffusion modules, and the Dilated Point Convolution. 10 | ## Citation 11 | @article{liu2020group, 12 | title={Group Contextual Encoding for 3D Point Clouds}, 13 | author={Liu, Xu and Li, Chengtao and Wang, Jian and Wang, Jingbo and Shi, Boxin and He, Xiaodong}, 14 | journal={Advances in Neural Information Processing Systems}, 15 | volume={33}, 16 | year={2020} 17 | } 18 | 19 | ## Installation 20 | 21 | Install [PyTorch](https://pytorch.org/get-started/locally/) and [TensorFlow](https://github.com/tensorflow/tensorflow) (for TensorBoard). You will need access to GPUs. MATLAB is required to prepare the SUN RGB-D data. The code is tested with Ubuntu 18.04, PyTorch v1.1, TensorFlow v1.14, CUDA 10.0 and cuDNN v7.4. Note: there is some incompatibility with newer versions of PyTorch (e.g. v1.3), which is yet to be fixed. 22 | 23 | Compile the CUDA layers for [PointNet++](http://arxiv.org/abs/1706.02413), which we use in the backbone network: 24 | 25 | cd pointnet2 26 | python setup.py install 27 | 28 | To check that the compilation succeeded, run `python models/votenet.py` and verify that a forward pass works. 29 | 30 | Install the following Python dependencies (with `pip install`): 31 | 32 | matplotlib 33 | opencv-python 34 | torch-encoding 35 | plyfile 36 | 'trimesh>=2.35.39,<2.35.40' 37 | 38 | ## Run demo 39 | 40 | Following VoteNet, place the pretrained models under the project root path (`/path/to/project/demo_files`) and then run: 41 | 42 | python demo.py 43 | 44 | The demo uses a model pre-trained on SUN RGB-D to detect objects in a point cloud of an indoor room with a table and a few chairs (from the SUN RGB-D val set). You can use 3D visualization software such as [MeshLab](http://www.meshlab.net/) to open the dumped files under `demo_files/sunrgbd_results` and inspect the 3D detection output. Specifically, open `***_pc.ply` and `***_pred_confident_nms_bbox.ply` to see the input point cloud and the predicted 3D bounding boxes. 45 | 46 | You can also run the following command to use a pretrained model on ScanNet: 47 | 48 | python demo.py --dataset scannet --num_point 40000 49 | 50 | Detection results will be dumped to `demo_files/scannet_results`. 51 | 52 | ## Training and evaluating 53 | 54 | ### Data preparation 55 | Please follow the VoteNet instructions to prepare the datasets. 56 | 57 | For SUN RGB-D, follow the [README](https://github.com/facebookresearch/votenet/blob/master/sunrgbd/README.md) under the `sunrgbd` folder. 58 | 59 | For ScanNet, follow the [README](https://github.com/facebookresearch/votenet/blob/master/scannet/README.md) under the `scannet` folder.
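Before launching training, it can be worth confirming that the prepared data is readable through the same dataset class that `train.py` and `eval.py` use. The snippet below is a minimal sanity check for SUN RGB-D, run from the project root; it assumes the default V1 labels and the default height-only point feature, so the expected point-cloud shape of (20000, 4) is an assumption rather than a guarantee.

    import sys
    sys.path.append('sunrgbd')
    from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset

    # Load one sample to check that the preprocessed files can be read.
    dataset = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True)
    sample = dataset[0]
    print(sample['point_clouds'].shape)   # expected (20000, 4): xyz + height feature
    print(sample['center_label'].shape)   # ground-truth box centers, padded to MAX_NUM_OBJ

A similar check should work for ScanNet with `ScannetDetectionDataset` from the `scannet` folder (use `num_points=40000` to match the training setting).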
60 | 61 | ### Train and test on SUN RGB-D 62 | 63 | To train a model ${MODEL_CONFIG} from the MODEL ZOO on SUN RGB-D data (depth images): 64 | 65 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset sunrgbd --log_dir log_sunrgbd --model ${MODEL_CONFIG} 66 | 67 | You can use `CUDA_VISIBLE_DEVICES=0,1,2` to specify which GPU(s) to use. Without specifying CUDA devices, the training will use all available GPUs and train with data parallelism (note that, due to I/O load, the training speedup is not linear in the number of GPUs used). 68 | While training, you can check the `log_sunrgbd/log_train.txt` file for progress, or use TensorBoard to see the loss curves. 69 | 70 | To test the trained model with its checkpoint: 71 | 72 | python eval.py --dataset sunrgbd --checkpoint_path log_sunrgbd/checkpoint.tar --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model ${MODEL_CONFIG} 73 | 74 | Example results will be dumped in the `eval_sunrgbd` folder (or any other folder you specify). You can run `python eval.py -h` to see the full evaluation options. After the evaluation, you can use MeshLab to visualize the predicted votes and 3D bounding boxes (select wireframe mode to view the boxes). 75 | Final evaluation results are printed on screen and also written to the `log_eval.txt` file under the dump directory. By default we evaluate with both AP@0.25 and AP@0.5 using 3D IoU on oriented boxes. 76 | 77 | ### Train and test on ScanNet 78 | 79 | To train a model ${MODEL_CONFIG} from the MODEL ZOO on ScanNet data (fused scans): 80 | 81 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet --num_point 40000 --model ${MODEL_CONFIG} 82 | 83 | To test the trained model with its checkpoint: 84 | 85 | python eval.py --dataset scannet --checkpoint_path log_scannet/checkpoint.tar --dump_dir eval_scannet --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model ${MODEL_CONFIG} 86 | 87 | Example results will be dumped in the `eval_scannet` folder (or any other folder you specify). 88 | 89 | ### MODEL ZOO 90 | 91 | | MODEL SPECS | ${MODEL_CONFIG} | SUN-RGBD | ScanNet | 92 | |---------------------------------------------|----------:|----------:|:-------:| 93 | | [Group Contextual Encoding (K=8, G=12, C×3)](models/votenet_enc_FP2_K8_G12_C3.py)|votenet_enc_FP2_K8_G12_C3 | 60.7 | 60.8 | 94 | | [SA2 - Dense Point Diffusion (3,6,12)](models/votenet_SA2_denseaspp3_6_12.py) |votenet_SA2_denseaspp3_6_12| 58.6 | 59.6 | 95 | | [SA2 - Dense Point Diffusion (3,6)](models/votenet_SA2_denseaspp3_6.py)|votenet_SA2_denseaspp3_6| 58.7 | 58.9 | 96 | | [VoteNet](models/votenet.py) | votenet (default)| 57.7 | 58.6 | 97 | 98 | 99 | 100 | The ablation models in the papers can be derived from the models listed above; therefore, we do not list them all. A complete worked example combining the training and evaluation commands is given at the end of this section. 101 | ### Train on your own data 102 | 103 | [For Pro Users] If you have your own dataset with point clouds and annotated 3D bounding boxes, you can create a new dataset class and train VoteNet on your own data. To ease the process, some tips are provided in this [doc](https://github.com/facebookresearch/votenet/blob/master/doc/tips.md).
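As a concrete end-to-end example, the following commands train the Group Contextual Encoding configuration from the MODEL ZOO on ScanNet and then evaluate its checkpoint. The log and dump directory names (`log_scannet_enc`, `eval_scannet_enc`) are arbitrary placeholders; any other ${MODEL_CONFIG} from the table can be substituted.

    CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet_enc --num_point 40000 --model votenet_enc_FP2_K8_G12_C3

    python eval.py --dataset scannet --checkpoint_path log_scannet_enc/checkpoint.tar --dump_dir eval_scannet_enc --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model votenet_enc_FP2_K8_G12_C3

Evaluation results (AP@0.25 and AP@0.5) are written to `eval_scannet_enc/log_eval.txt`.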
104 | 105 | ## Acknowledgements 106 | We want to thank Charles Qi for his VoteNet ([original codebase](https://github.com/facebookresearch/votenet)), Hang Zhang for his EncNet ([original codebase](https://hangzhang.org/PyTorch-Encoding/)) and Erik Wijmans for his PointNet++ implementation in Pytorch ([original codebase](https://github.com/erikwijmans/Pointnet2_PyTorch)). 107 | 108 | ## License 109 | votenet is relased under the MIT License. See the [LICENSE file](https://arxiv.org/pdf/1904.09664.pdf) for more details. 110 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Demo of using VoteNet 3D object detector to detect objects from a point cloud. 7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import argparse 13 | import importlib 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset: sunrgbd or scannet [default: sunrgbd]') 18 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 19 | FLAGS = parser.parse_args() 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.optim as optim 24 | 25 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 26 | ROOT_DIR = BASE_DIR 27 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 28 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 29 | from pc_util import random_sampling, read_ply 30 | from ap_helper import parse_predictions 31 | 32 | def preprocess_point_cloud(point_cloud): 33 | ''' Prepare the numpy point cloud (N,3) for forward pass ''' 34 | point_cloud = point_cloud[:,0:3] # do not use color for now 35 | floor_height = np.percentile(point_cloud[:,2],0.99) 36 | height = point_cloud[:,2] - floor_height 37 | point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7) 38 | point_cloud = random_sampling(point_cloud, FLAGS.num_point) 39 | pc = np.expand_dims(point_cloud.astype(np.float32), 0) # (1,40000,4) 40 | return pc 41 | 42 | if __name__=='__main__': 43 | 44 | # Set file paths and dataset config 45 | demo_dir = os.path.join(BASE_DIR, 'demo_files') 46 | if FLAGS.dataset == 'sunrgbd': 47 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 48 | from sunrgbd_detection_dataset import DC # dataset config 49 | checkpoint_path = os.path.join(demo_dir, 'pretrained_votenet_on_sunrgbd.tar') 50 | pc_path = os.path.join(demo_dir, 'input_pc_sunrgbd.ply') 51 | elif FLAGS.dataset == 'scannet': 52 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 53 | from scannet_detection_dataset import DC # dataset config 54 | checkpoint_path = os.path.join(demo_dir, 'pretrained_votenet_on_scannet.tar') 55 | pc_path = os.path.join(demo_dir, 'input_pc_scannet.ply') 56 | else: 57 | print('Unkown dataset %s. 
Exiting.'%(DATASET)) 58 | exit(-1) 59 | 60 | eval_config_dict = {'remove_empty_box': True, 'use_3d_nms': True, 'nms_iou': 0.25, 61 | 'use_old_type_nms': False, 'cls_nms': False, 'per_class_proposal': False, 62 | 'conf_thresh': 0.5, 'dataset_config': DC} 63 | 64 | # Init the model and optimzier 65 | MODEL = importlib.import_module('votenet') # import network module 66 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 67 | net = MODEL.VoteNet(num_proposal=256, input_feature_dim=1, vote_factor=1, 68 | sampling='seed_fps', num_class=DC.num_class, 69 | num_heading_bin=DC.num_heading_bin, 70 | num_size_cluster=DC.num_size_cluster, 71 | mean_size_arr=DC.mean_size_arr).to(device) 72 | print('Constructed model.') 73 | 74 | # Load checkpoint 75 | optimizer = optim.Adam(net.parameters(), lr=0.001) 76 | checkpoint = torch.load(checkpoint_path) 77 | net.load_state_dict(checkpoint['model_state_dict']) 78 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 79 | epoch = checkpoint['epoch'] 80 | print("Loaded checkpoint %s (epoch: %d)"%(checkpoint_path, epoch)) 81 | 82 | # Load and preprocess input point cloud 83 | net.eval() # set model to eval mode (for bn and dp) 84 | point_cloud = read_ply(pc_path) 85 | pc = preprocess_point_cloud(point_cloud) 86 | print('Loaded point cloud data: %s'%(pc_path)) 87 | 88 | # Model inference 89 | inputs = {'point_clouds': torch.from_numpy(pc).to(device)} 90 | tic = time.time() 91 | with torch.no_grad(): 92 | end_points = net(inputs) 93 | toc = time.time() 94 | print('Inference time: %f'%(toc-tic)) 95 | end_points['point_clouds'] = inputs['point_clouds'] 96 | pred_map_cls = parse_predictions(end_points, eval_config_dict) 97 | print('Finished detection. %d object detected.'%(len(pred_map_cls[0]))) 98 | 99 | dump_dir = os.path.join(demo_dir, '%s_results'%(FLAGS.dataset)) 100 | if not os.path.exists(dump_dir): os.mkdir(dump_dir) 101 | MODEL.dump_results(end_points, dump_dir, DC, True) 102 | print('Dumped detection results to folder %s'%(dump_dir)) 103 | -------------------------------------------------------------------------------- /doc/NIPS_new.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.jpeg -------------------------------------------------------------------------------- /doc/NIPS_new.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.jpg -------------------------------------------------------------------------------- /doc/NIPS_new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.pdf -------------------------------------------------------------------------------- /doc/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/teaser.jpg -------------------------------------------------------------------------------- /doc/tips.md: -------------------------------------------------------------------------------- 1 | ### Train on your own data 2 | 3 | [For Pro Users] If you have your own dataset with point clouds and annotated 3D bounding boxes, you can create a 
new dataset class and train VoteNet on your own data. To ease the proces, some tips are provided below. 4 | 5 | Firstly, you need to store point clouds in the upright coordinate system (Z is up, Y is forward, X is right-ward) and 3D bounding boxes as its center (x,y,z), size (l,w,h) and heading angle (along the up-axis; rotation radius from +X towards -Y; +X is 0 and -Y is pi/4). You can refer to `sunrgbd/sunrgbd_data.py` as to how to compute the groundtruth votes (translational vectors from object points to 3D bounding box centers). If your dataset has instance segmentation annotation, you can also compute groundtruth votes on the fly in the dataset class -- refer to `scannet/batch_load_scannet_data.py` and `scannet/scannet_detection_dataset.py` for more details. 6 | 7 | Secondly, you need to create a new dataset class as well as to specify some config information about the dataset. For config information, you can refer to `sunrgbd/model_util_config.py` as an example and modify the `num_classes`, `type2class`, `num_size_clusters`, `mean_size_arr` etc. The `mean_size_arr` is computed by going through all 3D bounding boxes in the train set and cluster them (either by geometric size or semantic class) into several clusters and then compute the median box size in each cluster (an example porcess is [here](https://github.com/facebookresearch/votenet/blob/7c19af314a3d12532dc3c8dbd05d1d404c75891e/sunrgbd/sunrgbd_data.py#L264)). In both SUN RGB-D and ScanNet, we only consider one tempalte box size for each semantic class, but you can have multiple size templates for each class too (in which case you also need to modify the `size2class` function in the config). For detection dataset class, you can refer to `sunrgbd/sunrgbd_detection_dataset.py` and modify based on it. The major thing to modify is the dataset paths (in `__init__` function) and data loading methods (at the beginning of the `__getitem__` function), which depend on where and how you store the data. 8 | 9 | Lastly, after you make sure the dataset class returns the correct input point clouds and ground truth labels, you need to add the new dataset to the `train.py` file and `eval.py` file by augmenting the options of `FLAGS.dataset` argument (adding another `elif` to the dataset set up section). Then by selecting your new dataset in `train.py`, you should be able to train a VoteNet on your own data! 10 | 11 | Note that the VoteNet was originally used on SUN RGB-D and ScanNet which only have either 1D or 0D rotations in their annotated bounding boxes. It is possible to extend the VoteNet to predict 3D rotations though. One simple way is to supervise the network to predict three Euler angles. To support it you will need to prepare ground truth labels and then change the prediction of the 1D `heading_angle` to prediction of three Euler angles in the network output; and modify the 3D bounding box parameterization and transformations accordingly. 12 | 13 | Feel free to post an issue if you meet any difficulty during the process! 14 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Evaluation routine for 3D object detection with SUN RGB-D and ScanNet. 
7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | from datetime import datetime 13 | import argparse 14 | import importlib 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | from torch.utils.data import DataLoader 19 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 20 | ROOT_DIR = BASE_DIR 21 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 22 | from ap_helper import APCalculator, parse_predictions, parse_groundtruths 23 | 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--model', default='votenet', help='Model file name [default: votenet]') 26 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset name. sunrgbd or scannet. [default: sunrgbd]') 27 | parser.add_argument('--checkpoint_path', default=None, help='Model checkpoint path [default: None]') 28 | parser.add_argument('--dump_dir', default=None, help='Dump dir to save sample outputs [default: None]') 29 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 30 | parser.add_argument('--num_target', type=int, default=256, help='Point Number [default: 256]') 31 | parser.add_argument('--batch_size', type=int, default=8, help='Batch Size during training [default: 8]') 32 | parser.add_argument('--vote_factor', type=int, default=1, help='Number of votes generated from each seed [default: 1]') 33 | parser.add_argument('--cluster_sampling', default='vote_fps', help='Sampling strategy for vote clusters: vote_fps, seed_fps, random [default: vote_fps]') 34 | parser.add_argument('--ap_iou_thresholds', default='0.25,0.5', help='A list of AP IoU thresholds [default: 0.25,0.5]') 35 | parser.add_argument('--no_height', action='store_true', help='Do NOT use height signal in input.') 36 | parser.add_argument('--use_color', action='store_true', help='Use RGB color in input.') 37 | parser.add_argument('--use_sunrgbd_v2', action='store_true', help='Use SUN RGB-D V2 box labels.') 38 | parser.add_argument('--use_3d_nms', action='store_true', help='Use 3D NMS instead of 2D NMS.') 39 | parser.add_argument('--use_cls_nms', action='store_true', help='Use per class NMS.') 40 | parser.add_argument('--use_old_type_nms', action='store_true', help='Use old type of NMS, IoBox2Area.') 41 | parser.add_argument('--per_class_proposal', action='store_true', help='Duplicate each proposal num_class times.') 42 | parser.add_argument('--nms_iou', type=float, default=0.25, help='NMS IoU threshold. [default: 0.25]') 43 | parser.add_argument('--conf_thresh', type=float, default=0.05, help='Filter out predictions with obj prob less than it. 
[default: 0.05]') 44 | parser.add_argument('--faster_eval', action='store_true', help='Faster evaluation by skippling empty bounding box removal.') 45 | parser.add_argument('--shuffle_dataset', action='store_true', help='Shuffle the dataset (random order).') 46 | FLAGS = parser.parse_args() 47 | 48 | if FLAGS.use_cls_nms: 49 | assert(FLAGS.use_3d_nms) 50 | 51 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 52 | BATCH_SIZE = FLAGS.batch_size 53 | NUM_POINT = FLAGS.num_point 54 | DUMP_DIR = FLAGS.dump_dir 55 | CHECKPOINT_PATH = FLAGS.checkpoint_path 56 | assert(CHECKPOINT_PATH is not None) 57 | FLAGS.DUMP_DIR = DUMP_DIR 58 | AP_IOU_THRESHOLDS = [float(x) for x in FLAGS.ap_iou_thresholds.split(',')] 59 | 60 | # Prepare DUMP_DIR 61 | if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) 62 | DUMP_FOUT = open(os.path.join(DUMP_DIR, 'log_eval.txt'), 'w') 63 | DUMP_FOUT.write(str(FLAGS)+'\n') 64 | def log_string(out_str): 65 | DUMP_FOUT.write(out_str+'\n') 66 | DUMP_FOUT.flush() 67 | print(out_str) 68 | 69 | # Init datasets and dataloaders 70 | def my_worker_init_fn(worker_id): 71 | np.random.seed(np.random.get_state()[1][0] + worker_id) 72 | 73 | if FLAGS.dataset == 'sunrgbd': 74 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 75 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, MAX_NUM_OBJ 76 | from model_util_sunrgbd import SunrgbdDatasetConfig 77 | DATASET_CONFIG = SunrgbdDatasetConfig() 78 | TEST_DATASET = SunrgbdDetectionVotesDataset('val', num_points=NUM_POINT, 79 | augment=False, use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 80 | use_v1=(not FLAGS.use_sunrgbd_v2)) 81 | elif FLAGS.dataset == 'scannet': 82 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 83 | from scannet_detection_dataset import ScannetDetectionDataset, MAX_NUM_OBJ 84 | from model_util_scannet import ScannetDatasetConfig 85 | DATASET_CONFIG = ScannetDatasetConfig() 86 | TEST_DATASET = ScannetDetectionDataset('val', num_points=NUM_POINT, 87 | augment=False, 88 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height)) 89 | else: 90 | print('Unknown dataset %s. 
Exiting...'%(FLAGS.dataset)) 91 | exit(-1) 92 | print(len(TEST_DATASET)) 93 | TEST_DATALOADER = DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, 94 | shuffle=FLAGS.shuffle_dataset, num_workers=4, worker_init_fn=my_worker_init_fn) 95 | 96 | # Init the model and optimzier 97 | MODEL = importlib.import_module(FLAGS.model) # import network module 98 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 99 | num_input_channel = int(FLAGS.use_color)*3 + int(not FLAGS.no_height)*1 100 | 101 | if FLAGS.model == 'boxnet': 102 | Detector = MODEL.BoxNet 103 | else: 104 | Detector = MODEL.VoteNet 105 | 106 | net = Detector(num_class=DATASET_CONFIG.num_class, 107 | num_heading_bin=DATASET_CONFIG.num_heading_bin, 108 | num_size_cluster=DATASET_CONFIG.num_size_cluster, 109 | mean_size_arr=DATASET_CONFIG.mean_size_arr, 110 | num_proposal=FLAGS.num_target, 111 | input_feature_dim=num_input_channel, 112 | vote_factor=FLAGS.vote_factor, 113 | sampling=FLAGS.cluster_sampling) 114 | net.to(device) 115 | criterion = MODEL.get_loss 116 | 117 | # Load the Adam optimizer 118 | optimizer = optim.Adam(net.parameters(), lr=0.001) 119 | 120 | # Load checkpoint if there is any 121 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 122 | checkpoint = torch.load(CHECKPOINT_PATH) 123 | net.load_state_dict(checkpoint['model_state_dict']) 124 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 125 | epoch = checkpoint['epoch'] 126 | log_string("Loaded checkpoint %s (epoch: %d)"%(CHECKPOINT_PATH, epoch)) 127 | 128 | # Used for AP calculation 129 | CONFIG_DICT = {'remove_empty_box': (not FLAGS.faster_eval), 'use_3d_nms': FLAGS.use_3d_nms, 'nms_iou': FLAGS.nms_iou, 130 | 'use_old_type_nms': FLAGS.use_old_type_nms, 'cls_nms': FLAGS.use_cls_nms, 'per_class_proposal': FLAGS.per_class_proposal, 131 | 'conf_thresh': FLAGS.conf_thresh, 'dataset_config':DATASET_CONFIG} 132 | # ------------------------------------------------------------------------- GLOBAL CONFIG END 133 | 134 | def evaluate_one_epoch(): 135 | stat_dict = {} 136 | ap_calculator_list = [APCalculator(iou_thresh, DATASET_CONFIG.class2type) \ 137 | for iou_thresh in AP_IOU_THRESHOLDS] 138 | net.eval() # set model to eval mode (for bn and dp) 139 | for batch_idx, batch_data_label in enumerate(TEST_DATALOADER): 140 | if batch_idx % 10 == 0: 141 | print('Eval batch: %d'%(batch_idx)) 142 | for key in batch_data_label: 143 | batch_data_label[key] = batch_data_label[key].to(device) 144 | 145 | # Forward pass 146 | inputs = {'point_clouds': batch_data_label['point_clouds']} 147 | with torch.no_grad(): 148 | end_points = net(inputs) 149 | 150 | # Compute loss 151 | for key in batch_data_label: 152 | assert(key not in end_points) 153 | end_points[key] = batch_data_label[key] 154 | loss, end_points = criterion(end_points, DATASET_CONFIG) 155 | 156 | # Accumulate statistics and print out 157 | for key in end_points: 158 | if 'loss' in key or 'acc' in key or 'ratio' in key: 159 | if key not in stat_dict: stat_dict[key] = 0 160 | stat_dict[key] += end_points[key].item() 161 | 162 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT) 163 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 164 | for ap_calculator in ap_calculator_list: 165 | ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls) 166 | 167 | # Dump evaluation results for visualization 168 | if batch_idx == 0: 169 | MODEL.dump_results(end_points, DUMP_DIR, DATASET_CONFIG) 170 | 171 | # Log statistics 172 | for key in sorted(stat_dict.keys()): 
173 | log_string('eval mean %s: %f'%(key, stat_dict[key]/(float(batch_idx+1)))) 174 | 175 | # Evaluate average precision 176 | for i, ap_calculator in enumerate(ap_calculator_list): 177 | print('-'*10, 'iou_thresh: %f'%(AP_IOU_THRESHOLDS[i]), '-'*10) 178 | metrics_dict = ap_calculator.compute_metrics() 179 | for key in metrics_dict: 180 | log_string('eval %s: %f'%(key, metrics_dict[key])) 181 | 182 | mean_loss = stat_dict['loss']/float(batch_idx+1) 183 | return mean_loss 184 | 185 | 186 | def eval(): 187 | log_string(str(datetime.now())) 188 | # Reset numpy seed. 189 | # REF: https://github.com/pytorch/pytorch/issues/5059 190 | np.random.seed() 191 | loss = evaluate_one_epoch() 192 | 193 | if __name__=='__main__': 194 | eval() 195 | -------------------------------------------------------------------------------- /models/backbone_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | self.sa3 = PointnetSAModuleVotes( 54 | npoint=512, 55 | radius=0.8, 56 | nsample=16, 57 | mlp=[256, 128, 128, 256], 58 | use_xyz=True, 59 | normalize_xyz=True 60 | ) 61 | 62 | self.sa4 = PointnetSAModuleVotes( 63 | npoint=256, 64 | radius=1.2, 65 | nsample=16, 66 | mlp=[256, 128, 128, 256], 67 | use_xyz=True, 68 | normalize_xyz=True 69 | ) 70 | 71 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 72 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 73 | 74 | def _break_up_pc(self, pc): 75 | xyz = pc[..., 0:3].contiguous() 76 | features = ( 77 | pc[..., 3:].transpose(1, 2).contiguous() 78 | if pc.size(-1) > 3 else None 79 | ) 80 | 81 | return xyz, features 82 | 83 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 84 | r""" 85 | Forward pass of the network 86 | 87 | Parameters 88 | ---------- 89 | pointcloud: Variable(torch.cuda.FloatTensor) 90 | (B, N, 3 + input_feature_dim) tensor 91 | Point cloud to run predicts on 92 | Each point in the point-cloud MUST 93 | be formated as (x, y, z, features...) 
94 | 95 | Returns 96 | ---------- 97 | end_points: {XXX_xyz, XXX_features, XXX_inds} 98 | XXX_xyz: float32 Tensor of shape (B,K,3) 99 | XXX_features: float32 Tensor of shape (B,K,D) 100 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 101 | """ 102 | if not end_points: end_points = {} 103 | batch_size = pointcloud.shape[0] 104 | 105 | xyz, features = self._break_up_pc(pointcloud) 106 | 107 | # --------- 4 SET ABSTRACTION LAYERS --------- 108 | xyz, features, fps_inds = self.sa1(xyz, features) 109 | end_points['sa1_inds'] = fps_inds 110 | end_points['sa1_xyz'] = xyz 111 | end_points['sa1_features'] = features 112 | 113 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 114 | end_points['sa2_inds'] = fps_inds 115 | end_points['sa2_xyz'] = xyz 116 | end_points['sa2_features'] = features 117 | 118 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 119 | end_points['sa3_xyz'] = xyz 120 | end_points['sa3_features'] = features 121 | 122 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 123 | end_points['sa4_xyz'] = xyz 124 | end_points['sa4_features'] = features 125 | 126 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 127 | features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) 128 | features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) 129 | end_points['fp2_features'] = features 130 | end_points['fp2_xyz'] = end_points['sa2_xyz'] 131 | num_seed = end_points['fp2_xyz'].shape[1] 132 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 133 | return end_points 134 | 135 | 136 | if __name__=='__main__': 137 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 138 | print(backbone_net) 139 | backbone_net.eval() 140 | out = backbone_net(torch.rand(16,20000,6).cuda()) 141 | for key in sorted(out.keys()): 142 | print(key, '\t', out[key].shape) 143 | -------------------------------------------------------------------------------- /models/backbone_module_SA2_denseaspp3_6.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 
31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | 54 | 55 | self.sa2_d3 = PointnetSAModuleVotes( 56 | npoint=1024, 57 | radius=0.8, 58 | nsample=32*3, 59 | mlp=[256, 128, 128, 256], 60 | use_xyz=True, 61 | normalize_xyz=True, 62 | dilation = 3 63 | ) 64 | 65 | 66 | self.sa2_d6 = PointnetSAModuleVotes( 67 | npoint=1024, 68 | radius=1.2, 69 | nsample=32*6, 70 | mlp=[256+256, 128, 128, 256], 71 | use_xyz=True, 72 | normalize_xyz=True, 73 | dilation = 6 74 | ) 75 | 76 | ''' 77 | self.sa3 = PointnetSAModuleVotes( 78 | npoint=512, 79 | radius=0.8, 80 | nsample=16, 81 | mlp=[256, 128, 128, 256], 82 | use_xyz=True, 83 | normalize_xyz=True 84 | ) 85 | 86 | self.sa4 = PointnetSAModuleVotes( 87 | npoint=256, 88 | radius=1.2, 89 | nsample=16, 90 | mlp=[256, 128, 128, 256], 91 | use_xyz=True, 92 | normalize_xyz=True 93 | ) 94 | 95 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 96 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 97 | ''' 98 | 99 | 100 | def _break_up_pc(self, pc): 101 | xyz = pc[..., 0:3].contiguous() 102 | features = ( 103 | pc[..., 3:].transpose(1, 2).contiguous() 104 | if pc.size(-1) > 3 else None 105 | ) 106 | 107 | return xyz, features 108 | 109 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 110 | r""" 111 | Forward pass of the network 112 | 113 | Parameters 114 | ---------- 115 | pointcloud: Variable(torch.cuda.FloatTensor) 116 | (B, N, 3 + input_feature_dim) tensor 117 | Point cloud to run predicts on 118 | Each point in the point-cloud MUST 119 | be formated as (x, y, z, features...) 
120 | 121 | Returns 122 | ---------- 123 | end_points: {XXX_xyz, XXX_features, XXX_inds} 124 | XXX_xyz: float32 Tensor of shape (B,K,3) 125 | XXX_features: float32 Tensor of shape (B,K,D) 126 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 127 | """ 128 | if not end_points: end_points = {} 129 | batch_size = pointcloud.shape[0] 130 | 131 | xyz, features = self._break_up_pc(pointcloud) 132 | 133 | # --------- 4 SET ABSTRACTION LAYERS --------- 134 | xyz, features, fps_inds = self.sa1(xyz, features) 135 | end_points['sa1_inds'] = fps_inds 136 | end_points['sa1_xyz'] = xyz 137 | end_points['sa1_features'] = features 138 | 139 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 140 | end_points['sa2_inds'] = fps_inds 141 | end_points['sa2_xyz'] = xyz 142 | end_points['sa2_features'] = features 143 | 144 | 145 | 146 | _, features, _= self.sa2_d3(xyz, features) # this fps_inds is just 0,1,...,1023 147 | end_points['sa2_d_3_features'] = features 148 | 149 | features_out = torch.cat((end_points['sa2_features'],end_points['sa2_d_3_features']),dim=1) 150 | 151 | _, features, _= self.sa2_d6(xyz, features_out) # this fps_inds is just 0,1,...,1023 152 | end_points['sa2_d_6_features'] = features 153 | 154 | features_out = torch.cat((features_out,end_points['sa2_d_6_features']),dim=1) 155 | 156 | end_points['fp2_features'] = features_out 157 | end_points['fp2_xyz'] = xyz 158 | num_seed = end_points['fp2_xyz'].shape[1] 159 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 160 | return end_points 161 | 162 | 163 | if __name__=='__main__': 164 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 165 | print(backbone_net) 166 | backbone_net.eval() 167 | out = backbone_net(torch.rand(16,20000,6).cuda()) 168 | for key in sorted(out.keys()): 169 | print(key, '\t', out[key].shape) 170 | -------------------------------------------------------------------------------- /models/backbone_module_SA2_denseaspp3_6_12.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 
31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | 54 | 55 | self.sa2_d3 = PointnetSAModuleVotes( 56 | npoint=1024, 57 | radius=0.8, 58 | nsample=32*3, 59 | mlp=[256, 128, 128, 256], 60 | use_xyz=True, 61 | normalize_xyz=True, 62 | dilation = 3 63 | ) 64 | 65 | 66 | self.sa2_d6 = PointnetSAModuleVotes( 67 | npoint=1024, 68 | radius=1.2, 69 | nsample=32*6, 70 | mlp=[256+256, 128, 128, 256], 71 | use_xyz=True, 72 | normalize_xyz=True, 73 | dilation = 6 74 | ) 75 | 76 | 77 | self.sa2_d12 = PointnetSAModuleVotes( 78 | npoint=1024, 79 | radius=1.8, 80 | nsample=32*12, 81 | mlp=[256+256+256, 128, 128, 256], 82 | use_xyz=True, 83 | normalize_xyz=True, 84 | dilation = 12 85 | ) 86 | 87 | 88 | 89 | 90 | ''' 91 | self.sa3 = PointnetSAModuleVotes( 92 | npoint=512, 93 | radius=0.8, 94 | nsample=16, 95 | mlp=[256, 128, 128, 256], 96 | use_xyz=True, 97 | normalize_xyz=True 98 | ) 99 | 100 | self.sa4 = PointnetSAModuleVotes( 101 | npoint=256, 102 | radius=1.2, 103 | nsample=16, 104 | mlp=[256, 128, 128, 256], 105 | use_xyz=True, 106 | normalize_xyz=True 107 | ) 108 | 109 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 110 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 111 | ''' 112 | 113 | 114 | def _break_up_pc(self, pc): 115 | xyz = pc[..., 0:3].contiguous() 116 | features = ( 117 | pc[..., 3:].transpose(1, 2).contiguous() 118 | if pc.size(-1) > 3 else None 119 | ) 120 | 121 | return xyz, features 122 | 123 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 124 | r""" 125 | Forward pass of the network 126 | 127 | Parameters 128 | ---------- 129 | pointcloud: Variable(torch.cuda.FloatTensor) 130 | (B, N, 3 + input_feature_dim) tensor 131 | Point cloud to run predicts on 132 | Each point in the point-cloud MUST 133 | be formated as (x, y, z, features...) 
134 | 135 | Returns 136 | ---------- 137 | end_points: {XXX_xyz, XXX_features, XXX_inds} 138 | XXX_xyz: float32 Tensor of shape (B,K,3) 139 | XXX_features: float32 Tensor of shape (B,K,D) 140 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 141 | """ 142 | if not end_points: end_points = {} 143 | batch_size = pointcloud.shape[0] 144 | 145 | xyz, features = self._break_up_pc(pointcloud) 146 | 147 | # --------- 4 SET ABSTRACTION LAYERS --------- 148 | xyz, features, fps_inds = self.sa1(xyz, features) 149 | end_points['sa1_inds'] = fps_inds 150 | end_points['sa1_xyz'] = xyz 151 | end_points['sa1_features'] = features 152 | 153 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 154 | end_points['sa2_inds'] = fps_inds 155 | end_points['sa2_xyz'] = xyz 156 | end_points['sa2_features'] = features 157 | 158 | 159 | 160 | _, features, _= self.sa2_d3(xyz, features) # this fps_inds is just 0,1,...,1023 161 | end_points['sa2_d_3_features'] = features 162 | 163 | features_out = torch.cat((end_points['sa2_features'],end_points['sa2_d_3_features']),dim=1) 164 | 165 | _, features, _= self.sa2_d6(xyz, features_out) # this fps_inds is just 0,1,...,1023 166 | end_points['sa2_d_6_features'] = features 167 | 168 | features_out = torch.cat((features_out,end_points['sa2_d_6_features']),dim=1) 169 | 170 | _, features, _= self.sa2_d12(xyz, features_out) # this fps_inds is just 0,1,...,1023 171 | end_points['sa2_d_12_features'] = features 172 | 173 | features_out = torch.cat((features_out, end_points['sa2_d_12_features']), dim=1) 174 | 175 | end_points['fp2_features'] = features_out 176 | end_points['fp2_xyz'] = xyz 177 | num_seed = end_points['fp2_xyz'].shape[1] 178 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 179 | return end_points 180 | 181 | 182 | if __name__=='__main__': 183 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 184 | print(backbone_net) 185 | backbone_net.eval() 186 | out = backbone_net(torch.rand(16,20000,6).cuda()) 187 | for key in sorted(out.keys()): 188 | print(key, '\t', out[key].shape) 189 | -------------------------------------------------------------------------------- /models/backbone_module_enc_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | from enc_layer import PointnetSAModuleVotes_enc, PointnetSAModuleVotes_group_enc 21 | 22 | class Pointnet2Backbone(nn.Module): 23 | r""" 24 | Backbone network for point cloud feature learning. 25 | Based on Pointnet++ single-scale grouping network. 26 | 27 | Parameters 28 | ---------- 29 | input_feature_dim: int 30 | Number of input channels in the feature descriptor for each point. 31 | e.g. 3 for RGB. 
32 | """ 33 | def __init__(self, input_feature_dim=0): 34 | super().__init__() 35 | 36 | self.sa1 = PointnetSAModuleVotes_group_enc( 37 | npoint=2048, 38 | radius=0.2, 39 | nsample=64, 40 | mlp=[input_feature_dim, 64, 64, 128*3], 41 | use_xyz=True, 42 | normalize_xyz=True, 43 | dilation=1, 44 | K=8, 45 | G=12 46 | ) 47 | 48 | self.sa2 = PointnetSAModuleVotes_group_enc( 49 | npoint=1024, 50 | radius=0.4, 51 | nsample=32, 52 | mlp=[128*3, 128, 128, 256*3], 53 | use_xyz=True, 54 | normalize_xyz=True, 55 | dilation=1, 56 | K=8, 57 | G=12 58 | ) 59 | 60 | self.sa3 = PointnetSAModuleVotes_group_enc( 61 | npoint=512, 62 | radius=0.8, 63 | nsample=16, 64 | mlp=[256*3, 128, 128, 256*3], 65 | use_xyz=True, 66 | normalize_xyz=True, 67 | dilation=1, 68 | K=8, 69 | G=12 70 | ) 71 | 72 | self.sa4 = PointnetSAModuleVotes_group_enc( 73 | npoint=256, 74 | radius=1.2, 75 | nsample=16, 76 | mlp=[256*3, 128, 128, 256*3], 77 | use_xyz=True, 78 | normalize_xyz=True, 79 | dilation=1, 80 | K=8, 81 | G=12 82 | ) 83 | 84 | self.fp1 = PointnetFPModule(mlp=[(256+256)*3,256,256*3]) 85 | self.fp2 = PointnetFPModule(mlp=[(256+256)*3,256,256*3]) 86 | 87 | def _break_up_pc(self, pc): 88 | xyz = pc[..., 0:3].contiguous() 89 | features = ( 90 | pc[..., 3:].transpose(1, 2).contiguous() 91 | if pc.size(-1) > 3 else None 92 | ) 93 | 94 | return xyz, features 95 | 96 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 97 | r""" 98 | Forward pass of the network 99 | 100 | Parameters 101 | ---------- 102 | pointcloud: Variable(torch.cuda.FloatTensor) 103 | (B, N, 3 + input_feature_dim) tensor 104 | Point cloud to run predicts on 105 | Each point in the point-cloud MUST 106 | be formated as (x, y, z, features...) 107 | 108 | Returns 109 | ---------- 110 | end_points: {XXX_xyz, XXX_features, XXX_inds} 111 | XXX_xyz: float32 Tensor of shape (B,K,3) 112 | XXX_features: float32 Tensor of shape (B,K,D) 113 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 114 | """ 115 | if not end_points: end_points = {} 116 | batch_size = pointcloud.shape[0] 117 | 118 | xyz, features = self._break_up_pc(pointcloud) 119 | 120 | # --------- 4 SET ABSTRACTION LAYERS --------- 121 | xyz, features, fps_inds = self.sa1(xyz, features) 122 | end_points['sa1_inds'] = fps_inds 123 | end_points['sa1_xyz'] = xyz 124 | end_points['sa1_features'] = features 125 | 126 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 127 | end_points['sa2_inds'] = fps_inds 128 | end_points['sa2_xyz'] = xyz 129 | end_points['sa2_features'] = features 130 | 131 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 132 | end_points['sa3_xyz'] = xyz 133 | end_points['sa3_features'] = features 134 | 135 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 136 | end_points['sa4_xyz'] = xyz 137 | end_points['sa4_features'] = features 138 | 139 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 140 | features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) 141 | features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) 142 | end_points['fp2_features'] = features 143 | end_points['fp2_xyz'] = end_points['sa2_xyz'] 144 | num_seed = end_points['fp2_xyz'].shape[1] 145 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 146 | return end_points 147 | 148 | 149 | if __name__=='__main__': 150 | 
backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 151 | print(backbone_net) 152 | backbone_net.eval() 153 | out = backbone_net(torch.rand(16,20000,6).cuda()) 154 | for key in sorted(out.keys()): 155 | print(key, '\t', out[key].shape) 156 | -------------------------------------------------------------------------------- /models/boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(BASE_DIR) 14 | from backbone_module import Pointnet2Backbone 15 | from proposal_module import ProposalModule 16 | from dump_helper import dump_results 17 | from loss_helper_boxnet import get_loss 18 | 19 | 20 | class BoxNet(nn.Module): 21 | r""" 22 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 23 | 24 | Parameters 25 | ---------- 26 | num_class: int 27 | Number of semantics classes to predict over -- size of softmax classifier 28 | num_heading_bin: int 29 | num_size_cluster: int 30 | input_feature_dim: (default: 0) 31 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 32 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 33 | num_proposal: int (default: 128) 34 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 35 | vote_factor: (default: 1) 36 | Number of votes generated from each seed point. 37 | """ 38 | 39 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 40 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 41 | super().__init__() 42 | 43 | self.num_class = num_class 44 | self.num_heading_bin = num_heading_bin 45 | self.num_size_cluster = num_size_cluster 46 | self.mean_size_arr = mean_size_arr 47 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 48 | self.input_feature_dim = input_feature_dim 49 | self.num_proposal = num_proposal 50 | self.vote_factor = vote_factor 51 | self.sampling=sampling 52 | 53 | # Backbone point feature learning 54 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 55 | 56 | # Box proposal, aggregation and detection 57 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 58 | mean_size_arr, num_proposal, sampling) 59 | 60 | def forward(self, inputs): 61 | """ Forward pass of the network 62 | 63 | Args: 64 | inputs: dict 65 | {point_clouds} 66 | 67 | point_clouds: Variable(torch.cuda.FloatTensor) 68 | (B, N, 3 + input_channels) tensor 69 | Point cloud to run predicts on 70 | Each point in the point-cloud MUST 71 | be formated as (x, y, z, features...) 
72 | Returns: 73 | end_points: dict 74 | """ 75 | end_points = {} 76 | batch_size = inputs['point_clouds'].shape[0] 77 | 78 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 79 | xyz = end_points['fp2_xyz'] 80 | features = end_points['fp2_features'] 81 | end_points['seed_inds'] = end_points['fp2_inds'] 82 | end_points['seed_xyz'] = xyz 83 | end_points['seed_features'] = features 84 | 85 | # Directly predict bounding boxes (skips voting) 86 | end_points = self.pnet(xyz, features, end_points) 87 | 88 | return end_points 89 | 90 | 91 | if __name__=='__main__': 92 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 93 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 94 | 95 | # Define dataset 96 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 97 | 98 | # Define model 99 | model = BoxNet(10,12,10,np.random.random((10,3))).cuda() 100 | 101 | # Model forward pass 102 | sample = TRAIN_DATASET[5] 103 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 104 | end_points = model(inputs) 105 | for key in end_points: 106 | print(key, end_points[key]) 107 | 108 | # Compute loss 109 | for key in sample: 110 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 111 | loss, end_points = get_loss(end_points, DC) 112 | print('loss', loss) 113 | end_points['point_clouds'] = inputs['point_clouds'] 114 | end_points['pred_mask'] = np.ones((1,128)) 115 | dump_results(end_points, 'tmp', DC) 116 | -------------------------------------------------------------------------------- /models/dump_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | import os 9 | import sys 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | import pc_util 14 | 15 | DUMP_CONF_THRESH = 0.5 # Dump boxes with obj prob larger than that. 16 | 17 | def softmax(x): 18 | ''' Numpy function for softmax''' 19 | shape = x.shape 20 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 21 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 22 | return probs 23 | 24 | def dump_results(end_points, dump_dir, config, inference_switch=False): 25 | ''' Dump results. 
26 | 27 | Args: 28 | end_points: dict 29 | {..., pred_mask} 30 | pred_mask is a binary mask array of size (batch_size, num_proposal) computed by running NMS and empty box removal 31 | Returns: 32 | None 33 | ''' 34 | if not os.path.exists(dump_dir): 35 | os.system('mkdir %s'%(dump_dir)) 36 | 37 | # INPUT 38 | point_clouds = end_points['point_clouds'].cpu().numpy() 39 | batch_size = point_clouds.shape[0] 40 | 41 | # NETWORK OUTPUTS 42 | seed_xyz = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 43 | if 'vote_xyz' in end_points: 44 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 45 | vote_xyz = end_points['vote_xyz'].detach().cpu().numpy() # (B,num_seed,3) 46 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 47 | objectness_scores = end_points['objectness_scores'].detach().cpu().numpy() # (B,K,2) 48 | pred_center = end_points['center'].detach().cpu().numpy() # (B,K,3) 49 | pred_heading_class = torch.argmax(end_points['heading_scores'], -1) # B,num_proposal 50 | pred_heading_residual = torch.gather(end_points['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 51 | pred_heading_class = pred_heading_class.detach().cpu().numpy() # B,num_proposal 52 | pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal 53 | pred_size_class = torch.argmax(end_points['size_scores'], -1) # B,num_proposal 54 | pred_size_residual = torch.gather(end_points['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 55 | pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal,3 56 | 57 | # OTHERS 58 | pred_mask = end_points['pred_mask'] # B,num_proposal 59 | idx_beg = 0 60 | 61 | for i in range(batch_size): 62 | pc = point_clouds[i,:,:] 63 | objectness_prob = softmax(objectness_scores[i,:,:])[:,1] # (K,) 64 | 65 | # Dump various point clouds 66 | pc_util.write_ply(pc, os.path.join(dump_dir, '%06d_pc.ply'%(idx_beg+i))) 67 | pc_util.write_ply(seed_xyz[i,:,:], os.path.join(dump_dir, '%06d_seed_pc.ply'%(idx_beg+i))) 68 | if 'vote_xyz' in end_points: 69 | pc_util.write_ply(end_points['vote_xyz'][i,:,:], os.path.join(dump_dir, '%06d_vgen_pc.ply'%(idx_beg+i))) 70 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 71 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 72 | pc_util.write_ply(pred_center[i,:,0:3], os.path.join(dump_dir, '%06d_proposal_pc.ply'%(idx_beg+i))) 73 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 74 | pc_util.write_ply(pred_center[i,objectness_prob>DUMP_CONF_THRESH,0:3], os.path.join(dump_dir, '%06d_confident_proposal_pc.ply'%(idx_beg+i))) 75 | 76 | # Dump predicted bounding boxes 77 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 78 | num_proposal = pred_center.shape[1] 79 | obbs = [] 80 | for j in range(num_proposal): 81 | obb = config.param2obb(pred_center[i,j,0:3], pred_heading_class[i,j], pred_heading_residual[i,j], 82 | pred_size_class[i,j], pred_size_residual[i,j]) 83 | obbs.append(obb) 84 | if len(obbs)>0: 85 | obbs = np.vstack(tuple(obbs)) # (num_proposal, 7) 86 | pc_util.write_oriented_bbox(obbs[objectness_prob>DUMP_CONF_THRESH,:], os.path.join(dump_dir, '%06d_pred_confident_bbox.ply'%(idx_beg+i))) 87 | pc_util.write_oriented_bbox(obbs[np.logical_and(objectness_prob>DUMP_CONF_THRESH, pred_mask[i,:]==1),:], os.path.join(dump_dir, 
'%06d_pred_confident_nms_bbox.ply'%(idx_beg+i))) 88 | pc_util.write_oriented_bbox(obbs[pred_mask[i,:]==1,:], os.path.join(dump_dir, '%06d_pred_nms_bbox.ply'%(idx_beg+i))) 89 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_pred_bbox.ply'%(idx_beg+i))) 90 | 91 | # Return if it is at inference time. No dumping of groundtruths 92 | if inference_switch: 93 | return 94 | 95 | # LABELS 96 | gt_center = end_points['center_label'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 97 | gt_mask = end_points['box_label_mask'].cpu().numpy() # B,K2 98 | gt_heading_class = end_points['heading_class_label'].cpu().numpy() # B,K2 99 | gt_heading_residual = end_points['heading_residual_label'].cpu().numpy() # B,K2 100 | gt_size_class = end_points['size_class_label'].cpu().numpy() # B,K2 101 | gt_size_residual = end_points['size_residual_label'].cpu().numpy() # B,K2,3 102 | objectness_label = end_points['objectness_label'].detach().cpu().numpy() # (B,K,) 103 | objectness_mask = end_points['objectness_mask'].detach().cpu().numpy() # (B,K,) 104 | 105 | for i in range(batch_size): 106 | if np.sum(objectness_label[i,:])>0: 107 | pc_util.write_ply(pred_center[i,objectness_label[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_positive_proposal_pc.ply'%(idx_beg+i))) 108 | if np.sum(objectness_mask[i,:])>0: 109 | pc_util.write_ply(pred_center[i,objectness_mask[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_mask_proposal_pc.ply'%(idx_beg+i))) 110 | pc_util.write_ply(gt_center[i,:,0:3], os.path.join(dump_dir, '%06d_gt_centroid_pc.ply'%(idx_beg+i))) 111 | pc_util.write_ply_color(pred_center[i,:,0:3], objectness_label[i,:], os.path.join(dump_dir, '%06d_proposal_pc_objectness_label.obj'%(idx_beg+i))) 112 | 113 | # Dump GT bounding boxes 114 | obbs = [] 115 | for j in range(gt_center.shape[1]): 116 | if gt_mask[i,j] == 0: continue 117 | obb = config.param2obb(gt_center[i,j,0:3], gt_heading_class[i,j], gt_heading_residual[i,j], 118 | gt_size_class[i,j], gt_size_residual[i,j]) 119 | obbs.append(obb) 120 | if len(obbs)>0: 121 | obbs = np.vstack(tuple(obbs)) # (num_gt_objects, 7) 122 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_gt_bbox.ply'%(idx_beg+i))) 123 | 124 | # OPTIONALL, also dump prediction and gt details 125 | if 'batch_pred_map_cls' in end_points: 126 | for ii in range(batch_size): 127 | fout = open(os.path.join(dump_dir, '%06d_pred_map_cls.txt'%(ii)), 'w') 128 | for t in end_points['batch_pred_map_cls'][ii]: 129 | fout.write(str(t[0])+' ') 130 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 131 | fout.write(' '+str(t[2])) 132 | fout.write('\n') 133 | fout.close() 134 | if 'batch_gt_map_cls' in end_points: 135 | for ii in range(batch_size): 136 | fout = open(os.path.join(dump_dir, '%06d_gt_map_cls.txt'%(ii)), 'w') 137 | for t in end_points['batch_gt_map_cls'][ii]: 138 | fout.write(str(t[0])+' ') 139 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 140 | fout.write('\n') 141 | fout.close() 142 | -------------------------------------------------------------------------------- /models/loss_helper_boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
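# Loss helper for BoxNet, the baseline in boxnet.py that skips voting and predicts
# boxes directly from the seed features. Objectness labels are taken from the seeds'
# vote_label_mask gathered at the sampled proposal indices (see the NOTE in
# compute_objectness_loss below). The total loss is
#     10 * (0.5*objectness_loss + box_loss + 0.1*sem_cls_loss),
# with box_loss = center + 0.1*heading_cls + heading_reg + 0.1*size_cls + size_reg.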
5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | from nn_distance import nn_distance, huber_loss 15 | sys.path.append(BASE_DIR) 16 | from loss_helper import compute_box_and_sem_cls_loss 17 | 18 | OBJECTNESS_CLS_WEIGHTS = [0.2,0.8] # put larger weights on positive objectness 19 | 20 | def compute_objectness_loss(end_points): 21 | """ Compute objectness loss for the proposals. 22 | 23 | Args: 24 | end_points: dict (read-only) 25 | 26 | Returns: 27 | objectness_loss: scalar Tensor 28 | objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 29 | objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 30 | object_assignment: (batch_size, num_seed) Tensor with long int 31 | within [0,num_gt_object-1] 32 | """ 33 | # Associate proposal and GT objects by point-to-point distances 34 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'] 35 | gt_center = end_points['center_label'][:,:,0:3] 36 | B = gt_center.shape[0] 37 | K = aggregated_vote_xyz.shape[1] 38 | K2 = gt_center.shape[1] 39 | dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 40 | 41 | # Generate objectness label and mask 42 | # NOTE: Different from VoteNet, here we use seed label as objectness label. 43 | seed_inds = end_points['seed_inds'].long() # B,num_seed in [0,num_points-1] 44 | seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1, seed_inds) 45 | end_points['seed_labels'] = seed_gt_votes_mask 46 | aggregated_vote_inds = end_points['aggregated_vote_inds'] 47 | objectness_label = torch.gather(end_points['seed_labels'], 1, aggregated_vote_inds.long()) # select (B,K) from (B,1024) 48 | objectness_mask = torch.ones((objectness_label.shape[0], objectness_label.shape[1])).cuda() # no ignore zone anymore 49 | 50 | # Compute objectness loss 51 | objectness_scores = end_points['objectness_scores'] 52 | criterion = nn.CrossEntropyLoss(torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') 53 | objectness_loss = criterion(objectness_scores.transpose(2,1), objectness_label) 54 | objectness_loss = torch.sum(objectness_loss * objectness_mask)/(torch.sum(objectness_mask)+1e-6) 55 | 56 | # Set assignment 57 | object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 58 | 59 | return objectness_loss, objectness_label, objectness_mask, object_assignment 60 | 61 | 62 | def get_loss(end_points, config): 63 | """ Loss functions 64 | 65 | Args: 66 | end_points: dict 67 | { 68 | seed_xyz, seed_inds, 69 | center, 70 | heading_scores, heading_residuals_normalized, 71 | size_scores, size_residuals_normalized, 72 | sem_cls_scores, #seed_logits,# 73 | center_label, 74 | heading_class_label, heading_residual_label, 75 | size_class_label, size_residual_label, 76 | sem_cls_label, 77 | box_label_mask, 78 | vote_label, vote_label_mask 79 | } 80 | config: dataset config instance 81 | Returns: 82 | loss: pytorch scalar tensor 83 | end_points: dict 84 | """ 85 | 86 | # Obj loss 87 | objectness_loss, objectness_label, objectness_mask, object_assignment = \ 88 | compute_objectness_loss(end_points) 89 | end_points['objectness_loss'] = objectness_loss 90 | end_points['objectness_label'] = objectness_label 91 | end_points['objectness_mask'] = objectness_mask 92 | end_points['object_assignment'] = object_assignment 93 | total_num_proposal = 
objectness_label.shape[0]*objectness_label.shape[1] 94 | end_points['pos_ratio'] = \ 95 | torch.sum(objectness_label.float().cuda())/float(total_num_proposal) 96 | end_points['neg_ratio'] = \ 97 | torch.sum(objectness_mask.float())/float(total_num_proposal) - end_points['pos_ratio'] 98 | 99 | # Box loss and sem cls loss 100 | center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = \ 101 | compute_box_and_sem_cls_loss(end_points, config) 102 | end_points['center_loss'] = center_loss 103 | end_points['heading_cls_loss'] = heading_cls_loss 104 | end_points['heading_reg_loss'] = heading_reg_loss 105 | end_points['size_cls_loss'] = size_cls_loss 106 | end_points['size_reg_loss'] = size_reg_loss 107 | end_points['sem_cls_loss'] = sem_cls_loss 108 | box_loss = center_loss + 0.1*heading_cls_loss + heading_reg_loss + 0.1*size_cls_loss + size_reg_loss 109 | end_points['box_loss'] = box_loss 110 | 111 | # Final loss function 112 | loss = 0.5*objectness_loss + box_loss + 0.1*sem_cls_loss 113 | loss *= 10 114 | end_points['loss'] = loss 115 | 116 | # -------------------------------------------- 117 | # Some other statistics 118 | obj_pred_val = torch.argmax(end_points['objectness_scores'], 2) # B,K 119 | obj_acc = torch.sum((obj_pred_val==objectness_label.long()).float()*objectness_mask)/(torch.sum(objectness_mask)+1e-6) 120 | end_points['obj_acc'] = obj_acc 121 | 122 | return loss, end_points 123 | -------------------------------------------------------------------------------- /models/proposal_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import os 11 | import sys 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 15 | from pointnet2_modules import PointnetSAModuleVotes 16 | import pointnet2_utils 17 | 18 | def decode_scores(net, end_points, num_class, num_heading_bin, num_size_cluster, mean_size_arr): 19 | net_transposed = net.transpose(2,1) # (batch_size, 1024, ..) 
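    # Layout of the last dimension of net_transposed
    # (width = 2 + 3 + num_heading_bin*2 + num_size_cluster*4 + num_class):
    #   [0:2]                       objectness logits
    #   [2:5]                       center offset from the aggregated vote position
    #   [5 : 5+NH]                  heading-bin scores (NH = num_heading_bin)
    #   [5+NH : 5+2*NH]             normalized heading residuals
    #   [5+2*NH : 5+2*NH+NS]        size-cluster scores (NS = num_size_cluster)
    #   [5+2*NH+NS : 5+2*NH+4*NS]   normalized size residuals (NS x 3)
    #   [5+2*NH+4*NS :]             semantic class scores
    # e.g. with the SUN RGB-D defaults used below (NH=12, NS=10, 10 classes) the width is 79.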
20 | batch_size = net_transposed.shape[0] 21 | num_proposal = net_transposed.shape[1] 22 | 23 | objectness_scores = net_transposed[:,:,0:2] 24 | end_points['objectness_scores'] = objectness_scores 25 | 26 | base_xyz = end_points['aggregated_vote_xyz'] # (batch_size, num_proposal, 3) 27 | center = base_xyz + net_transposed[:,:,2:5] # (batch_size, num_proposal, 3) 28 | end_points['center'] = center 29 | 30 | heading_scores = net_transposed[:,:,5:5+num_heading_bin] 31 | heading_residuals_normalized = net_transposed[:,:,5+num_heading_bin:5+num_heading_bin*2] 32 | end_points['heading_scores'] = heading_scores # Bxnum_proposalxnum_heading_bin 33 | end_points['heading_residuals_normalized'] = heading_residuals_normalized # Bxnum_proposalxnum_heading_bin (should be -1 to 1) 34 | end_points['heading_residuals'] = heading_residuals_normalized * (np.pi/num_heading_bin) # Bxnum_proposalxnum_heading_bin 35 | 36 | size_scores = net_transposed[:,:,5+num_heading_bin*2:5+num_heading_bin*2+num_size_cluster] 37 | size_residuals_normalized = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster:5+num_heading_bin*2+num_size_cluster*4].view([batch_size, num_proposal, num_size_cluster, 3]) # Bxnum_proposalxnum_size_clusterx3 38 | end_points['size_scores'] = size_scores 39 | end_points['size_residuals_normalized'] = size_residuals_normalized 40 | end_points['size_residuals'] = size_residuals_normalized * torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0) 41 | 42 | sem_cls_scores = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster*4:] # Bxnum_proposalx10 43 | end_points['sem_cls_scores'] = sem_cls_scores 44 | return end_points 45 | 46 | 47 | class ProposalModule(nn.Module): 48 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256): 49 | super().__init__() 50 | 51 | self.num_class = num_class 52 | self.num_heading_bin = num_heading_bin 53 | self.num_size_cluster = num_size_cluster 54 | self.mean_size_arr = mean_size_arr 55 | self.num_proposal = num_proposal 56 | self.sampling = sampling 57 | self.seed_feat_dim = seed_feat_dim 58 | 59 | # Vote clustering 60 | self.vote_aggregation = PointnetSAModuleVotes( 61 | npoint=self.num_proposal, 62 | radius=0.3, 63 | nsample=16, 64 | mlp=[self.seed_feat_dim, 128, 128, 128], 65 | use_xyz=True, 66 | normalize_xyz=True 67 | ) 68 | 69 | # Object proposal/detection 70 | # Objectness scores (2), center residual (3), 71 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4) 72 | self.conv1 = torch.nn.Conv1d(128,128,1) 73 | self.conv2 = torch.nn.Conv1d(128,128,1) 74 | self.conv3 = torch.nn.Conv1d(128,2+3+num_heading_bin*2+num_size_cluster*4+self.num_class,1) 75 | self.bn1 = torch.nn.BatchNorm1d(128) 76 | self.bn2 = torch.nn.BatchNorm1d(128) 77 | 78 | def forward(self, xyz, features, end_points): 79 | """ 80 | Args: 81 | xyz: (B,K,3) 82 | features: (B,C,K) 83 | Returns: 84 | scores: (B,num_proposal,2+3+NH*2+NS*4) 85 | """ 86 | if self.sampling == 'vote_fps': 87 | # Farthest point sampling (FPS) on votes 88 | xyz, features, fps_inds = self.vote_aggregation(xyz, features) 89 | sample_inds = fps_inds 90 | elif self.sampling == 'seed_fps': 91 | # FPS on seed and choose the votes corresponding to the seeds 92 | # This gets us a slightly better coverage of *object* votes than vote_fps (which tends to get more cluster votes) 93 | sample_inds = pointnet2_utils.furthest_point_sample(end_points['seed_xyz'], self.num_proposal) 94 | xyz, features, _ = 
self.vote_aggregation(xyz, features, sample_inds) 95 | elif self.sampling == 'random': 96 | # Random sampling from the votes 97 | num_seed = end_points['seed_xyz'].shape[1] 98 | batch_size = end_points['seed_xyz'].shape[0] 99 | sample_inds = torch.randint(0, num_seed, (batch_size, self.num_proposal), dtype=torch.int).cuda() 100 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 101 | else: 102 | log_string('Unknown sampling strategy: %s. Exiting!'%(self.sampling)) 103 | exit() 104 | end_points['aggregated_vote_xyz'] = xyz # (batch_size, num_proposal, 3) 105 | end_points['aggregated_vote_inds'] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal 106 | 107 | # --------- PROPOSAL GENERATION --------- 108 | net = F.relu(self.bn1(self.conv1(features))) 109 | net = F.relu(self.bn2(self.conv2(net))) 110 | net = self.conv3(net) # (batch_size, 2+3+num_heading_bin*2+num_size_cluster*4, num_proposal) 111 | 112 | end_points = decode_scores(net, end_points, self.num_class, self.num_heading_bin, self.num_size_cluster, self.mean_size_arr) 113 | return end_points 114 | 115 | if __name__=='__main__': 116 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 117 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 118 | net = ProposalModule(DC.num_class, DC.num_heading_bin, 119 | DC.num_size_cluster, DC.mean_size_arr, 120 | 128, 'seed_fps').cuda() 121 | end_points = {'seed_xyz': torch.rand(8,1024,3).cuda()} 122 | out = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda(), end_points) 123 | for key in out: 124 | print(key, out[key].shape) 125 | -------------------------------------------------------------------------------- /models/votenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 
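    mean_size_arr: numpy array of shape (num_size_cluster, 3)
        Mean box dimensions of each size cluster; multiplied with the normalized size
        residuals to recover absolute box sizes.
    sampling: str (default: 'vote_fps')
        How proposal centers are sampled from the votes: 'vote_fps', 'seed_fps' or
        'random' (see ProposalModule).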
43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. 
Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/votenet_SA2_denseaspp3_6.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_SA2_denseaspp3_6 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | 60 | # Backbone point feature learning 61 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 62 | 63 | # Hough voting 64 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 65 | 66 | # Vote aggregation and detection 67 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 68 | mean_size_arr, num_proposal, sampling,seed_feat_dim=256+256+256) 69 | 70 | def forward(self, inputs): 71 | """ Forward pass of the network 72 | 73 | Args: 74 | inputs: dict 75 | {point_clouds} 76 | 77 | point_clouds: Variable(torch.cuda.FloatTensor) 78 | (B, N, 3 + input_channels) tensor 79 | Point cloud to run predicts on 80 | Each point in the point-cloud MUST 81 | be formated as (x, y, z, features...) 
82 | Returns: 83 | end_points: dict 84 | """ 85 | end_points = {} 86 | batch_size = inputs['point_clouds'].shape[0] 87 | 88 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 89 | 90 | # --------- HOUGH VOTING --------- 91 | xyz = end_points['fp2_xyz'] 92 | features = end_points['fp2_features'] 93 | end_points['seed_inds'] = end_points['fp2_inds'] 94 | end_points['seed_xyz'] = xyz 95 | end_points['seed_features'] = features 96 | 97 | xyz, features = self.vgen(xyz, features) 98 | features_norm = torch.norm(features, p=2, dim=1) 99 | features = features.div(features_norm.unsqueeze(1)) 100 | end_points['vote_xyz'] = xyz 101 | end_points['vote_features'] = features 102 | 103 | end_points = self.pnet(xyz, features, end_points) 104 | 105 | return end_points 106 | 107 | 108 | if __name__=='__main__': 109 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 110 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 111 | from loss_helper import get_loss 112 | 113 | # Define model 114 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 115 | 116 | try: 117 | # Define dataset 118 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 119 | 120 | # Model forward pass 121 | sample = TRAIN_DATASET[5] 122 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 123 | except: 124 | print('Dataset has not been prepared. Use a random sample.') 125 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 126 | 127 | end_points = model(inputs) 128 | for key in end_points: 129 | print(key, end_points[key]) 130 | 131 | try: 132 | # Compute loss 133 | for key in sample: 134 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 135 | loss, end_points = get_loss(end_points, DC) 136 | print('loss', loss) 137 | end_points['point_clouds'] = inputs['point_clouds'] 138 | end_points['pred_mask'] = np.ones((1,128)) 139 | dump_results(end_points, 'tmp', DC) 140 | except: 141 | print('Dataset has not been prepared. Skip loss and dump.') 142 | -------------------------------------------------------------------------------- /models/votenet_SA2_denseaspp3_6_12.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_SA2_denseaspp3_6_12 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. 
If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling,seed_feat_dim=256+256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | del xyz,features 104 | 105 | return end_points 106 | 107 | 108 | if __name__=='__main__': 109 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 110 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 111 | from loss_helper import get_loss 112 | 113 | # Define model 114 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 115 | 116 | try: 117 | # Define dataset 118 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 119 | 120 | # Model forward pass 121 | sample = TRAIN_DATASET[5] 122 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 123 | except: 124 | print('Dataset has not been prepared. 
Use a random sample.') 125 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 126 | 127 | end_points = model(inputs) 128 | for key in end_points: 129 | print(key, end_points[key]) 130 | 131 | try: 132 | # Compute loss 133 | for key in sample: 134 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 135 | loss, end_points = get_loss(end_points, DC) 136 | print('loss', loss) 137 | end_points['point_clouds'] = inputs['point_clouds'] 138 | end_points['pred_mask'] = np.ones((1,128)) 139 | dump_results(end_points, 'tmp', DC) 140 | except: 141 | print('Dataset has not been prepared. Skip loss and dump.') 142 | -------------------------------------------------------------------------------- /models/votenet_enc_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_enc_FP2_K8_G12_C3 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 
43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. 
Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/votenet_enc_complex_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_enc_complex_FP2_K8_G12_C3 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 
81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/voting_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Votes generation from seed point features. 
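    Each seed feature is passed through a small shared MLP (1x1 Conv1d layers) that
    predicts, for each of the vote_factor votes, a 3D offset from the seed position
    and a residual that is added to the seed feature.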
19 | 20 | Args: 21 | vote_facotr: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 35 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 36 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 37 | 38 | def forward(self, seed_xyz, seed_features): 39 | """ Forward pass. 40 | 41 | Arguments: 42 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 43 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 44 | Returns: 45 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 46 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 47 | """ 48 | batch_size = seed_xyz.shape[0] 49 | num_seed = seed_xyz.shape[1] 50 | num_vote = num_seed*self.vote_factor 51 | net = F.relu(self.bn1(self.conv1(seed_features))) 52 | net = F.relu(self.bn2(self.conv2(net))) 53 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 54 | 55 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 56 | offset = net[:,:,:,0:3] 57 | vote_xyz = seed_xyz.unsqueeze(2) + offset 58 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 59 | 60 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 61 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 62 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 63 | vote_features = vote_features.transpose(2,1).contiguous() 64 | 65 | return vote_xyz, vote_features 66 | 67 | if __name__=='__main__': 68 | net = VotingModule(2, 256).cuda() 69 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 70 | print('xyz', xyz.shape) 71 | print('features', features.shape) 72 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
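// Launch-configuration helpers: opt_n_threads() rounds work_size down to (roughly)
// the largest power of two not exceeding it and clamps the result to [1, TOTAL_THREADS];
// opt_block_config() does the same for a 2-D block while keeping
// x_threads * y_threads within TOTAL_THREADS. CUDA_CHECK_ERRORS() aborts on the
// first CUDA error reported after a kernel launch.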
5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
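// Input-validation macros used by the extension's host wrappers (ball_query.cpp,
// group_points.cpp, interpolate.cpp, sampling.cpp): they verify contiguity, dtype and
// CUDA placement before a kernel is launched. They are built on AT_CHECK, which newer
// PyTorch releases deprecate in favor of TORCH_CHECK; this is consistent with the
// README's note about incompatibility with newer PyTorch versions.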
5 | 6 | #pragma once 7 | #include 8 | #include 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | AT_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | AT_CHECK(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | AT_CHECK(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.type().is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data(), 31 | xyz.data(), idx.data()); 32 | } else { 33 | AT_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
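// The query_ball_point kernel below uses one thread block per batch element; threads
// stride over the m query points, and each query gathers the indices of up to nsample
// points lying within radius. The index of the first in-radius point pre-fills all
// nsample slots, so queries with fewer than nsample neighbours are padded with that
// first index.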
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.type().is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.type().is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_CHECK(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 43 | CHECK_CONTIGUOUS(grad_out); 44 | CHECK_CONTIGUOUS(idx); 45 | CHECK_IS_FLOAT(grad_out); 46 | CHECK_IS_INT(idx); 47 | 48 | if (grad_out.type().is_cuda()) { 49 | CHECK_CUDA(idx); 50 | } 51 | 52 | at::Tensor output = 53 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 54 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 55 | 56 | if (grad_out.type().is_cuda()) { 57 | group_points_grad_kernel_wrapper( 58 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 59 | grad_out.data(), idx.data(), output.data()); 60 | } else { 61 | AT_CHECK(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
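// Forward kernel: gathers features points(b, c, n) at idx(b, npoints, nsample) into
// out(b, c, npoints, nsample), one thread block per batch element. Backward kernel:
// scatters grad_out back into grad_points(b, c, n) with atomicAdd, since several idx
// entries may reference the same source point.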
5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
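// Host wrappers for feature propagation: three_nn() returns, for every "unknown" point,
// the squared distances and indices of its three nearest "known" points;
// three_interpolate() blends the indexed features with caller-supplied weights into
// out(b, c, n); three_interpolate_grad() routes gradients back to the m known points.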
5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.type().is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.type().is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data(), knows.data(), 39 | dist2.data(), idx.data()); 40 | } else { 41 | AT_CHECK(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.type().is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.type().is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data(), idx.data(), weight.data(), 69 | output.data()); 70 | } else { 71 | AT_CHECK(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.type().is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.type().is_cuda()) { 95 | three_interpolate_grad_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data(), idx.data(), weight.data(), 98 | output.data()); 99 | } else { 100 | AT_CHECK(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
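// Illustrative note: the three_nn kernel below does a brute-force scan over all m known
// points per query, keeping a running top-3 of squared distances (best1 <= best2 <= best3)
// instead of sorting, so each query costs O(m) work. All kernels in this file launch one
// CUDA block per batch element and stride their threads over that element's work items.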
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: unknown(b, n, 3) known(b, m, 3) 13 | // output: dist2(b, n, 3), idx(b, n, 3) 14 | __global__ void three_nn_kernel(int b, int n, int m, 15 | const float *__restrict__ unknown, 16 | const float *__restrict__ known, 17 | float *__restrict__ dist2, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | unknown += batch_index * n * 3; 21 | known += batch_index * m * 3; 22 | dist2 += batch_index * n * 3; 23 | idx += batch_index * n * 3; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | for (int j = index; j < n; j += stride) { 28 | float ux = unknown[j * 3 + 0]; 29 | float uy = unknown[j * 3 + 1]; 30 | float uz = unknown[j * 3 + 2]; 31 | 32 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 33 | int besti1 = 0, besti2 = 0, besti3 = 0; 34 | for (int k = 0; k < m; ++k) { 35 | float x = known[k * 3 + 0]; 36 | float y = known[k * 3 + 1]; 37 | float z = known[k * 3 + 2]; 38 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 39 | if (d < best1) { 40 | best3 = best2; 41 | besti3 = besti2; 42 | best2 = best1; 43 | besti2 = besti1; 44 | best1 = d; 45 | besti1 = k; 46 | } else if (d < best2) { 47 | best3 = best2; 48 | besti3 = besti2; 49 | best2 = d; 50 | besti2 = k; 51 | } else if (d < best3) { 52 | best3 = d; 53 | besti3 = k; 54 | } 55 | } 56 | dist2[j * 3 + 0] = best1; 57 | dist2[j * 3 + 1] = best2; 58 | dist2[j * 3 + 2] = best3; 59 | 60 | idx[j * 3 + 0] = besti1; 61 | idx[j * 3 + 1] = besti2; 62 | idx[j * 3 + 2] = besti3; 63 | } 64 | } 65 | 66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 67 | const float *known, float *dist2, int *idx) { 68 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 69 | three_nn_kernel<<>>(b, n, m, unknown, known, 70 | dist2, idx); 71 | 72 | CUDA_CHECK_ERRORS(); 73 | } 74 | 75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 76 | // output: out(b, c, n) 77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 78 | const float *__restrict__ points, 79 | const int *__restrict__ idx, 80 | const float *__restrict__ weight, 81 | float *__restrict__ out) { 82 | int batch_index = blockIdx.x; 83 | points += batch_index * m * c; 84 | 85 | idx += batch_index * n * 3; 86 | weight += batch_index * n * 3; 87 | 88 | out += batch_index * n * c; 89 | 90 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 91 | const int stride = blockDim.y * blockDim.x; 92 | for (int i = index; i < c * n; i += stride) { 93 | const int l = i / n; 94 | const int j = i % n; 95 | float w1 = weight[j * 3 + 0]; 96 | float w2 = weight[j * 3 + 1]; 97 | float w3 = weight[j * 3 + 2]; 98 | 99 | int i1 = idx[j * 3 + 0]; 100 | int i2 = idx[j * 3 + 1]; 101 | int i3 = idx[j * 3 + 2]; 102 | 103 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 104 | points[l * m + i3] * w3; 105 | } 106 | } 107 | 108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 109 | const float *points, const int *idx, 110 | const float *weight, float *out) { 111 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 112 | three_interpolate_kernel<<>>( 113 | b, c, m, n, points, idx, weight, out); 114 | 115 | CUDA_CHECK_ERRORS(); 116 | } 117 | 118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 119 | // output: grad_points(b, c, m) 120 | 121 | __global__ void three_interpolate_grad_kernel( 122 | int b, int c, int n, int m, const float *__restrict__ grad_out, 123 | const int 
*__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.type().is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.type().is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data(), 37 | idx.data(), output.data()); 38 | } else { 39 | AT_CHECK(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.type().is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.type().is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data(), 63 | idx.data(), output.data()); 64 | } else { 65 | AT_CHECK(false, "CPU not 
supported"); 66 | } 67 | 68 | return output; 69 | } 70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 71 | CHECK_CONTIGUOUS(points); 72 | CHECK_IS_FLOAT(points); 73 | 74 | at::Tensor output = 75 | torch::zeros({points.size(0), nsamples}, 76 | at::device(points.device()).dtype(at::ScalarType::Int)); 77 | 78 | at::Tensor tmp = 79 | torch::full({points.size(0), points.size(1)}, 1e10, 80 | at::device(points.device()).dtype(at::ScalarType::Float)); 81 | 82 | if (points.type().is_cuda()) { 83 | furthest_point_sampling_kernel_wrapper( 84 | points.size(0), points.size(1), nsamples, points.data(), 85 | tmp.data(), output.data()); 86 | } else { 87 | AT_CHECK(false, "CPU not supported"); 88 | } 89 | 90 | return output; 91 | } 92 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, m) 12 | // output: out(b, c, m) 13 | __global__ void gather_points_kernel(int b, int c, int n, int m, 14 | const float *__restrict__ points, 15 | const int *__restrict__ idx, 16 | float *__restrict__ out) { 17 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 18 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 19 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 20 | int a = idx[i * m + j]; 21 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 28 | const float *points, const int *idx, 29 | float *out) { 30 | gather_points_kernel<<>>(b, c, n, npoints, 32 | points, idx, out); 33 | 34 | CUDA_CHECK_ERRORS(); 35 | } 36 | 37 | // input: grad_out(b, c, m) idx(b, m) 38 | // output: grad_points(b, c, n) 39 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 40 | const float *__restrict__ grad_out, 41 | const int *__restrict__ idx, 42 | float *__restrict__ grad_points) { 43 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 44 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 45 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 46 | int a = idx[i * m + j]; 47 | atomicAdd(grad_points + (i * c + l) * n + a, 48 | grad_out[(i * c + l) * m + j]); 49 | } 50 | } 51 | } 52 | } 53 | 54 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 55 | const float *grad_out, const int *idx, 56 | float *grad_points) { 57 | gather_points_grad_kernel<<>>( 59 | b, c, n, npoints, grad_out, idx, grad_points); 60 | 61 | CUDA_CHECK_ERRORS(); 62 | } 63 | 64 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 65 | int idx1, int idx2) { 66 | const float v1 = dists[idx1], v2 = dists[idx2]; 67 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 68 | dists[idx1] = max(v1, v2); 69 | dists_i[idx1] = v2 > v1 ? 
i2 : i1; 70 | } 71 | 72 | // Input dataset: (b, n, 3), tmp: (b, n) 73 | // Ouput idxs (b, m) 74 | template 75 | __global__ void furthest_point_sampling_kernel( 76 | int b, int n, int m, const float *__restrict__ dataset, 77 | float *__restrict__ temp, int *__restrict__ idxs) { 78 | if (m <= 0) return; 79 | __shared__ float dists[block_size]; 80 | __shared__ int dists_i[block_size]; 81 | 82 | int batch_index = blockIdx.x; 83 | dataset += batch_index * n * 3; 84 | temp += batch_index * n; 85 | idxs += batch_index * m; 86 | 87 | int tid = threadIdx.x; 88 | const int stride = block_size; 89 | 90 | int old = 0; 91 | if (threadIdx.x == 0) idxs[0] = old; 92 | 93 | __syncthreads(); 94 | for (int j = 1; j < m; j++) { 95 | int besti = 0; 96 | float best = -1; 97 | float x1 = dataset[old * 3 + 0]; 98 | float y1 = dataset[old * 3 + 1]; 99 | float z1 = dataset[old * 3 + 2]; 100 | for (int k = tid; k < n; k += stride) { 101 | float x2, y2, z2; 102 | x2 = dataset[k * 3 + 0]; 103 | y2 = dataset[k * 3 + 1]; 104 | z2 = dataset[k * 3 + 2]; 105 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 106 | if (mag <= 1e-3) continue; 107 | 108 | float d = 109 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 110 | 111 | float d2 = min(d, temp[k]); 112 | temp[k] = d2; 113 | besti = d2 > best ? k : besti; 114 | best = d2 > best ? d2 : best; 115 | } 116 | dists[tid] = best; 117 | dists_i[tid] = besti; 118 | __syncthreads(); 119 | 120 | if (block_size >= 512) { 121 | if (tid < 256) { 122 | __update(dists, dists_i, tid, tid + 256); 123 | } 124 | __syncthreads(); 125 | } 126 | if (block_size >= 256) { 127 | if (tid < 128) { 128 | __update(dists, dists_i, tid, tid + 128); 129 | } 130 | __syncthreads(); 131 | } 132 | if (block_size >= 128) { 133 | if (tid < 64) { 134 | __update(dists, dists_i, tid, tid + 64); 135 | } 136 | __syncthreads(); 137 | } 138 | if (block_size >= 64) { 139 | if (tid < 32) { 140 | __update(dists, dists_i, tid, tid + 32); 141 | } 142 | __syncthreads(); 143 | } 144 | if (block_size >= 32) { 145 | if (tid < 16) { 146 | __update(dists, dists_i, tid, tid + 16); 147 | } 148 | __syncthreads(); 149 | } 150 | if (block_size >= 16) { 151 | if (tid < 8) { 152 | __update(dists, dists_i, tid, tid + 8); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 8) { 157 | if (tid < 4) { 158 | __update(dists, dists_i, tid, tid + 4); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 4) { 163 | if (tid < 2) { 164 | __update(dists, dists_i, tid, tid + 2); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 2) { 169 | if (tid < 1) { 170 | __update(dists, dists_i, tid, tid + 1); 171 | } 172 | __syncthreads(); 173 | } 174 | 175 | old = dists_i[0]; 176 | if (tid == 0) idxs[j] = old; 177 | } 178 | } 179 | 180 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 181 | const float *dataset, float *temp, 182 | int *idxs) { 183 | unsigned int n_threads = opt_n_threads(n); 184 | 185 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 186 | 187 | switch (n_threads) { 188 | case 512: 189 | furthest_point_sampling_kernel<512> 190 | <<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 256: 193 | furthest_point_sampling_kernel<256> 194 | <<>>(b, n, m, dataset, temp, idxs); 195 | break; 196 | case 128: 197 | furthest_point_sampling_kernel<128> 198 | <<>>(b, n, m, dataset, temp, idxs); 199 | break; 200 | case 64: 201 | furthest_point_sampling_kernel<64> 202 | <<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 32: 205 | 
furthest_point_sampling_kernel<32> 206 | <<>>(b, n, m, dataset, temp, idxs); 207 | break; 208 | case 16: 209 | furthest_point_sampling_kernel<16> 210 | <<>>(b, n, m, dataset, temp, idxs); 211 | break; 212 | case 8: 213 | furthest_point_sampling_kernel<8> 214 | <<>>(b, n, m, dataset, temp, idxs); 215 | break; 216 | case 4: 217 | furthest_point_sampling_kernel<4> 218 | <<>>(b, n, m, dataset, temp, idxs); 219 | break; 220 | case 2: 221 | furthest_point_sampling_kernel<2> 222 | <<>>(b, n, m, dataset, temp, idxs); 223 | break; 224 | case 1: 225 | furthest_point_sampling_kernel<1> 226 | <<>>(b, n, m, dataset, temp, idxs); 227 | break; 228 | default: 229 | furthest_point_sampling_kernel<512> 230 | <<>>(b, n, m, dataset, temp, idxs); 231 | } 232 | 233 | CUDA_CHECK_ERRORS(); 234 | } 235 | -------------------------------------------------------------------------------- /pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. ''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
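# Illustrative note: this module provides the small building blocks (SharedMLP, Conv1d/2d/3d,
# FC, BNMomentumScheduler) used by the PointNet++ layers. SharedMLP stacks 1x1 Conv2d
# (+ optional BN and ReLU) layers, so the same weights are applied to every point and every
# neighborhood of a (B, C, npoint, nsample) feature tensor. A minimal sketch (shapes are made
# up for the example):
#
#   mlp = SharedMLP([3, 64, 128], bn=True)    # 3 -> 64 -> 128 channels
#   grouped = torch.randn(8, 3, 1024, 32)     # (B, C, npoint, nsample)
#   out = mlp(grouped)                        # (8, 128, 1024, 32)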
5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | 12 | class SharedMLP(nn.Sequential): 13 | 14 | def __init__( 15 | self, 16 | args: List[int], 17 | *, 18 | bn: bool = False, 19 | activation=nn.ReLU(inplace=True), 20 | preact: bool = False, 21 | first: bool = False, 22 | name: str = "" 23 | ): 24 | super().__init__() 25 | 26 | for i in range(len(args) - 1): 27 | self.add_module( 28 | name + 'layer{}'.format(i), 29 | Conv2d( 30 | args[i], 31 | args[i + 1], 32 | bn=(not first or not preact or (i != 0)) and bn, 33 | activation=activation 34 | if (not first or not preact or (i != 0)) else None, 35 | preact=preact 36 | ) 37 | ) 38 | 39 | 40 | 41 | class _BNBase(nn.Sequential): 42 | 43 | def __init__(self, in_size, batch_norm=None, name=""): 44 | super().__init__() 45 | self.add_module(name + "bn", batch_norm(in_size)) 46 | 47 | nn.init.constant_(self[0].weight, 1.0) 48 | nn.init.constant_(self[0].bias, 0) 49 | 50 | 51 | class BatchNorm1d(_BNBase): 52 | 53 | def __init__(self, in_size: int, *, name: str = ""): 54 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 55 | 56 | 57 | class BatchNorm2d(_BNBase): 58 | 59 | def __init__(self, in_size: int, name: str = ""): 60 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 61 | 62 | 63 | class BatchNorm3d(_BNBase): 64 | 65 | def __init__(self, in_size: int, name: str = ""): 66 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 67 | 68 | 69 | class _ConvBase(nn.Sequential): 70 | 71 | def __init__( 72 | self, 73 | in_size, 74 | out_size, 75 | kernel_size, 76 | stride, 77 | padding, 78 | activation, 79 | bn, 80 | init, 81 | conv=None, 82 | batch_norm=None, 83 | bias=True, 84 | preact=False, 85 | name="" 86 | ): 87 | super().__init__() 88 | 89 | bias = bias and (not bn) 90 | conv_unit = conv( 91 | in_size, 92 | out_size, 93 | kernel_size=kernel_size, 94 | stride=stride, 95 | padding=padding, 96 | bias=bias 97 | ) 98 | init(conv_unit.weight) 99 | if bias: 100 | nn.init.constant_(conv_unit.bias, 0) 101 | 102 | if bn: 103 | if not preact: 104 | bn_unit = batch_norm(out_size) 105 | else: 106 | bn_unit = batch_norm(in_size) 107 | 108 | if preact: 109 | if bn: 110 | self.add_module(name + 'bn', bn_unit) 111 | 112 | if activation is not None: 113 | self.add_module(name + 'activation', activation) 114 | 115 | self.add_module(name + 'conv', conv_unit) 116 | 117 | if not preact: 118 | if bn: 119 | self.add_module(name + 'bn', bn_unit) 120 | 121 | if activation is not None: 122 | self.add_module(name + 'activation', activation) 123 | 124 | 125 | 126 | 127 | class Conv1d(_ConvBase): 128 | 129 | def __init__( 130 | self, 131 | in_size: int, 132 | out_size: int, 133 | *, 134 | kernel_size: int = 1, 135 | stride: int = 1, 136 | padding: int = 0, 137 | activation=nn.ReLU(inplace=True), 138 | bn: bool = False, 139 | init=nn.init.kaiming_normal_, 140 | bias: bool = True, 141 | preact: bool = False, 142 | name: str = "" 143 | ): 144 | super().__init__( 145 | in_size, 146 | out_size, 147 | kernel_size, 148 | stride, 149 | padding, 150 | activation, 151 | bn, 152 | init, 153 | conv=nn.Conv1d, 154 | batch_norm=BatchNorm1d, 155 | bias=bias, 156 | preact=preact, 157 | name=name 158 | ) 159 | 160 | 161 | class Conv2d(_ConvBase): 162 | 163 | def __init__( 164 | self, 165 | in_size: int, 166 | out_size: int, 167 | *, 168 | kernel_size: Tuple[int, int] = (1, 1), 169 | stride: Tuple[int, int] = (1, 1), 170 | 
padding: Tuple[int, int] = (0, 0), 171 | activation=nn.ReLU(inplace=True), 172 | bn: bool = False, 173 | init=nn.init.kaiming_normal_, 174 | bias: bool = True, 175 | preact: bool = False, 176 | name: str = "" 177 | ): 178 | super().__init__( 179 | in_size, 180 | out_size, 181 | kernel_size, 182 | stride, 183 | padding, 184 | activation, 185 | bn, 186 | init, 187 | conv=nn.Conv2d, 188 | batch_norm=BatchNorm2d, 189 | bias=bias, 190 | preact=preact, 191 | name=name 192 | ) 193 | 194 | 195 | 196 | 197 | 198 | class Conv3d(_ConvBase): 199 | 200 | def __init__( 201 | self, 202 | in_size: int, 203 | out_size: int, 204 | *, 205 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 206 | stride: Tuple[int, int, int] = (1, 1, 1), 207 | padding: Tuple[int, int, int] = (0, 0, 0), 208 | activation=nn.ReLU(inplace=True), 209 | bn: bool = False, 210 | init=nn.init.kaiming_normal_, 211 | bias: bool = True, 212 | preact: bool = False, 213 | name: str = "" 214 | ): 215 | super().__init__( 216 | in_size, 217 | out_size, 218 | kernel_size, 219 | stride, 220 | padding, 221 | activation, 222 | bn, 223 | init, 224 | conv=nn.Conv3d, 225 | batch_norm=BatchNorm3d, 226 | bias=bias, 227 | preact=preact, 228 | name=name 229 | ) 230 | 231 | 232 | class FC(nn.Sequential): 233 | 234 | def __init__( 235 | self, 236 | in_size: int, 237 | out_size: int, 238 | *, 239 | activation=nn.ReLU(inplace=True), 240 | bn: bool = False, 241 | init=None, 242 | preact: bool = False, 243 | name: str = "" 244 | ): 245 | super().__init__() 246 | 247 | fc = nn.Linear(in_size, out_size, bias=not bn) 248 | if init is not None: 249 | init(fc.weight) 250 | if not bn: 251 | nn.init.constant_(fc.bias, 0) 252 | 253 | if preact: 254 | if bn: 255 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 256 | 257 | if activation is not None: 258 | self.add_module(name + 'activation', activation) 259 | 260 | self.add_module(name + 'fc', fc) 261 | 262 | if not preact: 263 | if bn: 264 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 265 | 266 | if activation is not None: 267 | self.add_module(name + 'activation', activation) 268 | 269 | def set_bn_momentum_default(bn_momentum): 270 | 271 | def fn(m): 272 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 273 | m.momentum = bn_momentum 274 | 275 | return fn 276 | 277 | 278 | class BNMomentumScheduler(object): 279 | 280 | def __init__( 281 | self, model, bn_lambda, last_epoch=-1, 282 | setter=set_bn_momentum_default 283 | ): 284 | if not isinstance(model, nn.Module): 285 | raise RuntimeError( 286 | "Class '{}' is not a PyTorch nn Module".format( 287 | type(model).__name__ 288 | ) 289 | ) 290 | 291 | self.model = model 292 | self.setter = setter 293 | self.lmbd = bn_lambda 294 | 295 | self.step(last_epoch + 1) 296 | self.last_epoch = last_epoch 297 | 298 | def step(self, epoch=None): 299 | if epoch is None: 300 | epoch = self.last_epoch + 1 301 | 302 | self.last_epoch = epoch 303 | self.model.apply(self.setter(self.lmbd(epoch))) 304 | 305 | 306 | -------------------------------------------------------------------------------- /pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
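# Illustrative note: this builds the CUDA ops (ball query, grouping, three_nn /
# three_interpolate, furthest point sampling and gather) into the extension module
# pointnet2._ext, which pointnet2_utils.py exposes to the rest of the code. Once the build
# succeeds, the gradcheck script in this folder is a quick GPU sanity check of the compiled ops:
#
#   python pointnet2_test.py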
5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | 10 | _ext_src_root = "_ext_src" 11 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 12 | "{}/src/*.cu".format(_ext_src_root) 13 | ) 14 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 15 | 16 | setup( 17 | name='pointnet2', 18 | ext_modules=[ 19 | CUDAExtension( 20 | name='pointnet2._ext', 21 | sources=_ext_sources, 22 | extra_compile_args={ 23 | "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 24 | "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 25 | }, 26 | ) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data 2 | 3 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Move/link the `scans` folder such that under `scans` there should be folders with names such as `scene0001_01`. 4 | 5 | 2. Extract point clouds and annotations (semantic seg, instance seg etc.) by running `python batch_load_scannet_data.py`, which will create a folder named `scannet_train_detection_data` here. 6 | -------------------------------------------------------------------------------- /scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Batch mode in loading Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | from load_scannet_data import export 16 | import pdb 17 | 18 | SCANNET_DIR = 'scans' 19 | TRAIN_SCAN_NAMES = [line.rstrip() for line in open('meta_data/scannet_train.txt')] 20 | LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv' 21 | DONOTCARE_CLASS_IDS = np.array([]) 22 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 23 | MAX_NUM_POINT = 50000 24 | OUTPUT_FOLDER = './scannet_train_detection_data' 25 | 26 | def export_one_scan(scan_name, output_filename_prefix): 27 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 28 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 29 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 30 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
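    # For each scan, four arrays are written under OUTPUT_FOLDER by the np.save calls below:
    # <scan>_vert.npy (XYZ + RGB vertices), <scan>_sem_label.npy (NYU40 semantic ids),
    # <scan>_ins_label.npy (1-indexed instance ids) and
    # <scan>_bbox.npy (cx, cy, cz, dx, dy, dz, semantic_label axis-aligned boxes).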
31 | mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = \ 32 | export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 33 | 34 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 35 | mesh_vertices = mesh_vertices[mask,:] 36 | semantic_labels = semantic_labels[mask] 37 | instance_labels = instance_labels[mask] 38 | 39 | num_instances = len(np.unique(instance_labels)) 40 | print('Num of instances: ', num_instances) 41 | 42 | bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 43 | instance_bboxes = instance_bboxes[bbox_mask,:] 44 | print('Num of care instances: ', instance_bboxes.shape[0]) 45 | 46 | N = mesh_vertices.shape[0] 47 | if N > MAX_NUM_POINT: 48 | choices = np.random.choice(N, MAX_NUM_POINT, replace=False) 49 | mesh_vertices = mesh_vertices[choices, :] 50 | semantic_labels = semantic_labels[choices] 51 | instance_labels = instance_labels[choices] 52 | 53 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 54 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 55 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 56 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 57 | 58 | def batch_export(): 59 | if not os.path.exists(OUTPUT_FOLDER): 60 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 61 | os.mkdir(OUTPUT_FOLDER) 62 | 63 | for scan_name in TRAIN_SCAN_NAMES: 64 | print('-'*20+'begin') 65 | print(datetime.datetime.now()) 66 | print(scan_name) 67 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 68 | if os.path.isfile(output_filename_prefix+'_vert.npy'): 69 | print('File already exists. skipping.') 70 | print('-'*20+'done') 71 | continue 72 | try: 73 | export_one_scan(scan_name, output_filename_prefix) 74 | except: 75 | print('Failed export scan: %s'%(scan_name)) 76 | print('-'*20+'done') 77 | 78 | if __name__=='__main__': 79 | batch_export() 80 | -------------------------------------------------------------------------------- /scannet/data_viz.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
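# Illustrative note: a small inspection script for the extracted ScanNet data. It expects the
# arrays written by batch_load_scannet_data.py (edit scene_name below to any scan you have
# extracted; the default is scene0002_00) and dumps colored point clouds of the raw scene, the
# instance labels and the semantic labels into data_viz_dump/ for viewing in a mesh viewer:
#
#   python data_viz.py    # run from the scannet/ folder after data extraction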
5 | 6 | import sys 7 | import os 8 | 9 | BASE_DIR = os.path.dirname(__file__) 10 | sys.path.append(BASE_DIR) 11 | 12 | import numpy as np 13 | import pc_util 14 | 15 | scene_name = 'scannet_train_detection_data/scene0002_00' 16 | output_folder = 'data_viz_dump' 17 | 18 | data = np.load(scene_name+'_vert.npy') 19 | scene_points = data[:,0:3] 20 | colors = data[:,3:] 21 | instance_labels = np.load(scene_name+'_ins_label.npy') 22 | semantic_labels = np.load(scene_name+'_sem_label.npy') 23 | instance_bboxes = np.load(scene_name+'_bbox.npy') 24 | 25 | print(np.unique(instance_labels)) 26 | print(np.unique(semantic_labels)) 27 | input() 28 | if not os.path.exists(output_folder): 29 | os.mkdir(output_folder) 30 | 31 | # Write scene as OBJ file for visualization 32 | pc_util.write_ply_rgb(scene_points, colors, os.path.join(output_folder, 'scene.obj')) 33 | pc_util.write_ply_color(scene_points, instance_labels, os.path.join(output_folder, 'scene_instance.obj')) 34 | pc_util.write_ply_color(scene_points, semantic_labels, os.path.join(output_folder, 'scene_semantic.obj')) 35 | 36 | from model_util_scannet import ScannetDatasetConfig 37 | DC = ScannetDatasetConfig() 38 | print(instance_bboxes.shape) 39 | -------------------------------------------------------------------------------- /scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Load Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | """ 9 | 10 | # python imports 11 | import math 12 | import os, sys, argparse 13 | import inspect 14 | import json 15 | import pdb 16 | 17 | try: 18 | import numpy as np 19 | except: 20 | print("Failed to import numpy package.") 21 | sys.exit(-1) 22 | 23 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 24 | import scannet_utils 25 | 26 | def read_aggregation(filename): 27 | assert os.path.isfile(filename) 28 | object_id_to_segs = {} 29 | label_to_segs = {} 30 | with open(filename) as f: 31 | data = json.load(f) 32 | num_objects = len(data['segGroups']) 33 | for i in range(num_objects): 34 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed 35 | label = data['segGroups'][i]['label'] 36 | segs = data['segGroups'][i]['segments'] 37 | object_id_to_segs[object_id] = segs 38 | if label in label_to_segs: 39 | label_to_segs[label].extend(segs) 40 | else: 41 | label_to_segs[label] = segs 42 | return object_id_to_segs, label_to_segs 43 | 44 | 45 | def read_segmentation(filename): 46 | assert os.path.isfile(filename) 47 | seg_to_verts = {} 48 | with open(filename) as f: 49 | data = json.load(f) 50 | num_verts = len(data['segIndices']) 51 | for i in range(num_verts): 52 | seg_id = data['segIndices'][i] 53 | if seg_id in seg_to_verts: 54 | seg_to_verts[seg_id].append(i) 55 | else: 56 | seg_to_verts[seg_id] = [i] 57 | return seg_to_verts, num_verts 58 | 59 | 60 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None): 61 | """ points are XYZ RGB (RGB in 0-255), 62 | semantic label as nyu40 ids, 63 | instance label as 1-#instance, 64 | box as (cx,cy,cz,dx,dy,dz,semantic_label) 65 | """ 66 | label_map = scannet_utils.read_label_mapping(label_map_file, 67 | label_from='raw_category', label_to='nyu40id') 68 | 
mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 69 | 70 | # Load scene axis alignment matrix 71 | lines = open(meta_file).readlines() 72 | for line in lines: 73 | if 'axisAlignment' in line: 74 | axis_align_matrix = [float(x) \ 75 | for x in line.rstrip().strip('axisAlignment = ').split(' ')] 76 | break 77 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4)) 78 | pts = np.ones((mesh_vertices.shape[0], 4)) 79 | pts[:,0:3] = mesh_vertices[:,0:3] 80 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 81 | mesh_vertices[:,0:3] = pts[:,0:3] 82 | 83 | # Load semantic and instance labels 84 | object_id_to_segs, label_to_segs = read_aggregation(agg_file) 85 | seg_to_verts, num_verts = read_segmentation(seg_file) 86 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 87 | object_id_to_label_id = {} 88 | for label, segs in label_to_segs.items(): 89 | label_id = label_map[label] 90 | for seg in segs: 91 | verts = seg_to_verts[seg] 92 | label_ids[verts] = label_id 93 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 94 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 95 | for object_id, segs in object_id_to_segs.items(): 96 | for seg in segs: 97 | verts = seg_to_verts[seg] 98 | instance_ids[verts] = object_id 99 | if object_id not in object_id_to_label_id: 100 | object_id_to_label_id[object_id] = label_ids[verts][0] 101 | instance_bboxes = np.zeros((num_instances,7)) 102 | for obj_id in object_id_to_segs: 103 | label_id = object_id_to_label_id[obj_id] 104 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3] 105 | if len(obj_pc) == 0: continue 106 | # Compute axis aligned box 107 | # An axis aligned bounding box is parameterized by 108 | # (cx,cy,cz) and (dx,dy,dz) and label id 109 | # where (cx,cy,cz) is the center point of the box, 110 | # dx is the x-axis length of the box. 111 | xmin = np.min(obj_pc[:,0]) 112 | ymin = np.min(obj_pc[:,1]) 113 | zmin = np.min(obj_pc[:,2]) 114 | xmax = np.max(obj_pc[:,0]) 115 | ymax = np.max(obj_pc[:,1]) 116 | zmax = np.max(obj_pc[:,2]) 117 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 118 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 119 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 120 | instance_bboxes[obj_id-1,:] = bbox 121 | 122 | if output_file is not None: 123 | np.save(output_file+'_vert.npy', mesh_vertices) 124 | np.save(output_file+'_sem_label.npy', label_ids) 125 | np.save(output_file+'_ins_label.npy', instance_ids) 126 | np.save(output_file+'_bbox.npy', instance_bboxes) 127 | 128 | return mesh_vertices, label_ids, instance_ids,\ 129 | instance_bboxes, object_id_to_label_id 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 134 | parser.add_argument('--output_file', required=True, help='output file') 135 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv') 136 | opt = parser.parse_args() 137 | 138 | scan_name = os.path.split(opt.scan_path)[-1] 139 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 140 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 141 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json') 142 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
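    # Example single-scene invocation (paths are placeholders; run from the scannet/ folder):
    #   python load_scannet_data.py --scan_path data/ScanNet/v2/scene0000_00 \
    #       --output_file scene0000_00 --label_map_file meta_data/scannetv2-labels.combined.tsv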
143 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file) 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | 
scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 
239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | from box_util import get_3d_box 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.num_class = 18 18 | self.num_heading_bin = 1 19 | self.num_size_cluster = 18 20 | 21 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 22 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 23 | 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 24 | self.class2type = {self.type2class[t]:t for t in self.type2class} 25 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 26 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 27 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 28 | self.type_mean_size = {} 29 | for i in range(self.num_size_cluster): 30 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 31 | 32 | def angle2class(self, angle): 33 | ''' Convert continuous angle to discrete class 34 | [optinal] also small regression number from 35 | class center angle to current angle. 36 | 37 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 38 | return is class of int32 of 0,1,...,N-1 and a number such that 39 | class*(2pi/N) + number = angle 40 | 41 | NOT USED. 42 | ''' 43 | assert(False) 44 | 45 | def class2angle(self, pred_cls, residual, to_label_format=True): 46 | ''' Inverse function to angle2class. 
47 | 48 | As ScanNet only has axis-alined boxes so angles are always 0. ''' 49 | return 0 50 | 51 | def size2class(self, size, type_name): 52 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 53 | size_class = self.type2class[type_name] 54 | size_residual = size - self.type_mean_size[type_name] 55 | return size_class, size_residual 56 | 57 | def class2size(self, pred_cls, residual): 58 | ''' Inverse function to size2class ''' 59 | return self.mean_size_arr[pred_cls, :] + residual 60 | 61 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 62 | heading_angle = self.class2angle(heading_class, heading_residual) 63 | box_size = self.class2size(int(size_class), size_residual) 64 | obb = np.zeros((7,)) 65 | obb[0:3] = center 66 | obb[3:6] = box_size 67 | obb[6] = heading_angle*-1 68 | return obb 69 | 70 | def rotate_aligned_boxes(input_boxes, rot_mat): 71 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 72 | new_centers = np.dot(centers, np.transpose(rot_mat)) 73 | 74 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 75 | new_x = np.zeros((dx.shape[0], 4)) 76 | new_y = np.zeros((dx.shape[0], 4)) 77 | 78 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 79 | crnrs = np.zeros((dx.shape[0], 3)) 80 | crnrs[:,0] = crnr[0]*dx 81 | crnrs[:,1] = crnr[1]*dy 82 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 83 | new_x[:,i] = crnrs[:,0] 84 | new_y[:,i] = crnrs[:,1] 85 | 86 | 87 | new_dx = 2.0*np.max(new_x, 1) 88 | new_dy = 2.0*np.max(new_y, 1) 89 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 90 | 91 | return np.concatenate([new_centers, new_lengths], axis=1) 92 | -------------------------------------------------------------------------------- /scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts ''' 7 | import os 8 | import sys 9 | import json 10 | import csv 11 | 12 | try: 13 | import numpy as np 14 | except: 15 | print("Failed to import numpy package.") 16 | sys.exit(-1) 17 | 18 | try: 19 | from plyfile import PlyData, PlyElement 20 | except: 21 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 22 | print("pip install plyfile") 23 | sys.exit(-1) 24 | 25 | def represents_int(s): 26 | ''' if string s represents an int. ''' 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 35 | assert os.path.isfile(filename) 36 | mapping = dict() 37 | with open(filename) as csvfile: 38 | reader = csv.DictReader(csvfile, delimiter='\t') 39 | for row in reader: 40 | mapping[row[label_from]] = int(row[label_to]) 41 | if represents_int(list(mapping.keys())[0]): 42 | mapping = {int(k):v for k,v in mapping.items()} 43 | return mapping 44 | 45 | def read_mesh_vertices(filename): 46 | """ read XYZ for each vertex. 
47 | """ 48 | assert os.path.isfile(filename) 49 | with open(filename, 'rb') as f: 50 | plydata = PlyData.read(f) 51 | num_verts = plydata['vertex'].count 52 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 53 | vertices[:,0] = plydata['vertex'].data['x'] 54 | vertices[:,1] = plydata['vertex'].data['y'] 55 | vertices[:,2] = plydata['vertex'].data['z'] 56 | return vertices 57 | 58 | def read_mesh_vertices_rgb(filename): 59 | """ read XYZ RGB for each vertex. 60 | Note: RGB values are in 0-255 61 | """ 62 | assert os.path.isfile(filename) 63 | with open(filename, 'rb') as f: 64 | plydata = PlyData.read(f) 65 | num_verts = plydata['vertex'].count 66 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 67 | vertices[:,0] = plydata['vertex'].data['x'] 68 | vertices[:,1] = plydata['vertex'].data['y'] 69 | vertices[:,2] = plydata['vertex'].data['z'] 70 | vertices[:,3] = plydata['vertex'].data['red'] 71 | vertices[:,4] = plydata['vertex'].data['green'] 72 | vertices[:,5] = plydata['vertex'].data['blue'] 73 | return vertices 74 | 75 | 76 | -------------------------------------------------------------------------------- /sunrgbd/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare SUN RGB-D Data 2 | 3 | 1. Download SUNRGBD v2 data [HERE](http://rgbd.cs.princeton.edu/data/) (SUNRGBD.zip, SUNRGBDMeta2DBB_v2.mat, SUNRGBDMeta3DBB_v2.mat) and the toolkits (SUNRGBDtoolbox.zip). Move all the downloaded files under OFFICIAL_SUNRGBD. Unzip the zip files. 4 | 5 | 2. Extract point clouds and annotations (class, v2 2D -- xmin,ymin,xmax,ymax, and 3D bounding boxes -- centroids, size, 2D heading) by running `extract_split.m`, `extract_rgbd_data_v2.m` and `extract_rgbd_data_v1.m` under the `matlab` folder. 6 | 7 | 3. Prepare data by running `python sunrgbd_data.py --gen_v1_data` 8 | 9 | You can also examine and visualize the data with `python sunrgbd_data.py --viz` and use MeshLab to view the generated PLY files at `data_viz_dump`. 10 | 11 | NOTE: SUNRGBDtoolbox.zip should have MD5 hash `18d22e1761d36352f37232cba102f91f` (you can check the hash with `md5 SUNRGBDtoolbox.zip` on Mac OS or `md5sum SUNRGBDtoolbox.zip` on Linux) 12 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_rgbd_data_v1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Extract using V1 labels. 11 | % 12 | % Author: Charles R. 
Qi 13 | % 14 | clear; close all; clc; 15 | addpath(genpath('.')) 16 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 17 | %% V1 2D&3D BB and Seg masks 18 | load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/Metadata/SUNRGBDMeta.mat') 19 | % load('./Metadata/SUNRGBD2Dseg.mat') 20 | 21 | %% Create folders 22 | det_label_folder = '../sunrgbd_trainval/label_v1/'; 23 | mkdir(det_label_folder); 24 | %% Read 25 | for imageId = 1:10335 26 | imageId 27 | try 28 | data = SUNRGBDMeta(imageId); 29 | data.depthpath(1:16) = ''; 30 | data.depthpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.depthpath); 31 | data.rgbpath(1:16) = ''; 32 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.rgbpath); 33 | 34 | % MAT files are 3x smaller than TXT files. In Python we can use 35 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 36 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 37 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 38 | 39 | % Write 2D and 3D box label 40 | data2d = data; 41 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 42 | for j = 1:length(data.groundtruth3DBB) 43 | centroid = data.groundtruth3DBB(j).centroid; 44 | classname = data.groundtruth3DBB(j).classname; 45 | orientation = data.groundtruth3DBB(j).orientation; 46 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 47 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 48 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 49 | end 50 | fclose(fid); 51 | 52 | catch 53 | end 54 | 55 | end 56 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_rgbd_data_v2.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Compared to extract_rgbd_data.m in frustum_pointents, use v2 2D and 3D 11 | % bboxes. 12 | % 13 | % Author: Charles R. 
Qi 14 | % 15 | clear; close all; clc; 16 | addpath(genpath('.')) 17 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/readData') 18 | %% V1 2D&3D BB and Seg masks 19 | % load('./Metadata/SUNRGBDMeta.mat') 20 | % load('./Metadata/SUNRGBD2Dseg.mat') 21 | 22 | %% V2 3DBB annotations (overwrites SUNRGBDMeta) 23 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 24 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta2DBB_v2.mat'); 25 | %% Create folders 26 | depth_folder = '../sunrgbd_trainval/depth/'; 27 | image_folder = '../sunrgbd_trainval/image/'; 28 | calib_folder = '../sunrgbd_trainval/calib/'; 29 | det_label_folder = '../sunrgbd_trainval/label/'; 30 | seg_label_folder = '../sunrgbd_trainval/seg_label/'; 31 | mkdir(depth_folder); 32 | mkdir(image_folder); 33 | mkdir(calib_folder); 34 | mkdir(det_label_folder); 35 | mkdir(seg_label_folder); 36 | %% Read 37 | parfor imageId = 1:10335 38 | imageId 39 | try 40 | data = SUNRGBDMeta(imageId); 41 | data.depthpath(1:16) = ''; 42 | data.depthpath = strcat('../OFFICIAL_SUNRGBD', data.depthpath); 43 | data.rgbpath(1:16) = ''; 44 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD', data.rgbpath); 45 | 46 | % Write point cloud in depth map 47 | [rgb,points3d,depthInpaint,imsize]=read3dPoints(data); 48 | rgb(isnan(points3d(:,1)),:) = []; 49 | points3d(isnan(points3d(:,1)),:) = []; 50 | points3d_rgb = [points3d, rgb]; 51 | 52 | % MAT files are 3x smaller than TXT files. In Python we can use 53 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 54 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 55 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 56 | parsave(strcat(depth_folder, mat_filename), points3d_rgb); 57 | 58 | % Write images 59 | copyfile(data.rgbpath, sprintf('%s/%06d.jpg', image_folder, imageId)); 60 | 61 | % Write calibration 62 | dlmwrite(strcat(calib_folder, txt_filename), data.Rtilt(:)', 'delimiter', ' '); 63 | dlmwrite(strcat(calib_folder, txt_filename), data.K(:)', 'delimiter', ' ', '-append'); 64 | 65 | % Write 2D and 3D box label 66 | data2d = SUNRGBDMeta2DBB(imageId); 67 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 68 | for j = 1:length(data.groundtruth3DBB) 69 | centroid = data.groundtruth3DBB(j).centroid; 70 | classname = data.groundtruth3DBB(j).classname; 71 | orientation = data.groundtruth3DBB(j).orientation; 72 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 73 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 74 | assert(strcmp(data2d.groundtruth2DBB(j).classname, classname)); 75 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 76 | end 77 | fclose(fid); 78 | 79 | catch 80 | end 81 | 82 | end 83 | 84 | function parsave(filename, instance) 85 | save(filename, 'instance'); 86 | end 87 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_split.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump train/val split. 7 | % Author: Charles R. 
Qi 8 | 9 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 10 | 11 | %% Construct Hash Map 12 | hash_train = java.util.Hashtable; 13 | hash_val = java.util.Hashtable; 14 | 15 | split = load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat'); 16 | 17 | N_train = length(split.alltrain); 18 | N_val = length(split.alltest); 19 | 20 | for i = 1:N_train 21 | folder_path = split.alltrain{i}; 22 | folder_path(1:16) = ''; 23 | hash_train.put(folder_path,0); 24 | end 25 | for i = 1:N_val 26 | folder_path = split.alltest{i}; 27 | folder_path(1:16) = ''; 28 | hash_val.put(folder_path,0); 29 | end 30 | 31 | %% Map data to train or val set. 32 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 33 | 34 | fid_train = fopen('../sunrgbd_trainval/train_data_idx.txt', 'w'); 35 | fid_val = fopen('../sunrgbd_trainval/val_data_idx.txt', 'w'); 36 | 37 | for imageId = 1:10335 38 | data = SUNRGBDMeta(imageId); 39 | depthpath = data.depthpath; 40 | depthpath(1:16) = ''; 41 | [filepath,name,ext] = fileparts(depthpath); 42 | [filepath,name,ext] = fileparts(filepath); 43 | if hash_train.containsKey(filepath) 44 | fprintf(fid_train, '%d\n', imageId); 45 | elseif hash_val.containsKey(filepath) 46 | fprintf(fid_val, '%d\n', imageId); 47 | else 48 | a = 1; 49 | end 50 | end 51 | fclose(fid_train); 52 | fclose(fid_val); 53 | -------------------------------------------------------------------------------- /sunrgbd/model_util_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | class SunrgbdDatasetConfig(object): 15 | def __init__(self): 16 | self.num_class = 10 17 | self.num_heading_bin = 12 18 | self.num_size_cluster = 10 19 | 20 | self.type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | self.class2type = {self.type2class[t]:t for t in self.type2class} 22 | self.type2onehotclass={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 23 | self.type_mean_size = {'bathtub': np.array([0.765840,1.398258,0.472728]), 24 | 'bed': np.array([2.114256,1.620300,0.927272]), 25 | 'bookshelf': np.array([0.404671,1.071108,1.688889]), 26 | 'chair': np.array([0.591958,0.552978,0.827272]), 27 | 'desk': np.array([0.695190,1.346299,0.736364]), 28 | 'dresser': np.array([0.528526,1.002642,1.172878]), 29 | 'night_stand': np.array([0.500618,0.632163,0.683424]), 30 | 'sofa': np.array([0.923508,1.867419,0.845495]), 31 | 'table': np.array([0.791118,1.279516,0.718182]), 32 | 'toilet': np.array([0.699104,0.454178,0.756250])} 33 | 34 | self.mean_size_arr = np.zeros((self.num_size_cluster, 3)) 35 | for i in range(self.num_size_cluster): 36 | self.mean_size_arr[i,:] = self.type_mean_size[self.class2type[i]] 37 | 38 | def size2class(self, size, type_name): 39 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 40 | size_class = self.type2class[type_name] 41 | size_residual = size - self.type_mean_size[type_name] 42 | return size_class, size_residual 43 | 44 | def class2size(self, pred_cls, 
residual): 45 | ''' Inverse function to size2class ''' 46 | mean_size = self.type_mean_size[self.class2type[pred_cls]] 47 | return mean_size + residual 48 | 49 | def angle2class(self, angle): 50 | ''' Convert continuous angle to discrete class 51 | [optional] also returns a small regression number from 52 | class center angle to current angle. 53 | 54 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 55 | return is class of int32 of 0,1,...,N-1 and a number such that 56 | class*(2pi/N) + number = angle 57 | ''' 58 | num_class = self.num_heading_bin 59 | angle = angle%(2*np.pi) 60 | assert(angle>=0 and angle<=2*np.pi) 61 | angle_per_class = 2*np.pi/float(num_class) 62 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 63 | class_id = int(shifted_angle/angle_per_class) 64 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 65 | return class_id, residual_angle 66 | 67 | def class2angle(self, pred_cls, residual, to_label_format=True): 68 | ''' Inverse function to angle2class ''' 69 | num_class = self.num_heading_bin 70 | angle_per_class = 2*np.pi/float(num_class) 71 | angle_center = pred_cls * angle_per_class 72 | angle = angle_center + residual 73 | if to_label_format and angle>np.pi: 74 | angle = angle - 2*np.pi 75 | return angle 76 | 77 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 78 | heading_angle = self.class2angle(heading_class, heading_residual) 79 | box_size = self.class2size(int(size_class), size_residual) 80 | obb = np.zeros((7,)) 81 | obb[0:3] = center 82 | obb[3:6] = box_size 83 | obb[6] = heading_angle*-1 84 | return obb 85 | 86 | 87 | -------------------------------------------------------------------------------- /train.bash: -------------------------------------------------------------------------------- 1 | # Training 2 | ##### 3 | #SUN-RGBD 4 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset sunrgbd --log_dir log_sunrgbd 5 | 6 | 7 | python eval.py --dataset sunrgbd --checkpoint_path log_sunrgbd/checkpoint.tar --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 8 | 9 | # ScanNet 10 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet --num_point 40000 11 | 12 | python eval.py --dataset scannet --checkpoint_path log_scannet/checkpoint.tar --dump_dir eval_scannet --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 13 | 14 | -------------------------------------------------------------------------------- /utils/eval_det.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recall and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | 24 | def voc_ap(rec, prec, use_07_metric=False): 25 | """ ap = voc_ap(rec, prec, [use_07_metric]) 26 | Compute VOC AP given precision and recall. 27 | If use_07_metric is true, uses the 28 | VOC 07 11 point method (default:False).
29 | """ 30 | if use_07_metric: 31 | # 11 point metric 32 | ap = 0. 33 | for t in np.arange(0., 1.1, 0.1): 34 | if np.sum(rec >= t) == 0: 35 | p = 0 36 | else: 37 | p = np.max(prec[rec >= t]) 38 | ap = ap + p / 11. 39 | else: 40 | # correct AP calculation 41 | # first append sentinel values at the end 42 | mrec = np.concatenate(([0.], rec, [1.])) 43 | mpre = np.concatenate(([0.], prec, [0.])) 44 | 45 | # compute the precision envelope 46 | for i in range(mpre.size - 1, 0, -1): 47 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 48 | 49 | # to calculate area under PR curve, look for points 50 | # where X axis (recall) changes value 51 | i = np.where(mrec[1:] != mrec[:-1])[0] 52 | 53 | # and sum (\Delta recall) * prec 54 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 55 | return ap 56 | 57 | import os 58 | import sys 59 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 60 | from metric_util import calc_iou # axis-aligned 3D box IoU 61 | def get_iou(bb1, bb2): 62 | """ Compute IoU of two bounding boxes. 63 | ** Define your box IoU function HERE ** 64 | """ 65 | #pass 66 | iou3d = calc_iou(bb1, bb2) 67 | return iou3d 68 | 69 | from box_util import box3d_iou 70 | def get_iou_obb(bb1,bb2): 71 | iou3d, iou2d = box3d_iou(bb1,bb2) 72 | return iou3d 73 | 74 | def get_iou_main(get_iou_func, args): 75 | return get_iou_func(*args) 76 | 77 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 78 | """ Generic functions to compute precision/recall for object detection 79 | for a single class. 80 | Input: 81 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 82 | gt: map of {img_id: [bbox]} 83 | ovthresh: scalar, iou threshold 84 | use_07_metric: bool, if True use VOC07 11 point method 85 | Output: 86 | rec: numpy array of length nd 87 | prec: numpy array of length nd 88 | ap: scalar, average precision 89 | """ 90 | 91 | # construct gt objects 92 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 93 | npos = 0 94 | for img_id in gt.keys(): 95 | bbox = np.array(gt[img_id]) 96 | det = [False] * len(bbox) 97 | npos += len(bbox) 98 | class_recs[img_id] = {'bbox': bbox, 'det': det} 99 | # pad empty list to all other imgids 100 | for img_id in pred.keys(): 101 | if img_id not in gt: 102 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 103 | 104 | # construct dets 105 | image_ids = [] 106 | confidence = [] 107 | BB = [] 108 | for img_id in pred.keys(): 109 | for box,score in pred[img_id]: 110 | image_ids.append(img_id) 111 | confidence.append(score) 112 | BB.append(box) 113 | confidence = np.array(confidence) 114 | BB = np.array(BB) # (nd,4 or 8,3 or 6) 115 | 116 | # sort by confidence 117 | sorted_ind = np.argsort(-confidence) 118 | sorted_scores = np.sort(-confidence) 119 | BB = BB[sorted_ind, ...] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | 122 | # go down dets and mark TPs and FPs 123 | nd = len(image_ids) 124 | tp = np.zeros(nd) 125 | fp = np.zeros(nd) 126 | for d in range(nd): 127 | #if d%100==0: print(d) 128 | R = class_recs[image_ids[d]] 129 | bb = BB[d,...].astype(float) 130 | ovmax = -np.inf 131 | BBGT = R['bbox'].astype(float) 132 | 133 | if BBGT.size > 0: 134 | # compute overlaps 135 | for j in range(BBGT.shape[0]): 136 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...])) 137 | if iou > ovmax: 138 | ovmax = iou 139 | jmax = j 140 | 141 | #print d, ovmax 142 | if ovmax > ovthresh: 143 | if not R['det'][jmax]: 144 | tp[d] = 1. 145 | R['det'][jmax] = 1 146 | else: 147 | fp[d] = 1.
148 | else: 149 | fp[d] = 1. 150 | 151 | # compute precision recall 152 | fp = np.cumsum(fp) 153 | tp = np.cumsum(tp) 154 | rec = tp / float(npos) 155 | #print('NPOS: ', npos) 156 | # avoid divide by zero in case the first detection matches a difficult 157 | # ground truth 158 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 159 | ap = voc_ap(rec, prec, use_07_metric) 160 | 161 | return rec, prec, ap 162 | 163 | def eval_det_cls_wrapper(arguments): 164 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 165 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 166 | return (rec, prec, ap) 167 | 168 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 169 | """ Generic functions to compute precision/recall for object detection 170 | for multiple classes. 171 | Input: 172 | pred_all: map of {img_id: [(classname, bbox, score)]} 173 | gt_all: map of {img_id: [(classname, bbox)]} 174 | ovthresh: scalar, iou threshold 175 | use_07_metric: bool, if true use VOC07 11 point method 176 | Output: 177 | rec: {classname: rec} 178 | prec: {classname: prec_all} 179 | ap: {classname: scalar} 180 | """ 181 | pred = {} # map {classname: pred} 182 | gt = {} # map {classname: gt} 183 | for img_id in pred_all.keys(): 184 | for classname, bbox, score in pred_all[img_id]: 185 | if classname not in pred: pred[classname] = {} 186 | if img_id not in pred[classname]: 187 | pred[classname][img_id] = [] 188 | if classname not in gt: gt[classname] = {} 189 | if img_id not in gt[classname]: 190 | gt[classname][img_id] = [] 191 | pred[classname][img_id].append((bbox,score)) 192 | for img_id in gt_all.keys(): 193 | for classname, bbox in gt_all[img_id]: 194 | if classname not in gt: gt[classname] = {} 195 | if img_id not in gt[classname]: 196 | gt[classname][img_id] = [] 197 | gt[classname][img_id].append(bbox) 198 | 199 | rec = {} 200 | prec = {} 201 | ap = {} 202 | for classname in gt.keys(): 203 | print('Computing AP for class: ', classname) 204 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) 205 | print(classname, ap[classname]) 206 | 207 | return rec, prec, ap 208 | 209 | from multiprocessing import Pool 210 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 211 | """ Generic functions to compute precision/recall for object detection 212 | for multiple classes. 
213 | Input: 214 | pred_all: map of {img_id: [(classname, bbox, score)]} 215 | gt_all: map of {img_id: [(classname, bbox)]} 216 | ovthresh: scalar, iou threshold 217 | use_07_metric: bool, if true use VOC07 11 point method 218 | Output: 219 | rec: {classname: rec} 220 | prec: {classname: prec_all} 221 | ap: {classname: scalar} 222 | """ 223 | pred = {} # map {classname: pred} 224 | gt = {} # map {classname: gt} 225 | for img_id in pred_all.keys(): 226 | for classname, bbox, score in pred_all[img_id]: 227 | if classname not in pred: pred[classname] = {} 228 | if img_id not in pred[classname]: 229 | pred[classname][img_id] = [] 230 | if classname not in gt: gt[classname] = {} 231 | if img_id not in gt[classname]: 232 | gt[classname][img_id] = [] 233 | pred[classname][img_id].append((bbox,score)) 234 | for img_id in gt_all.keys(): 235 | for classname, bbox in gt_all[img_id]: 236 | if classname not in gt: gt[classname] = {} 237 | if img_id not in gt[classname]: 238 | gt[classname][img_id] = [] 239 | gt[classname][img_id].append(bbox) 240 | 241 | rec = {} 242 | prec = {} 243 | ap = {} 244 | p = Pool(processes=10) 245 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred]) 246 | p.close() 247 | for i, classname in enumerate(gt.keys()): 248 | if classname in pred: 249 | rec[classname], prec[classname], ap[classname] = ret_values[i] 250 | else: 251 | rec[classname] = 0 252 | prec[classname] = 0 253 | ap[classname] = 0 254 | print(classname, ap[classname]) 255 | 256 | return rec, prec, ap 257 | -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. Qi 9 | """ 10 | 11 | import os 12 | import sys 13 | import torch 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | 17 | import numpy as np 18 | 19 | # Mesh IO 20 | import trimesh 21 | 22 | 23 | # ---------------------------------------- 24 | # Precision and Recall 25 | # ---------------------------------------- 26 | 27 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 28 | ''' 29 | Args: 30 | labels: (B, N, 6) 31 | pred: (B, M, 6) 32 | iou_thresh: scalar 33 | conf_thresh: scalar 34 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 35 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 
36 | Returns: 37 | TP,FP,FN,Precision,Recall 38 | ''' 39 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 40 | # of True/False array as in numpy 41 | assert(not torch.is_tensor(label_mask)) 42 | assert(not torch.is_tensor(pred_mask)) 43 | TP, FP, FN = 0, 0, 0 44 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 45 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 46 | for batch_idx in range(labels.shape[0]): 47 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :], 48 | pred[batch_idx, pred_mask[batch_idx,:]==1, :], 49 | iou_thresh, conf_thresh) 50 | TP += TP_i 51 | FP += FP_i 52 | FN += FN_i 53 | 54 | return TP, FP, FN, precision_recall(TP, FP, FN) 55 | 56 | 57 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 58 | """Compute P and R for predicted bounding boxes. Ignores classes! 59 | Args: 60 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 61 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 62 | Returns: 63 | TP, FP, FN 64 | """ 65 | 66 | 67 | # for each pred box with high conf (C), compute IoU with all gt boxes. 68 | # TP = number of times IoU > th ; FP = C - TP 69 | # FN - number of scene objects without good match 70 | 71 | gt_bboxes = labels[:, :6] 72 | 73 | num_scene_bboxes = gt_bboxes.shape[0] 74 | conf = pred[:, 6] 75 | 76 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 77 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 78 | 79 | # init an array to keep iou between generated and scene bboxes 80 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 81 | for g_idx in range(num_conf_pred_bboxes): 82 | for s_idx in range(num_scene_bboxes): 83 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :]) 84 | 85 | 86 | good_match_arr = (iou_arr >= iou_thresh) 87 | 88 | TP = good_match_arr.any(axis=1).sum() 89 | FP = num_conf_pred_bboxes - TP 90 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 91 | 92 | return TP, FP, FN 93 | 94 | 95 | def precision_recall(TP, FP, FN): 96 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0 97 | Rec = 1.0 * TP / (TP + FN) 98 | return Prec, Rec 99 | 100 | 101 | def calc_iou(box_a, box_b): 102 | """Computes IoU of two axis aligned bboxes. 
103 | Args: 104 | box_a, box_b: 6D of center and lengths 105 | Returns: 106 | iou 107 | """ 108 | 109 | max_a = box_a[0:3] + box_a[3:6]/2 110 | max_b = box_b[0:3] + box_b[3:6]/2 111 | min_max = np.array([max_a, max_b]).min(0) 112 | 113 | min_a = box_a[0:3] - box_a[3:6]/2 114 | min_b = box_b[0:3] - box_b[3:6]/2 115 | max_min = np.array([min_a, min_b]).max(0) 116 | if not ((min_max > max_min).all()): 117 | return 0.0 118 | 119 | intersection = (min_max - max_min).prod() 120 | vol_a = box_a[3:6].prod() 121 | vol_b = box_b[3:6].prod() 122 | union = vol_a + vol_b - intersection 123 | return 1.0*intersection / union 124 | 125 | 126 | if __name__ == '__main__': 127 | print('running some tests') 128 | 129 | ############ 130 | ## Test IoU 131 | ############ 132 | box_a = np.array([0,0,0,1,1,1]) 133 | box_b = np.array([0,0,0,2,2,2]) 134 | expected_iou = 1.0/8 135 | pred_iou = calc_iou(box_a, box_b) 136 | assert expected_iou == pred_iou, 'function returned wrong IoU' 137 | 138 | box_a = np.array([0,0,0,1,1,1]) 139 | box_b = np.array([10,10,10,2,2,2]) 140 | expected_iou = 0.0 141 | pred_iou = calc_iou(box_a, box_b) 142 | assert expected_iou == pred_iou, 'function returned wrong IoU' 143 | 144 | print('IoU test -- PASSED') 145 | 146 | ######################### 147 | ## Test Precision Recall 148 | ######################### 149 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]]) 150 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 2 and FP == 0 and FN == 0 153 | assert precision_recall(TP, FP, FN) == (1, 1) 154 | 155 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]]) 156 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 157 | assert TP == 1 and FP == 0 and FN == 1 158 | assert precision_recall(TP, FP, FN) == (1, 0.5) 159 | 160 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]]) 161 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 162 | assert TP == 1 and FP == 1 and FN == 1 163 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 164 | 165 | # wrong box has low confidence 166 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]]) 167 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 168 | assert TP == 1 and FP == 0 and FN == 1 169 | assert precision_recall(TP, FP, FN) == (1, 0.5) 170 | 171 | print('Precision Recall test -- PASSED') 172 | 173 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
5 | 6 | import numpy as np 7 | from pc_util import bbox_corner_dist_measure 8 | 9 | # boxes are axis aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 10 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 11 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 12 | ''' 13 | def nms_2d(boxes, overlap_threshold): 14 | x1 = boxes[:,0] 15 | y1 = boxes[:,1] 16 | x2 = boxes[:,2] 17 | y2 = boxes[:,3] 18 | score = boxes[:,4] 19 | area = (x2-x1)*(y2-y1) 20 | 21 | I = np.argsort(score) 22 | pick = [] 23 | while (I.size!=0): 24 | last = I.size 25 | i = I[-1] 26 | pick.append(i) 27 | suppress = [last-1] 28 | for pos in range(last-1): 29 | j = I[pos] 30 | xx1 = max(x1[i],x1[j]) 31 | yy1 = max(y1[i],y1[j]) 32 | xx2 = min(x2[i],x2[j]) 33 | yy2 = min(y2[i],y2[j]) 34 | w = xx2-xx1 35 | h = yy2-yy1 36 | if (w>0 and h>0): 37 | o = w*h/area[j] 38 | print('Overlap is', o) 39 | if (o>overlap_threshold): 40 | suppress.append(pos) 41 | I = np.delete(I,suppress) 42 | return pick 43 | 44 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 45 | x1 = boxes[:,0] 46 | y1 = boxes[:,1] 47 | x2 = boxes[:,2] 48 | y2 = boxes[:,3] 49 | score = boxes[:,4] 50 | area = (x2-x1)*(y2-y1) 51 | 52 | I = np.argsort(score) 53 | pick = [] 54 | while (I.size!=0): 55 | last = I.size 56 | i = I[-1] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 60 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 61 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 62 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 63 | 64 | w = np.maximum(0, xx2-xx1) 65 | h = np.maximum(0, yy2-yy1) 66 | 67 | if old_type: 68 | o = (w*h)/area[I[:last-1]] 69 | else: 70 | inter = w*h 71 | o = inter / (area[i] + area[I[:last-1]] - inter) 72 | 73 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 74 | 75 | return pick 76 | 77 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 78 | x1 = boxes[:,0] 79 | y1 = boxes[:,1] 80 | z1 = boxes[:,2] 81 | x2 = boxes[:,3] 82 | y2 = boxes[:,4] 83 | z2 = boxes[:,5] 84 | score = boxes[:,6] 85 | area = (x2-x1)*(y2-y1)*(z2-z1) 86 | 87 | I = np.argsort(score) 88 | pick = [] 89 | while (I.size!=0): 90 | last = I.size 91 | i = I[-1] 92 | pick.append(i) 93 | 94 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 95 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 96 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 97 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 98 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 99 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 100 | 101 | l = np.maximum(0, xx2-xx1) 102 | w = np.maximum(0, yy2-yy1) 103 | h = np.maximum(0, zz2-zz1) 104 | 105 | if old_type: 106 | o = (l*w*h)/area[I[:last-1]] 107 | else: 108 | inter = l*w*h 109 | o = inter / (area[i] + area[I[:last-1]] - inter) 110 | 111 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 112 | 113 | return pick 114 | 115 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 116 | x1 = boxes[:,0] 117 | y1 = boxes[:,1] 118 | z1 = boxes[:,2] 119 | x2 = boxes[:,3] 120 | y2 = boxes[:,4] 121 | z2 = boxes[:,5] 122 | score = boxes[:,6] 123 | cls = boxes[:,7] 124 | area = (x2-x1)*(y2-y1)*(z2-z1) 125 | 126 | I = np.argsort(score) 127 | pick = [] 128 | while (I.size!=0): 129 | last = I.size 130 | i = I[-1] 131 | pick.append(i) 132 | 133 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 134 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 135 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 136 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 137 | yy2 = np.minimum(y2[i],
y2[I[:last-1]]) 138 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 139 | cls1 = cls[i] 140 | cls2 = cls[I[:last-1]] 141 | 142 | l = np.maximum(0, xx2-xx1) 143 | w = np.maximum(0, yy2-yy1) 144 | h = np.maximum(0, zz2-zz1) 145 | 146 | if old_type: 147 | o = (l*w*h)/area[I[:last-1]] 148 | else: 149 | inter = l*w*h 150 | o = inter / (area[i] + area[I[:last-1]] - inter) 151 | o = o * (cls1==cls2) 152 | 153 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 154 | 155 | return pick 156 | 157 | 158 | def nms_crnr_dist(boxes, conf, overlap_threshold): 159 | 160 | I = np.argsort(conf) 161 | pick = [] 162 | while (I.size!=0): 163 | last = I.size 164 | i = I[-1] 165 | pick.append(i) 166 | 167 | scores = [] 168 | for ind in I[:-1]: 169 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 170 | 171 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 172 | 173 | return pick 174 | 175 | if __name__=='__main__': 176 | a = np.random.random((100,5)) 177 | print(nms_2d(a,0.9)) 178 | print(nms_2d_faster(a,0.9)) 179 | -------------------------------------------------------------------------------- /utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. Qi 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | def huber_loss(error, delta=1.0): 16 | """ 17 | Args: 18 | error: Torch tensor (d1,d2,...,dk) 19 | Returns: 20 | loss: Torch tensor (d1,d2,...,dk) 21 | 22 | x = error = pred - gt or dist(pred,gt) 23 | 0.5 * |x|^2 if |x|<=d 24 | 0.5 * d^2 + d * (|x|-d) if |x|>d 25 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 26 | """ 27 | abs_error = torch.abs(error) 28 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 29 | quadratic = torch.clamp(abs_error, max=delta) 30 | linear = (abs_error - quadratic) 31 | loss = 0.5 * quadratic**2 + delta * linear 32 | return loss 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | def demo_nn_distance(): 64 | np.random.seed(0) 65 | pc1arr = np.random.random((1,5,3)) 66 | pc2arr = np.random.random((1,6,3)) 67 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 68 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 69 | dist1, idx1, dist2, idx2 = 
nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | 15 | class Logger(object): 16 | 17 | def __init__(self, log_dir): 18 | """Create a summary writer logging to log_dir.""" 19 | self.writer = tf.summary.FileWriter(log_dir) 20 | 21 | def scalar_summary(self, tag, value, step): 22 | """Log a scalar variable.""" 23 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 24 | self.writer.add_summary(summary, step) 25 | 26 | def image_summary(self, tag, images, step): 27 | """Log a list of images.""" 28 | 29 | img_summaries = [] 30 | for i, img in enumerate(images): 31 | # Write the image to a string 32 | try: 33 | s = StringIO() 34 | except: 35 | s = BytesIO() 36 | scipy.misc.toimage(img).save(s, format="png") 37 | 38 | # Create an Image object 39 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 40 | height=img.shape[0], 41 | width=img.shape[1]) 42 | # Create a Summary value 43 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 44 | 45 | # Create and write Summary 46 | summary = tf.Summary(value=img_summaries) 47 | self.writer.add_summary(summary, step) 48 | 49 | def histo_summary(self, tag, values, step, bins=1000): 50 | """Log a histogram of the tensor of values.""" 51 | 52 | # Create a histogram using numpy 53 | counts, bin_edges = np.histogram(values, bins=bins) 54 | 55 | # Fill the fields of the histogram proto 56 | hist = tf.HistogramProto() 57 | hist.min = float(np.min(values)) 58 | hist.max = float(np.max(values)) 59 | hist.num = int(np.prod(values.shape)) 60 | hist.sum = float(np.sum(values)) 61 | hist.sum_squares = float(np.sum(values**2)) 62 | 63 | # Drop the start of the first bin 64 | bin_edges = bin_edges[1:] 65 | 66 | # Add bin edges and counts 67 | for edge in bin_edges: 68 | hist.bucket_limit.append(edge) 69 | for c in counts: 70 | hist.bucket.append(c) 71 | 72 | # Create and write Summary 73 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 74 | self.writer.add_summary(summary, step) 75 | self.writer.flush() 76 | -------------------------------------------------------------------------------- /utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 7 | import os 8 | import time 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | import sys 11 | sys.path.append(BASE_DIR) 12 | import tf_logger 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, opt, name='train'): 17 | # self.opt = opt 18 | #self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 19 | #self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 20 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 21 | self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt') 22 | with open(self.log_name, "a") as log_file: 23 | now = time.strftime("%c") 24 | log_file.write('================ Training Loss (%s) ================\n' % now) 25 | 26 | # |visuals|: dictionary of images to save 27 | def log_images(self, visuals, step): 28 | for label, image_numpy in visuals.items(): 29 | self.logger.image_summary( 30 | label, [image_numpy], step) 31 | 32 | # scalars: dictionary of scalar labels and values 33 | def log_scalars(self, scalars, step): 34 | for label, val in scalars.items(): 35 | self.logger.scalar_summary(label, val, step) 36 | 37 | # scatter plots 38 | def plot_current_points(self, points, disp_offset=10): 39 | pass 40 | 41 | # scalars: same format as |scalars| of plot_current_scalars 42 | def print_current_scalars(self, epoch, i, scalars): 43 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 44 | for k, v in scalars.items(): 45 | message += '%s: %.3f ' % (k, v) 46 | 47 | print(message) 48 | with open(self.log_name, "a") as log_file: 49 | log_file.write('%s\n' % message) 50 | --------------------------------------------------------------------------------
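Usage note: the `Visualizer` above is a thin wrapper around `tf_logger.Logger` that writes TensorBoard summaries plus a plain-text loss log. Below is a minimal, hypothetical sketch of wiring it into a training loop; the argparse namespace `opt` and its `--log_dir` option mirror the flag used in `train.bash`, and it assumes `utils/` is on the Python path. It is an illustration, not code from the repository.

    # Hypothetical example: drive utils/tf_visualizer.Visualizer from a training loop
    import argparse
    from tf_visualizer import Visualizer

    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='log_sunrgbd')  # same flag train.py takes in train.bash
    opt = parser.parse_args()

    viz = Visualizer(opt, name='train')  # TensorBoard events go to <log_dir>/train
    for epoch in range(10):
        # in real training these would be the averaged losses for the epoch/iteration
        scalars = {'loss/total_loss': 1.0 / (epoch + 1)}
        viz.log_scalars(scalars, epoch)               # scalar summaries for TensorBoard
        viz.print_current_scalars(epoch, 0, scalars)  # also appended to tf_visualizer_log.txt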