├── LICENSE ├── README.md ├── demo.py ├── doc ├── NIPS_new.jpeg ├── NIPS_new.jpg ├── NIPS_new.pdf ├── teaser.jpg └── tips.md ├── eval.py ├── models ├── ap_helper.py ├── backbone_module.py ├── backbone_module_SA2_denseaspp3_6.py ├── backbone_module_SA2_denseaspp3_6_12.py ├── backbone_module_enc_FP2_K8_G12_C3.py ├── boxnet.py ├── dump_helper.py ├── enc_layer.py ├── loss_helper.py ├── loss_helper_boxnet.py ├── proposal_module.py ├── votenet.py ├── votenet_SA2_denseaspp3_6.py ├── votenet_SA2_denseaspp3_6_12.py ├── votenet_enc_FP2_K8_G12_C3.py ├── votenet_enc_complex_FP2_K8_G12_C3.py └── voting_module.py ├── pointnet2 ├── _ext_src │ ├── include │ │ ├── ball_query.h │ │ ├── cuda_utils.h │ │ ├── group_points.h │ │ ├── interpolate.h │ │ ├── sampling.h │ │ └── utils.h │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── bindings.cpp │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── sampling.cpp │ │ └── sampling_gpu.cu ├── pointnet2_modules.py ├── pointnet2_test.py ├── pointnet2_utils.py ├── pytorch_utils.py └── setup.py ├── scannet ├── README.md ├── batch_load_scannet_data.py ├── data_viz.py ├── load_scannet_data.py ├── meta_data │ ├── scannet_means.npz │ ├── scannet_train.txt │ ├── scannetv2-labels.combined.tsv │ ├── scannetv2_test.txt │ ├── scannetv2_train.txt │ └── scannetv2_val.txt ├── model_util_scannet.py ├── scannet_detection_dataset.py └── scannet_utils.py ├── sunrgbd ├── README.md ├── matlab │ ├── extract_rgbd_data_v1.m │ ├── extract_rgbd_data_v2.m │ └── extract_split.m ├── model_util_sunrgbd.py ├── sunrgbd_data.py ├── sunrgbd_detection_dataset.py └── sunrgbd_utils.py ├── train.bash ├── train.py └── utils ├── box_util.py ├── eval_det.py ├── metric_util.py ├── nms.py ├── nn_distance.py ├── pc_util.py ├── tf_logger.py └── tf_visualizer.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Point Detectron 2 | Created by Xu Liu, from JD AI Research and The University of Tokyo. 
3 | 4 | ![teaser](https://github.com/AsahiLiu/PointDetectron/blob/main/doc/NIPS_new.jpeg) 5 | 6 | ## Introduction 7 | This repository is the code release for our NeurIPS 2020 paper Group Contextual Encoding for 3D Point Clouds (Online Paper [here](https://papers.nips.cc/paper/2020/hash/9b72e31dac81715466cd580a448cf823-Abstract.html)) and our 3DV 2020 paper Dense Point Diffusion for 3D Detection (arXiv report [here](https://arxiv.org/pdf/)) 8 | 9 | This repository is built on VoteNet; we augment the VoteNet model with the Group Contextual Encoding block, the Dense Point Diffusion modules, and the Dilated Point Convolution. 10 | ## Citation 11 | @article{liu2020group, 12 | title={Group Contextual Encoding for 3D Point Clouds}, 13 | author={Liu, Xu and Li, Chengtao and Wang, Jian and Wang, Jingbo and Shi, Boxin and He, Xiaodong}, 14 | journal={Advances in Neural Information Processing Systems}, 15 | volume={33}, 16 | year={2020} 17 | } 18 | 19 | ## Installation 20 | 21 | Install [PyTorch](https://pytorch.org/get-started/locally/) and [TensorFlow](https://github.com/tensorflow/tensorflow) (for TensorBoard). You will need access to GPUs. MATLAB is required to prepare the SUN RGB-D data. The code is tested with Ubuntu 18.04, PyTorch v1.1, TensorFlow v1.14, CUDA 10.0 and cuDNN v7.4. Note: there is some incompatibility with newer versions of PyTorch (e.g. v1.3), which is yet to be fixed. 22 | 23 | Compile the CUDA layers for [PointNet++](http://arxiv.org/abs/1706.02413), which we use in the backbone network: 24 | 25 | cd pointnet2 26 | python setup.py install 27 | 28 | To check that the compilation succeeded, run `python models/votenet.py` and verify that a forward pass works. 29 | 30 | Install the following Python dependencies (with `pip install`): 31 | 32 | matplotlib 33 | opencv-python 34 | torch-encoding 35 | plyfile 36 | 'trimesh>=2.35.39,<2.35.40' 37 | 38 | ## Run demo 39 | 40 | Following VoteNet, place the pretrained models under the project root path (`/path/to/project/demo_files`) and then run: 41 | 42 | python demo.py 43 | 44 | The demo uses a model pre-trained on SUN RGB-D to detect objects in a point cloud of an indoor room with a table and a few chairs (from the SUN RGB-D val set). You can use 3D visualization software such as [MeshLab](http://www.meshlab.net/) to open the dumped files under `demo_files/sunrgbd_results` and inspect the 3D detection output. Specifically, open `***_pc.ply` and `***_pred_confident_nms_bbox.ply` to see the input point cloud and the predicted 3D bounding boxes. 45 | 46 | You can also run the following command to use a pretrained model on ScanNet: 47 | 48 | python demo.py --dataset scannet --num_point 40000 49 | 50 | Detection results will be dumped to `demo_files/scannet_results`. 51 | 52 | ## Training and evaluating 53 | 54 | ### Data preparation 55 | Please follow the VoteNet instructions to prepare the datasets. 56 | 57 | For SUN RGB-D, follow the [README](https://github.com/facebookresearch/votenet/blob/master/sunrgbd/README.md) under the `sunrgbd` folder. 58 | 59 | For ScanNet, follow the [README](https://github.com/facebookresearch/votenet/blob/master/scannet/README.md) under the `scannet` folder.
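Before launching training, it can be worth confirming that the prepared data is readable through the same dataset class that `train.py` and `eval.py` use. The snippet below is a minimal sanity check for SUN RGB-D, run from the project root; it assumes the default V1 labels and the default height-only point feature, so the expected point-cloud shape of (20000, 4) is an assumption rather than a guarantee.

    import sys
    sys.path.append('sunrgbd')
    from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset

    # Load one sample to check that the preprocessed files can be read.
    dataset = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True)
    sample = dataset[0]
    print(sample['point_clouds'].shape)   # expected (20000, 4): xyz + height feature
    print(sample['center_label'].shape)   # ground-truth box centers, padded to MAX_NUM_OBJ

A similar check should work for ScanNet with `ScannetDetectionDataset` from the `scannet` folder (use `num_points=40000` to match the training setting).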
60 | 61 | ### Train and test on SUN RGB-D 62 | 63 | To train a model ${MODEL_CONFIG} from the MODEL ZOO on SUN RGB-D data (depth images): 64 | 65 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset sunrgbd --log_dir log_sunrgbd --model ${MODEL_CONFIG} 66 | 67 | You can use `CUDA_VISIBLE_DEVICES=0,1,2` to specify which GPU(s) to use. Without specifying CUDA devices, the training will use all available GPUs and train with data parallelism (note that, due to I/O load, the training speedup is not linear in the number of GPUs used). 68 | While training, you can check the `log_sunrgbd/log_train.txt` file for progress, or use TensorBoard to see the loss curves. 69 | 70 | To test the trained model with its checkpoint: 71 | 72 | python eval.py --dataset sunrgbd --checkpoint_path log_sunrgbd/checkpoint.tar --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model ${MODEL_CONFIG} 73 | 74 | Example results will be dumped in the `eval_sunrgbd` folder (or any other folder you specify). You can run `python eval.py -h` to see the full evaluation options. After the evaluation, you can use MeshLab to visualize the predicted votes and 3D bounding boxes (select wireframe mode to view the boxes). 75 | Final evaluation results are printed on screen and also written to the `log_eval.txt` file under the dump directory. By default we evaluate with both AP@0.25 and AP@0.5 using 3D IoU on oriented boxes. 76 | 77 | ### Train and test on ScanNet 78 | 79 | To train a model ${MODEL_CONFIG} from the MODEL ZOO on ScanNet data (fused scans): 80 | 81 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet --num_point 40000 --model ${MODEL_CONFIG} 82 | 83 | To test the trained model with its checkpoint: 84 | 85 | python eval.py --dataset scannet --checkpoint_path log_scannet/checkpoint.tar --dump_dir eval_scannet --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model ${MODEL_CONFIG} 86 | 87 | Example results will be dumped in the `eval_scannet` folder (or any other folder you specify). 88 | 89 | ### MODEL ZOO 90 | 91 | | MODEL SPECS | ${MODEL_CONFIG} | SUN-RGBD | ScanNet | 92 | |---------------------------------------------|----------:|----------:|:-------:| 93 | | [Group Contextual Encoding (K=8, G=12, C×3)](models/votenet_enc_FP2_K8_G12_C3.py)|votenet_enc_FP2_K8_G12_C3 | 60.7 | 60.8 | 94 | | [SA2 - Dense Point Diffusion (3,6,12)](models/votenet_SA2_denseaspp3_6_12.py) |votenet_SA2_denseaspp3_6_12| 58.6 | 59.6 | 95 | | [SA2 - Dense Point Diffusion (3,6)](models/votenet_SA2_denseaspp3_6.py)|votenet_SA2_denseaspp3_6| 58.7 | 58.9 | 96 | | [VoteNet](models/votenet.py) | votenet (default)| 57.7 | 58.6 | 97 | 98 | 99 | 100 | The ablation models in the papers can be derived from the models listed above; therefore, we do not list them all. A complete worked example combining the training and evaluation commands is given at the end of this section. 101 | ### Train on your own data 102 | 103 | [For Pro Users] If you have your own dataset with point clouds and annotated 3D bounding boxes, you can create a new dataset class and train VoteNet on your own data. To ease the process, some tips are provided in this [doc](https://github.com/facebookresearch/votenet/blob/master/doc/tips.md).
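As a concrete end-to-end example, the following commands train the Group Contextual Encoding configuration from the MODEL ZOO on ScanNet and then evaluate its checkpoint. The log and dump directory names (`log_scannet_enc`, `eval_scannet_enc`) are arbitrary placeholders; any other ${MODEL_CONFIG} from the table can be substituted.

    CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet_enc --num_point 40000 --model votenet_enc_FP2_K8_G12_C3

    python eval.py --dataset scannet --checkpoint_path log_scannet_enc/checkpoint.tar --dump_dir eval_scannet_enc --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal --model votenet_enc_FP2_K8_G12_C3

Evaluation results (AP@0.25 and AP@0.5) are written to `eval_scannet_enc/log_eval.txt`.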
104 | 105 | ## Acknowledgements 106 | We want to thank Charles Qi for his VoteNet ([original codebase](https://github.com/facebookresearch/votenet)), Hang Zhang for his EncNet ([original codebase](https://hangzhang.org/PyTorch-Encoding/)) and Erik Wijmans for his PointNet++ implementation in Pytorch ([original codebase](https://github.com/erikwijmans/Pointnet2_PyTorch)). 107 | 108 | ## License 109 | votenet is relased under the MIT License. See the [LICENSE file](https://arxiv.org/pdf/1904.09664.pdf) for more details. 110 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Demo of using VoteNet 3D object detector to detect objects from a point cloud. 7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | import argparse 13 | import importlib 14 | import time 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset: sunrgbd or scannet [default: sunrgbd]') 18 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 19 | FLAGS = parser.parse_args() 20 | 21 | import torch 22 | import torch.nn as nn 23 | import torch.optim as optim 24 | 25 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 26 | ROOT_DIR = BASE_DIR 27 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 28 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 29 | from pc_util import random_sampling, read_ply 30 | from ap_helper import parse_predictions 31 | 32 | def preprocess_point_cloud(point_cloud): 33 | ''' Prepare the numpy point cloud (N,3) for forward pass ''' 34 | point_cloud = point_cloud[:,0:3] # do not use color for now 35 | floor_height = np.percentile(point_cloud[:,2],0.99) 36 | height = point_cloud[:,2] - floor_height 37 | point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7) 38 | point_cloud = random_sampling(point_cloud, FLAGS.num_point) 39 | pc = np.expand_dims(point_cloud.astype(np.float32), 0) # (1,40000,4) 40 | return pc 41 | 42 | if __name__=='__main__': 43 | 44 | # Set file paths and dataset config 45 | demo_dir = os.path.join(BASE_DIR, 'demo_files') 46 | if FLAGS.dataset == 'sunrgbd': 47 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 48 | from sunrgbd_detection_dataset import DC # dataset config 49 | checkpoint_path = os.path.join(demo_dir, 'pretrained_votenet_on_sunrgbd.tar') 50 | pc_path = os.path.join(demo_dir, 'input_pc_sunrgbd.ply') 51 | elif FLAGS.dataset == 'scannet': 52 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 53 | from scannet_detection_dataset import DC # dataset config 54 | checkpoint_path = os.path.join(demo_dir, 'pretrained_votenet_on_scannet.tar') 55 | pc_path = os.path.join(demo_dir, 'input_pc_scannet.ply') 56 | else: 57 | print('Unkown dataset %s. 
Exiting.'%(DATASET)) 58 | exit(-1) 59 | 60 | eval_config_dict = {'remove_empty_box': True, 'use_3d_nms': True, 'nms_iou': 0.25, 61 | 'use_old_type_nms': False, 'cls_nms': False, 'per_class_proposal': False, 62 | 'conf_thresh': 0.5, 'dataset_config': DC} 63 | 64 | # Init the model and optimzier 65 | MODEL = importlib.import_module('votenet') # import network module 66 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 67 | net = MODEL.VoteNet(num_proposal=256, input_feature_dim=1, vote_factor=1, 68 | sampling='seed_fps', num_class=DC.num_class, 69 | num_heading_bin=DC.num_heading_bin, 70 | num_size_cluster=DC.num_size_cluster, 71 | mean_size_arr=DC.mean_size_arr).to(device) 72 | print('Constructed model.') 73 | 74 | # Load checkpoint 75 | optimizer = optim.Adam(net.parameters(), lr=0.001) 76 | checkpoint = torch.load(checkpoint_path) 77 | net.load_state_dict(checkpoint['model_state_dict']) 78 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 79 | epoch = checkpoint['epoch'] 80 | print("Loaded checkpoint %s (epoch: %d)"%(checkpoint_path, epoch)) 81 | 82 | # Load and preprocess input point cloud 83 | net.eval() # set model to eval mode (for bn and dp) 84 | point_cloud = read_ply(pc_path) 85 | pc = preprocess_point_cloud(point_cloud) 86 | print('Loaded point cloud data: %s'%(pc_path)) 87 | 88 | # Model inference 89 | inputs = {'point_clouds': torch.from_numpy(pc).to(device)} 90 | tic = time.time() 91 | with torch.no_grad(): 92 | end_points = net(inputs) 93 | toc = time.time() 94 | print('Inference time: %f'%(toc-tic)) 95 | end_points['point_clouds'] = inputs['point_clouds'] 96 | pred_map_cls = parse_predictions(end_points, eval_config_dict) 97 | print('Finished detection. %d object detected.'%(len(pred_map_cls[0]))) 98 | 99 | dump_dir = os.path.join(demo_dir, '%s_results'%(FLAGS.dataset)) 100 | if not os.path.exists(dump_dir): os.mkdir(dump_dir) 101 | MODEL.dump_results(end_points, dump_dir, DC, True) 102 | print('Dumped detection results to folder %s'%(dump_dir)) 103 | -------------------------------------------------------------------------------- /doc/NIPS_new.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.jpeg -------------------------------------------------------------------------------- /doc/NIPS_new.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.jpg -------------------------------------------------------------------------------- /doc/NIPS_new.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/NIPS_new.pdf -------------------------------------------------------------------------------- /doc/teaser.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/doc/teaser.jpg -------------------------------------------------------------------------------- /doc/tips.md: -------------------------------------------------------------------------------- 1 | ### Train on your own data 2 | 3 | [For Pro Users] If you have your own dataset with point clouds and annotated 3D bounding boxes, you can create a 
new dataset class and train VoteNet on your own data. To ease the proces, some tips are provided below. 4 | 5 | Firstly, you need to store point clouds in the upright coordinate system (Z is up, Y is forward, X is right-ward) and 3D bounding boxes as its center (x,y,z), size (l,w,h) and heading angle (along the up-axis; rotation radius from +X towards -Y; +X is 0 and -Y is pi/4). You can refer to `sunrgbd/sunrgbd_data.py` as to how to compute the groundtruth votes (translational vectors from object points to 3D bounding box centers). If your dataset has instance segmentation annotation, you can also compute groundtruth votes on the fly in the dataset class -- refer to `scannet/batch_load_scannet_data.py` and `scannet/scannet_detection_dataset.py` for more details. 6 | 7 | Secondly, you need to create a new dataset class as well as to specify some config information about the dataset. For config information, you can refer to `sunrgbd/model_util_config.py` as an example and modify the `num_classes`, `type2class`, `num_size_clusters`, `mean_size_arr` etc. The `mean_size_arr` is computed by going through all 3D bounding boxes in the train set and cluster them (either by geometric size or semantic class) into several clusters and then compute the median box size in each cluster (an example porcess is [here](https://github.com/facebookresearch/votenet/blob/7c19af314a3d12532dc3c8dbd05d1d404c75891e/sunrgbd/sunrgbd_data.py#L264)). In both SUN RGB-D and ScanNet, we only consider one tempalte box size for each semantic class, but you can have multiple size templates for each class too (in which case you also need to modify the `size2class` function in the config). For detection dataset class, you can refer to `sunrgbd/sunrgbd_detection_dataset.py` and modify based on it. The major thing to modify is the dataset paths (in `__init__` function) and data loading methods (at the beginning of the `__getitem__` function), which depend on where and how you store the data. 8 | 9 | Lastly, after you make sure the dataset class returns the correct input point clouds and ground truth labels, you need to add the new dataset to the `train.py` file and `eval.py` file by augmenting the options of `FLAGS.dataset` argument (adding another `elif` to the dataset set up section). Then by selecting your new dataset in `train.py`, you should be able to train a VoteNet on your own data! 10 | 11 | Note that the VoteNet was originally used on SUN RGB-D and ScanNet which only have either 1D or 0D rotations in their annotated bounding boxes. It is possible to extend the VoteNet to predict 3D rotations though. One simple way is to supervise the network to predict three Euler angles. To support it you will need to prepare ground truth labels and then change the prediction of the 1D `heading_angle` to prediction of three Euler angles in the network output; and modify the 3D bounding box parameterization and transformations accordingly. 12 | 13 | Feel free to post an issue if you meet any difficulty during the process! 14 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Evaluation routine for 3D object detection with SUN RGB-D and ScanNet. 
7 | """ 8 | 9 | import os 10 | import sys 11 | import numpy as np 12 | from datetime import datetime 13 | import argparse 14 | import importlib 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | from torch.utils.data import DataLoader 19 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 20 | ROOT_DIR = BASE_DIR 21 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 22 | from ap_helper import APCalculator, parse_predictions, parse_groundtruths 23 | 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--model', default='votenet', help='Model file name [default: votenet]') 26 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset name. sunrgbd or scannet. [default: sunrgbd]') 27 | parser.add_argument('--checkpoint_path', default=None, help='Model checkpoint path [default: None]') 28 | parser.add_argument('--dump_dir', default=None, help='Dump dir to save sample outputs [default: None]') 29 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 30 | parser.add_argument('--num_target', type=int, default=256, help='Point Number [default: 256]') 31 | parser.add_argument('--batch_size', type=int, default=8, help='Batch Size during training [default: 8]') 32 | parser.add_argument('--vote_factor', type=int, default=1, help='Number of votes generated from each seed [default: 1]') 33 | parser.add_argument('--cluster_sampling', default='vote_fps', help='Sampling strategy for vote clusters: vote_fps, seed_fps, random [default: vote_fps]') 34 | parser.add_argument('--ap_iou_thresholds', default='0.25,0.5', help='A list of AP IoU thresholds [default: 0.25,0.5]') 35 | parser.add_argument('--no_height', action='store_true', help='Do NOT use height signal in input.') 36 | parser.add_argument('--use_color', action='store_true', help='Use RGB color in input.') 37 | parser.add_argument('--use_sunrgbd_v2', action='store_true', help='Use SUN RGB-D V2 box labels.') 38 | parser.add_argument('--use_3d_nms', action='store_true', help='Use 3D NMS instead of 2D NMS.') 39 | parser.add_argument('--use_cls_nms', action='store_true', help='Use per class NMS.') 40 | parser.add_argument('--use_old_type_nms', action='store_true', help='Use old type of NMS, IoBox2Area.') 41 | parser.add_argument('--per_class_proposal', action='store_true', help='Duplicate each proposal num_class times.') 42 | parser.add_argument('--nms_iou', type=float, default=0.25, help='NMS IoU threshold. [default: 0.25]') 43 | parser.add_argument('--conf_thresh', type=float, default=0.05, help='Filter out predictions with obj prob less than it. 
[default: 0.05]') 44 | parser.add_argument('--faster_eval', action='store_true', help='Faster evaluation by skippling empty bounding box removal.') 45 | parser.add_argument('--shuffle_dataset', action='store_true', help='Shuffle the dataset (random order).') 46 | FLAGS = parser.parse_args() 47 | 48 | if FLAGS.use_cls_nms: 49 | assert(FLAGS.use_3d_nms) 50 | 51 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 52 | BATCH_SIZE = FLAGS.batch_size 53 | NUM_POINT = FLAGS.num_point 54 | DUMP_DIR = FLAGS.dump_dir 55 | CHECKPOINT_PATH = FLAGS.checkpoint_path 56 | assert(CHECKPOINT_PATH is not None) 57 | FLAGS.DUMP_DIR = DUMP_DIR 58 | AP_IOU_THRESHOLDS = [float(x) for x in FLAGS.ap_iou_thresholds.split(',')] 59 | 60 | # Prepare DUMP_DIR 61 | if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) 62 | DUMP_FOUT = open(os.path.join(DUMP_DIR, 'log_eval.txt'), 'w') 63 | DUMP_FOUT.write(str(FLAGS)+'\n') 64 | def log_string(out_str): 65 | DUMP_FOUT.write(out_str+'\n') 66 | DUMP_FOUT.flush() 67 | print(out_str) 68 | 69 | # Init datasets and dataloaders 70 | def my_worker_init_fn(worker_id): 71 | np.random.seed(np.random.get_state()[1][0] + worker_id) 72 | 73 | if FLAGS.dataset == 'sunrgbd': 74 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 75 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, MAX_NUM_OBJ 76 | from model_util_sunrgbd import SunrgbdDatasetConfig 77 | DATASET_CONFIG = SunrgbdDatasetConfig() 78 | TEST_DATASET = SunrgbdDetectionVotesDataset('val', num_points=NUM_POINT, 79 | augment=False, use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 80 | use_v1=(not FLAGS.use_sunrgbd_v2)) 81 | elif FLAGS.dataset == 'scannet': 82 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 83 | from scannet_detection_dataset import ScannetDetectionDataset, MAX_NUM_OBJ 84 | from model_util_scannet import ScannetDatasetConfig 85 | DATASET_CONFIG = ScannetDatasetConfig() 86 | TEST_DATASET = ScannetDetectionDataset('val', num_points=NUM_POINT, 87 | augment=False, 88 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height)) 89 | else: 90 | print('Unknown dataset %s. 
Exiting...'%(FLAGS.dataset)) 91 | exit(-1) 92 | print(len(TEST_DATASET)) 93 | TEST_DATALOADER = DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, 94 | shuffle=FLAGS.shuffle_dataset, num_workers=4, worker_init_fn=my_worker_init_fn) 95 | 96 | # Init the model and optimzier 97 | MODEL = importlib.import_module(FLAGS.model) # import network module 98 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 99 | num_input_channel = int(FLAGS.use_color)*3 + int(not FLAGS.no_height)*1 100 | 101 | if FLAGS.model == 'boxnet': 102 | Detector = MODEL.BoxNet 103 | else: 104 | Detector = MODEL.VoteNet 105 | 106 | net = Detector(num_class=DATASET_CONFIG.num_class, 107 | num_heading_bin=DATASET_CONFIG.num_heading_bin, 108 | num_size_cluster=DATASET_CONFIG.num_size_cluster, 109 | mean_size_arr=DATASET_CONFIG.mean_size_arr, 110 | num_proposal=FLAGS.num_target, 111 | input_feature_dim=num_input_channel, 112 | vote_factor=FLAGS.vote_factor, 113 | sampling=FLAGS.cluster_sampling) 114 | net.to(device) 115 | criterion = MODEL.get_loss 116 | 117 | # Load the Adam optimizer 118 | optimizer = optim.Adam(net.parameters(), lr=0.001) 119 | 120 | # Load checkpoint if there is any 121 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 122 | checkpoint = torch.load(CHECKPOINT_PATH) 123 | net.load_state_dict(checkpoint['model_state_dict']) 124 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 125 | epoch = checkpoint['epoch'] 126 | log_string("Loaded checkpoint %s (epoch: %d)"%(CHECKPOINT_PATH, epoch)) 127 | 128 | # Used for AP calculation 129 | CONFIG_DICT = {'remove_empty_box': (not FLAGS.faster_eval), 'use_3d_nms': FLAGS.use_3d_nms, 'nms_iou': FLAGS.nms_iou, 130 | 'use_old_type_nms': FLAGS.use_old_type_nms, 'cls_nms': FLAGS.use_cls_nms, 'per_class_proposal': FLAGS.per_class_proposal, 131 | 'conf_thresh': FLAGS.conf_thresh, 'dataset_config':DATASET_CONFIG} 132 | # ------------------------------------------------------------------------- GLOBAL CONFIG END 133 | 134 | def evaluate_one_epoch(): 135 | stat_dict = {} 136 | ap_calculator_list = [APCalculator(iou_thresh, DATASET_CONFIG.class2type) \ 137 | for iou_thresh in AP_IOU_THRESHOLDS] 138 | net.eval() # set model to eval mode (for bn and dp) 139 | for batch_idx, batch_data_label in enumerate(TEST_DATALOADER): 140 | if batch_idx % 10 == 0: 141 | print('Eval batch: %d'%(batch_idx)) 142 | for key in batch_data_label: 143 | batch_data_label[key] = batch_data_label[key].to(device) 144 | 145 | # Forward pass 146 | inputs = {'point_clouds': batch_data_label['point_clouds']} 147 | with torch.no_grad(): 148 | end_points = net(inputs) 149 | 150 | # Compute loss 151 | for key in batch_data_label: 152 | assert(key not in end_points) 153 | end_points[key] = batch_data_label[key] 154 | loss, end_points = criterion(end_points, DATASET_CONFIG) 155 | 156 | # Accumulate statistics and print out 157 | for key in end_points: 158 | if 'loss' in key or 'acc' in key or 'ratio' in key: 159 | if key not in stat_dict: stat_dict[key] = 0 160 | stat_dict[key] += end_points[key].item() 161 | 162 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT) 163 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 164 | for ap_calculator in ap_calculator_list: 165 | ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls) 166 | 167 | # Dump evaluation results for visualization 168 | if batch_idx == 0: 169 | MODEL.dump_results(end_points, DUMP_DIR, DATASET_CONFIG) 170 | 171 | # Log statistics 172 | for key in sorted(stat_dict.keys()): 
173 | log_string('eval mean %s: %f'%(key, stat_dict[key]/(float(batch_idx+1)))) 174 | 175 | # Evaluate average precision 176 | for i, ap_calculator in enumerate(ap_calculator_list): 177 | print('-'*10, 'iou_thresh: %f'%(AP_IOU_THRESHOLDS[i]), '-'*10) 178 | metrics_dict = ap_calculator.compute_metrics() 179 | for key in metrics_dict: 180 | log_string('eval %s: %f'%(key, metrics_dict[key])) 181 | 182 | mean_loss = stat_dict['loss']/float(batch_idx+1) 183 | return mean_loss 184 | 185 | 186 | def eval(): 187 | log_string(str(datetime.now())) 188 | # Reset numpy seed. 189 | # REF: https://github.com/pytorch/pytorch/issues/5059 190 | np.random.seed() 191 | loss = evaluate_one_epoch() 192 | 193 | if __name__=='__main__': 194 | eval() 195 | -------------------------------------------------------------------------------- /models/backbone_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | self.sa3 = PointnetSAModuleVotes( 54 | npoint=512, 55 | radius=0.8, 56 | nsample=16, 57 | mlp=[256, 128, 128, 256], 58 | use_xyz=True, 59 | normalize_xyz=True 60 | ) 61 | 62 | self.sa4 = PointnetSAModuleVotes( 63 | npoint=256, 64 | radius=1.2, 65 | nsample=16, 66 | mlp=[256, 128, 128, 256], 67 | use_xyz=True, 68 | normalize_xyz=True 69 | ) 70 | 71 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 72 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 73 | 74 | def _break_up_pc(self, pc): 75 | xyz = pc[..., 0:3].contiguous() 76 | features = ( 77 | pc[..., 3:].transpose(1, 2).contiguous() 78 | if pc.size(-1) > 3 else None 79 | ) 80 | 81 | return xyz, features 82 | 83 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 84 | r""" 85 | Forward pass of the network 86 | 87 | Parameters 88 | ---------- 89 | pointcloud: Variable(torch.cuda.FloatTensor) 90 | (B, N, 3 + input_feature_dim) tensor 91 | Point cloud to run predicts on 92 | Each point in the point-cloud MUST 93 | be formated as (x, y, z, features...) 
94 | 95 | Returns 96 | ---------- 97 | end_points: {XXX_xyz, XXX_features, XXX_inds} 98 | XXX_xyz: float32 Tensor of shape (B,K,3) 99 | XXX_features: float32 Tensor of shape (B,K,D) 100 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 101 | """ 102 | if not end_points: end_points = {} 103 | batch_size = pointcloud.shape[0] 104 | 105 | xyz, features = self._break_up_pc(pointcloud) 106 | 107 | # --------- 4 SET ABSTRACTION LAYERS --------- 108 | xyz, features, fps_inds = self.sa1(xyz, features) 109 | end_points['sa1_inds'] = fps_inds 110 | end_points['sa1_xyz'] = xyz 111 | end_points['sa1_features'] = features 112 | 113 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 114 | end_points['sa2_inds'] = fps_inds 115 | end_points['sa2_xyz'] = xyz 116 | end_points['sa2_features'] = features 117 | 118 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 119 | end_points['sa3_xyz'] = xyz 120 | end_points['sa3_features'] = features 121 | 122 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 123 | end_points['sa4_xyz'] = xyz 124 | end_points['sa4_features'] = features 125 | 126 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 127 | features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) 128 | features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) 129 | end_points['fp2_features'] = features 130 | end_points['fp2_xyz'] = end_points['sa2_xyz'] 131 | num_seed = end_points['fp2_xyz'].shape[1] 132 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 133 | return end_points 134 | 135 | 136 | if __name__=='__main__': 137 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 138 | print(backbone_net) 139 | backbone_net.eval() 140 | out = backbone_net(torch.rand(16,20000,6).cuda()) 141 | for key in sorted(out.keys()): 142 | print(key, '\t', out[key].shape) 143 | -------------------------------------------------------------------------------- /models/backbone_module_SA2_denseaspp3_6.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 
31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | 54 | 55 | self.sa2_d3 = PointnetSAModuleVotes( 56 | npoint=1024, 57 | radius=0.8, 58 | nsample=32*3, 59 | mlp=[256, 128, 128, 256], 60 | use_xyz=True, 61 | normalize_xyz=True, 62 | dilation = 3 63 | ) 64 | 65 | 66 | self.sa2_d6 = PointnetSAModuleVotes( 67 | npoint=1024, 68 | radius=1.2, 69 | nsample=32*6, 70 | mlp=[256+256, 128, 128, 256], 71 | use_xyz=True, 72 | normalize_xyz=True, 73 | dilation = 6 74 | ) 75 | 76 | ''' 77 | self.sa3 = PointnetSAModuleVotes( 78 | npoint=512, 79 | radius=0.8, 80 | nsample=16, 81 | mlp=[256, 128, 128, 256], 82 | use_xyz=True, 83 | normalize_xyz=True 84 | ) 85 | 86 | self.sa4 = PointnetSAModuleVotes( 87 | npoint=256, 88 | radius=1.2, 89 | nsample=16, 90 | mlp=[256, 128, 128, 256], 91 | use_xyz=True, 92 | normalize_xyz=True 93 | ) 94 | 95 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 96 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 97 | ''' 98 | 99 | 100 | def _break_up_pc(self, pc): 101 | xyz = pc[..., 0:3].contiguous() 102 | features = ( 103 | pc[..., 3:].transpose(1, 2).contiguous() 104 | if pc.size(-1) > 3 else None 105 | ) 106 | 107 | return xyz, features 108 | 109 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 110 | r""" 111 | Forward pass of the network 112 | 113 | Parameters 114 | ---------- 115 | pointcloud: Variable(torch.cuda.FloatTensor) 116 | (B, N, 3 + input_feature_dim) tensor 117 | Point cloud to run predicts on 118 | Each point in the point-cloud MUST 119 | be formated as (x, y, z, features...) 
120 | 121 | Returns 122 | ---------- 123 | end_points: {XXX_xyz, XXX_features, XXX_inds} 124 | XXX_xyz: float32 Tensor of shape (B,K,3) 125 | XXX_features: float32 Tensor of shape (B,K,D) 126 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 127 | """ 128 | if not end_points: end_points = {} 129 | batch_size = pointcloud.shape[0] 130 | 131 | xyz, features = self._break_up_pc(pointcloud) 132 | 133 | # --------- 4 SET ABSTRACTION LAYERS --------- 134 | xyz, features, fps_inds = self.sa1(xyz, features) 135 | end_points['sa1_inds'] = fps_inds 136 | end_points['sa1_xyz'] = xyz 137 | end_points['sa1_features'] = features 138 | 139 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 140 | end_points['sa2_inds'] = fps_inds 141 | end_points['sa2_xyz'] = xyz 142 | end_points['sa2_features'] = features 143 | 144 | 145 | 146 | _, features, _= self.sa2_d3(xyz, features) # this fps_inds is just 0,1,...,1023 147 | end_points['sa2_d_3_features'] = features 148 | 149 | features_out = torch.cat((end_points['sa2_features'],end_points['sa2_d_3_features']),dim=1) 150 | 151 | _, features, _= self.sa2_d6(xyz, features_out) # this fps_inds is just 0,1,...,1023 152 | end_points['sa2_d_6_features'] = features 153 | 154 | features_out = torch.cat((features_out,end_points['sa2_d_6_features']),dim=1) 155 | 156 | end_points['fp2_features'] = features_out 157 | end_points['fp2_xyz'] = xyz 158 | num_seed = end_points['fp2_xyz'].shape[1] 159 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 160 | return end_points 161 | 162 | 163 | if __name__=='__main__': 164 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 165 | print(backbone_net) 166 | backbone_net.eval() 167 | out = backbone_net(torch.rand(16,20000,6).cuda()) 168 | for key in sorted(out.keys()): 169 | print(key, '\t', out[key].shape) 170 | -------------------------------------------------------------------------------- /models/backbone_module_SA2_denseaspp3_6_12.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 
31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | 54 | 55 | self.sa2_d3 = PointnetSAModuleVotes( 56 | npoint=1024, 57 | radius=0.8, 58 | nsample=32*3, 59 | mlp=[256, 128, 128, 256], 60 | use_xyz=True, 61 | normalize_xyz=True, 62 | dilation = 3 63 | ) 64 | 65 | 66 | self.sa2_d6 = PointnetSAModuleVotes( 67 | npoint=1024, 68 | radius=1.2, 69 | nsample=32*6, 70 | mlp=[256+256, 128, 128, 256], 71 | use_xyz=True, 72 | normalize_xyz=True, 73 | dilation = 6 74 | ) 75 | 76 | 77 | self.sa2_d12 = PointnetSAModuleVotes( 78 | npoint=1024, 79 | radius=1.8, 80 | nsample=32*12, 81 | mlp=[256+256+256, 128, 128, 256], 82 | use_xyz=True, 83 | normalize_xyz=True, 84 | dilation = 12 85 | ) 86 | 87 | 88 | 89 | 90 | ''' 91 | self.sa3 = PointnetSAModuleVotes( 92 | npoint=512, 93 | radius=0.8, 94 | nsample=16, 95 | mlp=[256, 128, 128, 256], 96 | use_xyz=True, 97 | normalize_xyz=True 98 | ) 99 | 100 | self.sa4 = PointnetSAModuleVotes( 101 | npoint=256, 102 | radius=1.2, 103 | nsample=16, 104 | mlp=[256, 128, 128, 256], 105 | use_xyz=True, 106 | normalize_xyz=True 107 | ) 108 | 109 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 110 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 111 | ''' 112 | 113 | 114 | def _break_up_pc(self, pc): 115 | xyz = pc[..., 0:3].contiguous() 116 | features = ( 117 | pc[..., 3:].transpose(1, 2).contiguous() 118 | if pc.size(-1) > 3 else None 119 | ) 120 | 121 | return xyz, features 122 | 123 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 124 | r""" 125 | Forward pass of the network 126 | 127 | Parameters 128 | ---------- 129 | pointcloud: Variable(torch.cuda.FloatTensor) 130 | (B, N, 3 + input_feature_dim) tensor 131 | Point cloud to run predicts on 132 | Each point in the point-cloud MUST 133 | be formated as (x, y, z, features...) 
134 | 135 | Returns 136 | ---------- 137 | end_points: {XXX_xyz, XXX_features, XXX_inds} 138 | XXX_xyz: float32 Tensor of shape (B,K,3) 139 | XXX_features: float32 Tensor of shape (B,K,D) 140 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 141 | """ 142 | if not end_points: end_points = {} 143 | batch_size = pointcloud.shape[0] 144 | 145 | xyz, features = self._break_up_pc(pointcloud) 146 | 147 | # --------- 4 SET ABSTRACTION LAYERS --------- 148 | xyz, features, fps_inds = self.sa1(xyz, features) 149 | end_points['sa1_inds'] = fps_inds 150 | end_points['sa1_xyz'] = xyz 151 | end_points['sa1_features'] = features 152 | 153 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 154 | end_points['sa2_inds'] = fps_inds 155 | end_points['sa2_xyz'] = xyz 156 | end_points['sa2_features'] = features 157 | 158 | 159 | 160 | _, features, _= self.sa2_d3(xyz, features) # this fps_inds is just 0,1,...,1023 161 | end_points['sa2_d_3_features'] = features 162 | 163 | features_out = torch.cat((end_points['sa2_features'],end_points['sa2_d_3_features']),dim=1) 164 | 165 | _, features, _= self.sa2_d6(xyz, features_out) # this fps_inds is just 0,1,...,1023 166 | end_points['sa2_d_6_features'] = features 167 | 168 | features_out = torch.cat((features_out,end_points['sa2_d_6_features']),dim=1) 169 | 170 | _, features, _= self.sa2_d12(xyz, features_out) # this fps_inds is just 0,1,...,1023 171 | end_points['sa2_d_12_features'] = features 172 | 173 | features_out = torch.cat((features_out, end_points['sa2_d_12_features']), dim=1) 174 | 175 | end_points['fp2_features'] = features_out 176 | end_points['fp2_xyz'] = xyz 177 | num_seed = end_points['fp2_xyz'].shape[1] 178 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 179 | return end_points 180 | 181 | 182 | if __name__=='__main__': 183 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 184 | print(backbone_net) 185 | backbone_net.eval() 186 | out = backbone_net(torch.rand(16,20000,6).cuda()) 187 | for key in sorted(out.keys()): 188 | print(key, '\t', out[key].shape) 189 | -------------------------------------------------------------------------------- /models/backbone_module_enc_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | from enc_layer import PointnetSAModuleVotes_enc, PointnetSAModuleVotes_group_enc 21 | 22 | class Pointnet2Backbone(nn.Module): 23 | r""" 24 | Backbone network for point cloud feature learning. 25 | Based on Pointnet++ single-scale grouping network. 26 | 27 | Parameters 28 | ---------- 29 | input_feature_dim: int 30 | Number of input channels in the feature descriptor for each point. 31 | e.g. 3 for RGB. 
32 | """ 33 | def __init__(self, input_feature_dim=0): 34 | super().__init__() 35 | 36 | self.sa1 = PointnetSAModuleVotes_group_enc( 37 | npoint=2048, 38 | radius=0.2, 39 | nsample=64, 40 | mlp=[input_feature_dim, 64, 64, 128*3], 41 | use_xyz=True, 42 | normalize_xyz=True, 43 | dilation=1, 44 | K=8, 45 | G=12 46 | ) 47 | 48 | self.sa2 = PointnetSAModuleVotes_group_enc( 49 | npoint=1024, 50 | radius=0.4, 51 | nsample=32, 52 | mlp=[128*3, 128, 128, 256*3], 53 | use_xyz=True, 54 | normalize_xyz=True, 55 | dilation=1, 56 | K=8, 57 | G=12 58 | ) 59 | 60 | self.sa3 = PointnetSAModuleVotes_group_enc( 61 | npoint=512, 62 | radius=0.8, 63 | nsample=16, 64 | mlp=[256*3, 128, 128, 256*3], 65 | use_xyz=True, 66 | normalize_xyz=True, 67 | dilation=1, 68 | K=8, 69 | G=12 70 | ) 71 | 72 | self.sa4 = PointnetSAModuleVotes_group_enc( 73 | npoint=256, 74 | radius=1.2, 75 | nsample=16, 76 | mlp=[256*3, 128, 128, 256*3], 77 | use_xyz=True, 78 | normalize_xyz=True, 79 | dilation=1, 80 | K=8, 81 | G=12 82 | ) 83 | 84 | self.fp1 = PointnetFPModule(mlp=[(256+256)*3,256,256*3]) 85 | self.fp2 = PointnetFPModule(mlp=[(256+256)*3,256,256*3]) 86 | 87 | def _break_up_pc(self, pc): 88 | xyz = pc[..., 0:3].contiguous() 89 | features = ( 90 | pc[..., 3:].transpose(1, 2).contiguous() 91 | if pc.size(-1) > 3 else None 92 | ) 93 | 94 | return xyz, features 95 | 96 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 97 | r""" 98 | Forward pass of the network 99 | 100 | Parameters 101 | ---------- 102 | pointcloud: Variable(torch.cuda.FloatTensor) 103 | (B, N, 3 + input_feature_dim) tensor 104 | Point cloud to run predicts on 105 | Each point in the point-cloud MUST 106 | be formated as (x, y, z, features...) 107 | 108 | Returns 109 | ---------- 110 | end_points: {XXX_xyz, XXX_features, XXX_inds} 111 | XXX_xyz: float32 Tensor of shape (B,K,3) 112 | XXX_features: float32 Tensor of shape (B,K,D) 113 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 114 | """ 115 | if not end_points: end_points = {} 116 | batch_size = pointcloud.shape[0] 117 | 118 | xyz, features = self._break_up_pc(pointcloud) 119 | 120 | # --------- 4 SET ABSTRACTION LAYERS --------- 121 | xyz, features, fps_inds = self.sa1(xyz, features) 122 | end_points['sa1_inds'] = fps_inds 123 | end_points['sa1_xyz'] = xyz 124 | end_points['sa1_features'] = features 125 | 126 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 127 | end_points['sa2_inds'] = fps_inds 128 | end_points['sa2_xyz'] = xyz 129 | end_points['sa2_features'] = features 130 | 131 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 132 | end_points['sa3_xyz'] = xyz 133 | end_points['sa3_features'] = features 134 | 135 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 136 | end_points['sa4_xyz'] = xyz 137 | end_points['sa4_features'] = features 138 | 139 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 140 | features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) 141 | features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) 142 | end_points['fp2_features'] = features 143 | end_points['fp2_xyz'] = end_points['sa2_xyz'] 144 | num_seed = end_points['fp2_xyz'].shape[1] 145 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 146 | return end_points 147 | 148 | 149 | if __name__=='__main__': 150 | 
backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 151 | print(backbone_net) 152 | backbone_net.eval() 153 | out = backbone_net(torch.rand(16,20000,6).cuda()) 154 | for key in sorted(out.keys()): 155 | print(key, '\t', out[key].shape) 156 | -------------------------------------------------------------------------------- /models/boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(BASE_DIR) 14 | from backbone_module import Pointnet2Backbone 15 | from proposal_module import ProposalModule 16 | from dump_helper import dump_results 17 | from loss_helper_boxnet import get_loss 18 | 19 | 20 | class BoxNet(nn.Module): 21 | r""" 22 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 23 | 24 | Parameters 25 | ---------- 26 | num_class: int 27 | Number of semantics classes to predict over -- size of softmax classifier 28 | num_heading_bin: int 29 | num_size_cluster: int 30 | input_feature_dim: (default: 0) 31 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 32 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 33 | num_proposal: int (default: 128) 34 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 35 | vote_factor: (default: 1) 36 | Number of votes generated from each seed point. 37 | """ 38 | 39 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 40 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 41 | super().__init__() 42 | 43 | self.num_class = num_class 44 | self.num_heading_bin = num_heading_bin 45 | self.num_size_cluster = num_size_cluster 46 | self.mean_size_arr = mean_size_arr 47 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 48 | self.input_feature_dim = input_feature_dim 49 | self.num_proposal = num_proposal 50 | self.vote_factor = vote_factor 51 | self.sampling=sampling 52 | 53 | # Backbone point feature learning 54 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 55 | 56 | # Box proposal, aggregation and detection 57 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 58 | mean_size_arr, num_proposal, sampling) 59 | 60 | def forward(self, inputs): 61 | """ Forward pass of the network 62 | 63 | Args: 64 | inputs: dict 65 | {point_clouds} 66 | 67 | point_clouds: Variable(torch.cuda.FloatTensor) 68 | (B, N, 3 + input_channels) tensor 69 | Point cloud to run predicts on 70 | Each point in the point-cloud MUST 71 | be formated as (x, y, z, features...) 
72 | Returns: 73 | end_points: dict 74 | """ 75 | end_points = {} 76 | batch_size = inputs['point_clouds'].shape[0] 77 | 78 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 79 | xyz = end_points['fp2_xyz'] 80 | features = end_points['fp2_features'] 81 | end_points['seed_inds'] = end_points['fp2_inds'] 82 | end_points['seed_xyz'] = xyz 83 | end_points['seed_features'] = features 84 | 85 | # Directly predict bounding boxes (skips voting) 86 | end_points = self.pnet(xyz, features, end_points) 87 | 88 | return end_points 89 | 90 | 91 | if __name__=='__main__': 92 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 93 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 94 | 95 | # Define dataset 96 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 97 | 98 | # Define model 99 | model = BoxNet(10,12,10,np.random.random((10,3))).cuda() 100 | 101 | # Model forward pass 102 | sample = TRAIN_DATASET[5] 103 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 104 | end_points = model(inputs) 105 | for key in end_points: 106 | print(key, end_points[key]) 107 | 108 | # Compute loss 109 | for key in sample: 110 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 111 | loss, end_points = get_loss(end_points, DC) 112 | print('loss', loss) 113 | end_points['point_clouds'] = inputs['point_clouds'] 114 | end_points['pred_mask'] = np.ones((1,128)) 115 | dump_results(end_points, 'tmp', DC) 116 | -------------------------------------------------------------------------------- /models/dump_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | import os 9 | import sys 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | import pc_util 14 | 15 | DUMP_CONF_THRESH = 0.5 # Dump boxes with obj prob larger than that. 16 | 17 | def softmax(x): 18 | ''' Numpy function for softmax''' 19 | shape = x.shape 20 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 21 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 22 | return probs 23 | 24 | def dump_results(end_points, dump_dir, config, inference_switch=False): 25 | ''' Dump results. 
26 | 27 | Args: 28 | end_points: dict 29 | {..., pred_mask} 30 | pred_mask is a binary mask array of size (batch_size, num_proposal) computed by running NMS and empty box removal 31 | Returns: 32 | None 33 | ''' 34 | if not os.path.exists(dump_dir): 35 | os.system('mkdir %s'%(dump_dir)) 36 | 37 | # INPUT 38 | point_clouds = end_points['point_clouds'].cpu().numpy() 39 | batch_size = point_clouds.shape[0] 40 | 41 | # NETWORK OUTPUTS 42 | seed_xyz = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 43 | if 'vote_xyz' in end_points: 44 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 45 | vote_xyz = end_points['vote_xyz'].detach().cpu().numpy() # (B,num_seed,3) 46 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 47 | objectness_scores = end_points['objectness_scores'].detach().cpu().numpy() # (B,K,2) 48 | pred_center = end_points['center'].detach().cpu().numpy() # (B,K,3) 49 | pred_heading_class = torch.argmax(end_points['heading_scores'], -1) # B,num_proposal 50 | pred_heading_residual = torch.gather(end_points['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 51 | pred_heading_class = pred_heading_class.detach().cpu().numpy() # B,num_proposal 52 | pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal 53 | pred_size_class = torch.argmax(end_points['size_scores'], -1) # B,num_proposal 54 | pred_size_residual = torch.gather(end_points['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 55 | pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal,3 56 | 57 | # OTHERS 58 | pred_mask = end_points['pred_mask'] # B,num_proposal 59 | idx_beg = 0 60 | 61 | for i in range(batch_size): 62 | pc = point_clouds[i,:,:] 63 | objectness_prob = softmax(objectness_scores[i,:,:])[:,1] # (K,) 64 | 65 | # Dump various point clouds 66 | pc_util.write_ply(pc, os.path.join(dump_dir, '%06d_pc.ply'%(idx_beg+i))) 67 | pc_util.write_ply(seed_xyz[i,:,:], os.path.join(dump_dir, '%06d_seed_pc.ply'%(idx_beg+i))) 68 | if 'vote_xyz' in end_points: 69 | pc_util.write_ply(end_points['vote_xyz'][i,:,:], os.path.join(dump_dir, '%06d_vgen_pc.ply'%(idx_beg+i))) 70 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 71 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 72 | pc_util.write_ply(pred_center[i,:,0:3], os.path.join(dump_dir, '%06d_proposal_pc.ply'%(idx_beg+i))) 73 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 74 | pc_util.write_ply(pred_center[i,objectness_prob>DUMP_CONF_THRESH,0:3], os.path.join(dump_dir, '%06d_confident_proposal_pc.ply'%(idx_beg+i))) 75 | 76 | # Dump predicted bounding boxes 77 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 78 | num_proposal = pred_center.shape[1] 79 | obbs = [] 80 | for j in range(num_proposal): 81 | obb = config.param2obb(pred_center[i,j,0:3], pred_heading_class[i,j], pred_heading_residual[i,j], 82 | pred_size_class[i,j], pred_size_residual[i,j]) 83 | obbs.append(obb) 84 | if len(obbs)>0: 85 | obbs = np.vstack(tuple(obbs)) # (num_proposal, 7) 86 | pc_util.write_oriented_bbox(obbs[objectness_prob>DUMP_CONF_THRESH,:], os.path.join(dump_dir, '%06d_pred_confident_bbox.ply'%(idx_beg+i))) 87 | pc_util.write_oriented_bbox(obbs[np.logical_and(objectness_prob>DUMP_CONF_THRESH, pred_mask[i,:]==1),:], os.path.join(dump_dir, 
'%06d_pred_confident_nms_bbox.ply'%(idx_beg+i))) 88 | pc_util.write_oriented_bbox(obbs[pred_mask[i,:]==1,:], os.path.join(dump_dir, '%06d_pred_nms_bbox.ply'%(idx_beg+i))) 89 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_pred_bbox.ply'%(idx_beg+i))) 90 | 91 | # Return if it is at inference time. No dumping of groundtruths 92 | if inference_switch: 93 | return 94 | 95 | # LABELS 96 | gt_center = end_points['center_label'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 97 | gt_mask = end_points['box_label_mask'].cpu().numpy() # B,K2 98 | gt_heading_class = end_points['heading_class_label'].cpu().numpy() # B,K2 99 | gt_heading_residual = end_points['heading_residual_label'].cpu().numpy() # B,K2 100 | gt_size_class = end_points['size_class_label'].cpu().numpy() # B,K2 101 | gt_size_residual = end_points['size_residual_label'].cpu().numpy() # B,K2,3 102 | objectness_label = end_points['objectness_label'].detach().cpu().numpy() # (B,K,) 103 | objectness_mask = end_points['objectness_mask'].detach().cpu().numpy() # (B,K,) 104 | 105 | for i in range(batch_size): 106 | if np.sum(objectness_label[i,:])>0: 107 | pc_util.write_ply(pred_center[i,objectness_label[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_positive_proposal_pc.ply'%(idx_beg+i))) 108 | if np.sum(objectness_mask[i,:])>0: 109 | pc_util.write_ply(pred_center[i,objectness_mask[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_mask_proposal_pc.ply'%(idx_beg+i))) 110 | pc_util.write_ply(gt_center[i,:,0:3], os.path.join(dump_dir, '%06d_gt_centroid_pc.ply'%(idx_beg+i))) 111 | pc_util.write_ply_color(pred_center[i,:,0:3], objectness_label[i,:], os.path.join(dump_dir, '%06d_proposal_pc_objectness_label.obj'%(idx_beg+i))) 112 | 113 | # Dump GT bounding boxes 114 | obbs = [] 115 | for j in range(gt_center.shape[1]): 116 | if gt_mask[i,j] == 0: continue 117 | obb = config.param2obb(gt_center[i,j,0:3], gt_heading_class[i,j], gt_heading_residual[i,j], 118 | gt_size_class[i,j], gt_size_residual[i,j]) 119 | obbs.append(obb) 120 | if len(obbs)>0: 121 | obbs = np.vstack(tuple(obbs)) # (num_gt_objects, 7) 122 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_gt_bbox.ply'%(idx_beg+i))) 123 | 124 | # OPTIONALL, also dump prediction and gt details 125 | if 'batch_pred_map_cls' in end_points: 126 | for ii in range(batch_size): 127 | fout = open(os.path.join(dump_dir, '%06d_pred_map_cls.txt'%(ii)), 'w') 128 | for t in end_points['batch_pred_map_cls'][ii]: 129 | fout.write(str(t[0])+' ') 130 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 131 | fout.write(' '+str(t[2])) 132 | fout.write('\n') 133 | fout.close() 134 | if 'batch_gt_map_cls' in end_points: 135 | for ii in range(batch_size): 136 | fout = open(os.path.join(dump_dir, '%06d_gt_map_cls.txt'%(ii)), 'w') 137 | for t in end_points['batch_gt_map_cls'][ii]: 138 | fout.write(str(t[0])+' ') 139 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 140 | fout.write('\n') 141 | fout.close() 142 | -------------------------------------------------------------------------------- /models/loss_helper_boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
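# Loss helper for BoxNet, the baseline in boxnet.py that skips voting and predicts
# boxes directly from the seed features. Objectness labels are taken from the seeds'
# vote_label_mask gathered at the sampled proposal indices (see the NOTE in
# compute_objectness_loss below). The total loss is
#     10 * (0.5*objectness_loss + box_loss + 0.1*sem_cls_loss),
# with box_loss = center + 0.1*heading_cls + heading_reg + 0.1*size_cls + size_reg.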
5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | from nn_distance import nn_distance, huber_loss 15 | sys.path.append(BASE_DIR) 16 | from loss_helper import compute_box_and_sem_cls_loss 17 | 18 | OBJECTNESS_CLS_WEIGHTS = [0.2,0.8] # put larger weights on positive objectness 19 | 20 | def compute_objectness_loss(end_points): 21 | """ Compute objectness loss for the proposals. 22 | 23 | Args: 24 | end_points: dict (read-only) 25 | 26 | Returns: 27 | objectness_loss: scalar Tensor 28 | objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 29 | objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 30 | object_assignment: (batch_size, num_seed) Tensor with long int 31 | within [0,num_gt_object-1] 32 | """ 33 | # Associate proposal and GT objects by point-to-point distances 34 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'] 35 | gt_center = end_points['center_label'][:,:,0:3] 36 | B = gt_center.shape[0] 37 | K = aggregated_vote_xyz.shape[1] 38 | K2 = gt_center.shape[1] 39 | dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 40 | 41 | # Generate objectness label and mask 42 | # NOTE: Different from VoteNet, here we use seed label as objectness label. 43 | seed_inds = end_points['seed_inds'].long() # B,num_seed in [0,num_points-1] 44 | seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1, seed_inds) 45 | end_points['seed_labels'] = seed_gt_votes_mask 46 | aggregated_vote_inds = end_points['aggregated_vote_inds'] 47 | objectness_label = torch.gather(end_points['seed_labels'], 1, aggregated_vote_inds.long()) # select (B,K) from (B,1024) 48 | objectness_mask = torch.ones((objectness_label.shape[0], objectness_label.shape[1])).cuda() # no ignore zone anymore 49 | 50 | # Compute objectness loss 51 | objectness_scores = end_points['objectness_scores'] 52 | criterion = nn.CrossEntropyLoss(torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') 53 | objectness_loss = criterion(objectness_scores.transpose(2,1), objectness_label) 54 | objectness_loss = torch.sum(objectness_loss * objectness_mask)/(torch.sum(objectness_mask)+1e-6) 55 | 56 | # Set assignment 57 | object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 58 | 59 | return objectness_loss, objectness_label, objectness_mask, object_assignment 60 | 61 | 62 | def get_loss(end_points, config): 63 | """ Loss functions 64 | 65 | Args: 66 | end_points: dict 67 | { 68 | seed_xyz, seed_inds, 69 | center, 70 | heading_scores, heading_residuals_normalized, 71 | size_scores, size_residuals_normalized, 72 | sem_cls_scores, #seed_logits,# 73 | center_label, 74 | heading_class_label, heading_residual_label, 75 | size_class_label, size_residual_label, 76 | sem_cls_label, 77 | box_label_mask, 78 | vote_label, vote_label_mask 79 | } 80 | config: dataset config instance 81 | Returns: 82 | loss: pytorch scalar tensor 83 | end_points: dict 84 | """ 85 | 86 | # Obj loss 87 | objectness_loss, objectness_label, objectness_mask, object_assignment = \ 88 | compute_objectness_loss(end_points) 89 | end_points['objectness_loss'] = objectness_loss 90 | end_points['objectness_label'] = objectness_label 91 | end_points['objectness_mask'] = objectness_mask 92 | end_points['object_assignment'] = object_assignment 93 | total_num_proposal = 
objectness_label.shape[0]*objectness_label.shape[1] 94 | end_points['pos_ratio'] = \ 95 | torch.sum(objectness_label.float().cuda())/float(total_num_proposal) 96 | end_points['neg_ratio'] = \ 97 | torch.sum(objectness_mask.float())/float(total_num_proposal) - end_points['pos_ratio'] 98 | 99 | # Box loss and sem cls loss 100 | center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = \ 101 | compute_box_and_sem_cls_loss(end_points, config) 102 | end_points['center_loss'] = center_loss 103 | end_points['heading_cls_loss'] = heading_cls_loss 104 | end_points['heading_reg_loss'] = heading_reg_loss 105 | end_points['size_cls_loss'] = size_cls_loss 106 | end_points['size_reg_loss'] = size_reg_loss 107 | end_points['sem_cls_loss'] = sem_cls_loss 108 | box_loss = center_loss + 0.1*heading_cls_loss + heading_reg_loss + 0.1*size_cls_loss + size_reg_loss 109 | end_points['box_loss'] = box_loss 110 | 111 | # Final loss function 112 | loss = 0.5*objectness_loss + box_loss + 0.1*sem_cls_loss 113 | loss *= 10 114 | end_points['loss'] = loss 115 | 116 | # -------------------------------------------- 117 | # Some other statistics 118 | obj_pred_val = torch.argmax(end_points['objectness_scores'], 2) # B,K 119 | obj_acc = torch.sum((obj_pred_val==objectness_label.long()).float()*objectness_mask)/(torch.sum(objectness_mask)+1e-6) 120 | end_points['obj_acc'] = obj_acc 121 | 122 | return loss, end_points 123 | -------------------------------------------------------------------------------- /models/proposal_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import os 11 | import sys 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 15 | from pointnet2_modules import PointnetSAModuleVotes 16 | import pointnet2_utils 17 | 18 | def decode_scores(net, end_points, num_class, num_heading_bin, num_size_cluster, mean_size_arr): 19 | net_transposed = net.transpose(2,1) # (batch_size, 1024, ..) 
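    # Layout of the last dimension of net_transposed
    # (width = 2 + 3 + num_heading_bin*2 + num_size_cluster*4 + num_class):
    #   [0:2]                       objectness logits
    #   [2:5]                       center offset from the aggregated vote position
    #   [5 : 5+NH]                  heading-bin scores (NH = num_heading_bin)
    #   [5+NH : 5+2*NH]             normalized heading residuals
    #   [5+2*NH : 5+2*NH+NS]        size-cluster scores (NS = num_size_cluster)
    #   [5+2*NH+NS : 5+2*NH+4*NS]   normalized size residuals (NS x 3)
    #   [5+2*NH+4*NS :]             semantic class scores
    # e.g. with the SUN RGB-D defaults used below (NH=12, NS=10, 10 classes) the width is 79.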
20 | batch_size = net_transposed.shape[0] 21 | num_proposal = net_transposed.shape[1] 22 | 23 | objectness_scores = net_transposed[:,:,0:2] 24 | end_points['objectness_scores'] = objectness_scores 25 | 26 | base_xyz = end_points['aggregated_vote_xyz'] # (batch_size, num_proposal, 3) 27 | center = base_xyz + net_transposed[:,:,2:5] # (batch_size, num_proposal, 3) 28 | end_points['center'] = center 29 | 30 | heading_scores = net_transposed[:,:,5:5+num_heading_bin] 31 | heading_residuals_normalized = net_transposed[:,:,5+num_heading_bin:5+num_heading_bin*2] 32 | end_points['heading_scores'] = heading_scores # Bxnum_proposalxnum_heading_bin 33 | end_points['heading_residuals_normalized'] = heading_residuals_normalized # Bxnum_proposalxnum_heading_bin (should be -1 to 1) 34 | end_points['heading_residuals'] = heading_residuals_normalized * (np.pi/num_heading_bin) # Bxnum_proposalxnum_heading_bin 35 | 36 | size_scores = net_transposed[:,:,5+num_heading_bin*2:5+num_heading_bin*2+num_size_cluster] 37 | size_residuals_normalized = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster:5+num_heading_bin*2+num_size_cluster*4].view([batch_size, num_proposal, num_size_cluster, 3]) # Bxnum_proposalxnum_size_clusterx3 38 | end_points['size_scores'] = size_scores 39 | end_points['size_residuals_normalized'] = size_residuals_normalized 40 | end_points['size_residuals'] = size_residuals_normalized * torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0) 41 | 42 | sem_cls_scores = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster*4:] # Bxnum_proposalx10 43 | end_points['sem_cls_scores'] = sem_cls_scores 44 | return end_points 45 | 46 | 47 | class ProposalModule(nn.Module): 48 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256): 49 | super().__init__() 50 | 51 | self.num_class = num_class 52 | self.num_heading_bin = num_heading_bin 53 | self.num_size_cluster = num_size_cluster 54 | self.mean_size_arr = mean_size_arr 55 | self.num_proposal = num_proposal 56 | self.sampling = sampling 57 | self.seed_feat_dim = seed_feat_dim 58 | 59 | # Vote clustering 60 | self.vote_aggregation = PointnetSAModuleVotes( 61 | npoint=self.num_proposal, 62 | radius=0.3, 63 | nsample=16, 64 | mlp=[self.seed_feat_dim, 128, 128, 128], 65 | use_xyz=True, 66 | normalize_xyz=True 67 | ) 68 | 69 | # Object proposal/detection 70 | # Objectness scores (2), center residual (3), 71 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4) 72 | self.conv1 = torch.nn.Conv1d(128,128,1) 73 | self.conv2 = torch.nn.Conv1d(128,128,1) 74 | self.conv3 = torch.nn.Conv1d(128,2+3+num_heading_bin*2+num_size_cluster*4+self.num_class,1) 75 | self.bn1 = torch.nn.BatchNorm1d(128) 76 | self.bn2 = torch.nn.BatchNorm1d(128) 77 | 78 | def forward(self, xyz, features, end_points): 79 | """ 80 | Args: 81 | xyz: (B,K,3) 82 | features: (B,C,K) 83 | Returns: 84 | scores: (B,num_proposal,2+3+NH*2+NS*4) 85 | """ 86 | if self.sampling == 'vote_fps': 87 | # Farthest point sampling (FPS) on votes 88 | xyz, features, fps_inds = self.vote_aggregation(xyz, features) 89 | sample_inds = fps_inds 90 | elif self.sampling == 'seed_fps': 91 | # FPS on seed and choose the votes corresponding to the seeds 92 | # This gets us a slightly better coverage of *object* votes than vote_fps (which tends to get more cluster votes) 93 | sample_inds = pointnet2_utils.furthest_point_sample(end_points['seed_xyz'], self.num_proposal) 94 | xyz, features, _ = 
self.vote_aggregation(xyz, features, sample_inds) 95 | elif self.sampling == 'random': 96 | # Random sampling from the votes 97 | num_seed = end_points['seed_xyz'].shape[1] 98 | batch_size = end_points['seed_xyz'].shape[0] 99 | sample_inds = torch.randint(0, num_seed, (batch_size, self.num_proposal), dtype=torch.int).cuda() 100 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 101 | else: 102 | log_string('Unknown sampling strategy: %s. Exiting!'%(self.sampling)) 103 | exit() 104 | end_points['aggregated_vote_xyz'] = xyz # (batch_size, num_proposal, 3) 105 | end_points['aggregated_vote_inds'] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal 106 | 107 | # --------- PROPOSAL GENERATION --------- 108 | net = F.relu(self.bn1(self.conv1(features))) 109 | net = F.relu(self.bn2(self.conv2(net))) 110 | net = self.conv3(net) # (batch_size, 2+3+num_heading_bin*2+num_size_cluster*4, num_proposal) 111 | 112 | end_points = decode_scores(net, end_points, self.num_class, self.num_heading_bin, self.num_size_cluster, self.mean_size_arr) 113 | return end_points 114 | 115 | if __name__=='__main__': 116 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 117 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 118 | net = ProposalModule(DC.num_class, DC.num_heading_bin, 119 | DC.num_size_cluster, DC.mean_size_arr, 120 | 128, 'seed_fps').cuda() 121 | end_points = {'seed_xyz': torch.rand(8,1024,3).cuda()} 122 | out = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda(), end_points) 123 | for key in out: 124 | print(key, out[key].shape) 125 | -------------------------------------------------------------------------------- /models/votenet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 
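    mean_size_arr: numpy array of shape (num_size_cluster, 3)
        Mean box dimensions of each size cluster; multiplied with the normalized size
        residuals to recover absolute box sizes.
    sampling: str (default: 'vote_fps')
        How proposal centers are sampled from the votes: 'vote_fps', 'seed_fps' or
        'random' (see ProposalModule).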
43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. 
Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/votenet_SA2_denseaspp3_6.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_SA2_denseaspp3_6 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | 60 | # Backbone point feature learning 61 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 62 | 63 | # Hough voting 64 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 65 | 66 | # Vote aggregation and detection 67 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 68 | mean_size_arr, num_proposal, sampling,seed_feat_dim=256+256+256) 69 | 70 | def forward(self, inputs): 71 | """ Forward pass of the network 72 | 73 | Args: 74 | inputs: dict 75 | {point_clouds} 76 | 77 | point_clouds: Variable(torch.cuda.FloatTensor) 78 | (B, N, 3 + input_channels) tensor 79 | Point cloud to run predicts on 80 | Each point in the point-cloud MUST 81 | be formated as (x, y, z, features...) 
82 | Returns: 83 | end_points: dict 84 | """ 85 | end_points = {} 86 | batch_size = inputs['point_clouds'].shape[0] 87 | 88 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 89 | 90 | # --------- HOUGH VOTING --------- 91 | xyz = end_points['fp2_xyz'] 92 | features = end_points['fp2_features'] 93 | end_points['seed_inds'] = end_points['fp2_inds'] 94 | end_points['seed_xyz'] = xyz 95 | end_points['seed_features'] = features 96 | 97 | xyz, features = self.vgen(xyz, features) 98 | features_norm = torch.norm(features, p=2, dim=1) 99 | features = features.div(features_norm.unsqueeze(1)) 100 | end_points['vote_xyz'] = xyz 101 | end_points['vote_features'] = features 102 | 103 | end_points = self.pnet(xyz, features, end_points) 104 | 105 | return end_points 106 | 107 | 108 | if __name__=='__main__': 109 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 110 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 111 | from loss_helper import get_loss 112 | 113 | # Define model 114 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 115 | 116 | try: 117 | # Define dataset 118 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 119 | 120 | # Model forward pass 121 | sample = TRAIN_DATASET[5] 122 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 123 | except: 124 | print('Dataset has not been prepared. Use a random sample.') 125 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 126 | 127 | end_points = model(inputs) 128 | for key in end_points: 129 | print(key, end_points[key]) 130 | 131 | try: 132 | # Compute loss 133 | for key in sample: 134 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 135 | loss, end_points = get_loss(end_points, DC) 136 | print('loss', loss) 137 | end_points['point_clouds'] = inputs['point_clouds'] 138 | end_points['pred_mask'] = np.ones((1,128)) 139 | dump_results(end_points, 'tmp', DC) 140 | except: 141 | print('Dataset has not been prepared. Skip loss and dump.') 142 | -------------------------------------------------------------------------------- /models/votenet_SA2_denseaspp3_6_12.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_SA2_denseaspp3_6_12 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. 
If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling,seed_feat_dim=256+256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | del xyz,features 104 | 105 | return end_points 106 | 107 | 108 | if __name__=='__main__': 109 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 110 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 111 | from loss_helper import get_loss 112 | 113 | # Define model 114 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 115 | 116 | try: 117 | # Define dataset 118 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 119 | 120 | # Model forward pass 121 | sample = TRAIN_DATASET[5] 122 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 123 | except: 124 | print('Dataset has not been prepared. 
Use a random sample.') 125 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 126 | 127 | end_points = model(inputs) 128 | for key in end_points: 129 | print(key, end_points[key]) 130 | 131 | try: 132 | # Compute loss 133 | for key in sample: 134 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 135 | loss, end_points = get_loss(end_points, DC) 136 | print('loss', loss) 137 | end_points['point_clouds'] = inputs['point_clouds'] 138 | end_points['pred_mask'] = np.ones((1,128)) 139 | dump_results(end_points, 'tmp', DC) 140 | except: 141 | print('Dataset has not been prepared. Skip loss and dump.') 142 | -------------------------------------------------------------------------------- /models/votenet_enc_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_enc_FP2_K8_G12_C3 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 
43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. 
Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/votenet_enc_complex_FP2_K8_G12_C3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module_enc_complex_FP2_K8_G12_C3 import Pointnet2Backbone 20 | from voting_module import VotingModule 21 | from proposal_module import ProposalModule 22 | from dump_helper import dump_results 23 | from loss_helper import get_loss 24 | 25 | 26 | class VoteNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256+256+256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling, seed_feat_dim=256+256+256) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 
81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | if __name__=='__main__': 108 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 109 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, DC 110 | from loss_helper import get_loss 111 | 112 | # Define model 113 | model = VoteNet(10,12,10,np.random.random((10,3))).cuda() 114 | 115 | try: 116 | # Define dataset 117 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=20000, use_v1=True) 118 | 119 | # Model forward pass 120 | sample = TRAIN_DATASET[5] 121 | inputs = {'point_clouds': torch.from_numpy(sample['point_clouds']).unsqueeze(0).cuda()} 122 | except: 123 | print('Dataset has not been prepared. Use a random sample.') 124 | inputs = {'point_clouds': torch.rand((20000,3)).unsqueeze(0).cuda()} 125 | 126 | end_points = model(inputs) 127 | for key in end_points: 128 | print(key, end_points[key]) 129 | 130 | try: 131 | # Compute loss 132 | for key in sample: 133 | end_points[key] = torch.from_numpy(sample[key]).unsqueeze(0).cuda() 134 | loss, end_points = get_loss(end_points, DC) 135 | print('loss', loss) 136 | end_points['point_clouds'] = inputs['point_clouds'] 137 | end_points['pred_mask'] = np.ones((1,128)) 138 | dump_results(end_points, 'tmp', DC) 139 | except: 140 | print('Dataset has not been prepared. Skip loss and dump.') 141 | -------------------------------------------------------------------------------- /models/voting_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Votes generation from seed point features. 
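    Each seed feature is passed through a small shared MLP (1x1 Conv1d layers) that
    predicts, for each of the vote_factor votes, a 3D offset from the seed position
    and a residual that is added to the seed feature.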
19 | 20 | Args: 21 | vote_facotr: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 35 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 36 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 37 | 38 | def forward(self, seed_xyz, seed_features): 39 | """ Forward pass. 40 | 41 | Arguments: 42 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 43 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 44 | Returns: 45 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 46 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 47 | """ 48 | batch_size = seed_xyz.shape[0] 49 | num_seed = seed_xyz.shape[1] 50 | num_vote = num_seed*self.vote_factor 51 | net = F.relu(self.bn1(self.conv1(seed_features))) 52 | net = F.relu(self.bn2(self.conv2(net))) 53 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 54 | 55 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 56 | offset = net[:,:,:,0:3] 57 | vote_xyz = seed_xyz.unsqueeze(2) + offset 58 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 59 | 60 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 61 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 62 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 63 | vote_features = vote_features.transpose(2,1).contiguous() 64 | 65 | return vote_xyz, vote_features 66 | 67 | if __name__=='__main__': 68 | net = VotingModule(2, 256).cuda() 69 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 70 | print('xyz', xyz.shape) 71 | print('features', features.shape) 72 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
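// Launch-configuration helpers: opt_n_threads() rounds work_size down to (roughly)
// the largest power of two not exceeding it and clamps the result to [1, TOTAL_THREADS];
// opt_block_config() does the same for a 2-D block while keeping
// x_threads * y_threads within TOTAL_THREADS. CUDA_CHECK_ERRORS() aborts on the
// first CUDA error reported after a kernel launch.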
5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
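// Input-validation macros used by the extension's host wrappers (ball_query.cpp,
// group_points.cpp, interpolate.cpp, sampling.cpp): they verify contiguity, dtype and
// CUDA placement before a kernel is launched. They are built on AT_CHECK, which newer
// PyTorch releases deprecate in favor of TORCH_CHECK; this is consistent with the
// README's note about incompatibility with newer PyTorch versions.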
5 | 6 | #pragma once 7 | #include 8 | #include 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | AT_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | AT_CHECK(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | AT_CHECK(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.type().is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data(), 31 | xyz.data(), idx.data()); 32 | } else { 33 | AT_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
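// The query_ball_point kernel below uses one thread block per batch element; threads
// stride over the m query points, and each query gathers the indices of up to nsample
// points lying within radius. The index of the first in-radius point pre-fills all
// nsample slots, so queries with fewer than nsample neighbours are padded with that
// first index.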
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.type().is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.type().is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_CHECK(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 43 | CHECK_CONTIGUOUS(grad_out); 44 | CHECK_CONTIGUOUS(idx); 45 | CHECK_IS_FLOAT(grad_out); 46 | CHECK_IS_INT(idx); 47 | 48 | if (grad_out.type().is_cuda()) { 49 | CHECK_CUDA(idx); 50 | } 51 | 52 | at::Tensor output = 53 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 54 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 55 | 56 | if (grad_out.type().is_cuda()) { 57 | group_points_grad_kernel_wrapper( 58 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 59 | grad_out.data(), idx.data(), output.data()); 60 | } else { 61 | AT_CHECK(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
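// Forward kernel: gathers features points(b, c, n) at idx(b, npoints, nsample) into
// out(b, c, npoints, nsample), one thread block per batch element. Backward kernel:
// scatters grad_out back into grad_points(b, c, n) with atomicAdd, since several idx
// entries may reference the same source point.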
5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
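// Host wrappers for feature propagation: three_nn() returns, for every "unknown" point,
// the squared distances and indices of its three nearest "known" points;
// three_interpolate() blends the indexed features with caller-supplied weights into
// out(b, c, n); three_interpolate_grad() routes gradients back to the m known points.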
5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.type().is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.type().is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data(), knows.data(), 39 | dist2.data(), idx.data()); 40 | } else { 41 | AT_CHECK(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.type().is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.type().is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data(), idx.data(), weight.data(), 69 | output.data()); 70 | } else { 71 | AT_CHECK(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.type().is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.type().is_cuda()) { 95 | three_interpolate_grad_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data(), idx.data(), weight.data(), 98 | output.data()); 99 | } else { 100 | AT_CHECK(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
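// Illustrative note: the three_nn kernel below does a brute-force scan over all m known
// points per query, keeping a running top-3 of squared distances (best1 <= best2 <= best3)
// instead of sorting, so each query costs O(m) work. All kernels in this file launch one
// CUDA block per batch element and stride their threads over that element's work items.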
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: unknown(b, n, 3) known(b, m, 3) 13 | // output: dist2(b, n, 3), idx(b, n, 3) 14 | __global__ void three_nn_kernel(int b, int n, int m, 15 | const float *__restrict__ unknown, 16 | const float *__restrict__ known, 17 | float *__restrict__ dist2, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | unknown += batch_index * n * 3; 21 | known += batch_index * m * 3; 22 | dist2 += batch_index * n * 3; 23 | idx += batch_index * n * 3; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | for (int j = index; j < n; j += stride) { 28 | float ux = unknown[j * 3 + 0]; 29 | float uy = unknown[j * 3 + 1]; 30 | float uz = unknown[j * 3 + 2]; 31 | 32 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 33 | int besti1 = 0, besti2 = 0, besti3 = 0; 34 | for (int k = 0; k < m; ++k) { 35 | float x = known[k * 3 + 0]; 36 | float y = known[k * 3 + 1]; 37 | float z = known[k * 3 + 2]; 38 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 39 | if (d < best1) { 40 | best3 = best2; 41 | besti3 = besti2; 42 | best2 = best1; 43 | besti2 = besti1; 44 | best1 = d; 45 | besti1 = k; 46 | } else if (d < best2) { 47 | best3 = best2; 48 | besti3 = besti2; 49 | best2 = d; 50 | besti2 = k; 51 | } else if (d < best3) { 52 | best3 = d; 53 | besti3 = k; 54 | } 55 | } 56 | dist2[j * 3 + 0] = best1; 57 | dist2[j * 3 + 1] = best2; 58 | dist2[j * 3 + 2] = best3; 59 | 60 | idx[j * 3 + 0] = besti1; 61 | idx[j * 3 + 1] = besti2; 62 | idx[j * 3 + 2] = besti3; 63 | } 64 | } 65 | 66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 67 | const float *known, float *dist2, int *idx) { 68 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 69 | three_nn_kernel<<>>(b, n, m, unknown, known, 70 | dist2, idx); 71 | 72 | CUDA_CHECK_ERRORS(); 73 | } 74 | 75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 76 | // output: out(b, c, n) 77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 78 | const float *__restrict__ points, 79 | const int *__restrict__ idx, 80 | const float *__restrict__ weight, 81 | float *__restrict__ out) { 82 | int batch_index = blockIdx.x; 83 | points += batch_index * m * c; 84 | 85 | idx += batch_index * n * 3; 86 | weight += batch_index * n * 3; 87 | 88 | out += batch_index * n * c; 89 | 90 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 91 | const int stride = blockDim.y * blockDim.x; 92 | for (int i = index; i < c * n; i += stride) { 93 | const int l = i / n; 94 | const int j = i % n; 95 | float w1 = weight[j * 3 + 0]; 96 | float w2 = weight[j * 3 + 1]; 97 | float w3 = weight[j * 3 + 2]; 98 | 99 | int i1 = idx[j * 3 + 0]; 100 | int i2 = idx[j * 3 + 1]; 101 | int i3 = idx[j * 3 + 2]; 102 | 103 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 104 | points[l * m + i3] * w3; 105 | } 106 | } 107 | 108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 109 | const float *points, const int *idx, 110 | const float *weight, float *out) { 111 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 112 | three_interpolate_kernel<<>>( 113 | b, c, m, n, points, idx, weight, out); 114 | 115 | CUDA_CHECK_ERRORS(); 116 | } 117 | 118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 119 | // output: grad_points(b, c, m) 120 | 121 | __global__ void three_interpolate_grad_kernel( 122 | int b, int c, int n, int m, const float *__restrict__ grad_out, 123 | const int 
*__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.type().is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.type().is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data(), 37 | idx.data(), output.data()); 38 | } else { 39 | AT_CHECK(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.type().is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.type().is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data(), 63 | idx.data(), output.data()); 64 | } else { 65 | AT_CHECK(false, "CPU not 
supported"); 66 | } 67 | 68 | return output; 69 | } 70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 71 | CHECK_CONTIGUOUS(points); 72 | CHECK_IS_FLOAT(points); 73 | 74 | at::Tensor output = 75 | torch::zeros({points.size(0), nsamples}, 76 | at::device(points.device()).dtype(at::ScalarType::Int)); 77 | 78 | at::Tensor tmp = 79 | torch::full({points.size(0), points.size(1)}, 1e10, 80 | at::device(points.device()).dtype(at::ScalarType::Float)); 81 | 82 | if (points.type().is_cuda()) { 83 | furthest_point_sampling_kernel_wrapper( 84 | points.size(0), points.size(1), nsamples, points.data(), 85 | tmp.data(), output.data()); 86 | } else { 87 | AT_CHECK(false, "CPU not supported"); 88 | } 89 | 90 | return output; 91 | } 92 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, m) 12 | // output: out(b, c, m) 13 | __global__ void gather_points_kernel(int b, int c, int n, int m, 14 | const float *__restrict__ points, 15 | const int *__restrict__ idx, 16 | float *__restrict__ out) { 17 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 18 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 19 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 20 | int a = idx[i * m + j]; 21 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 28 | const float *points, const int *idx, 29 | float *out) { 30 | gather_points_kernel<<>>(b, c, n, npoints, 32 | points, idx, out); 33 | 34 | CUDA_CHECK_ERRORS(); 35 | } 36 | 37 | // input: grad_out(b, c, m) idx(b, m) 38 | // output: grad_points(b, c, n) 39 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 40 | const float *__restrict__ grad_out, 41 | const int *__restrict__ idx, 42 | float *__restrict__ grad_points) { 43 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 44 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 45 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 46 | int a = idx[i * m + j]; 47 | atomicAdd(grad_points + (i * c + l) * n + a, 48 | grad_out[(i * c + l) * m + j]); 49 | } 50 | } 51 | } 52 | } 53 | 54 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 55 | const float *grad_out, const int *idx, 56 | float *grad_points) { 57 | gather_points_grad_kernel<<>>( 59 | b, c, n, npoints, grad_out, idx, grad_points); 60 | 61 | CUDA_CHECK_ERRORS(); 62 | } 63 | 64 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 65 | int idx1, int idx2) { 66 | const float v1 = dists[idx1], v2 = dists[idx2]; 67 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 68 | dists[idx1] = max(v1, v2); 69 | dists_i[idx1] = v2 > v1 ? 
i2 : i1; 70 | } 71 | 72 | // Input dataset: (b, n, 3), tmp: (b, n) 73 | // Ouput idxs (b, m) 74 | template 75 | __global__ void furthest_point_sampling_kernel( 76 | int b, int n, int m, const float *__restrict__ dataset, 77 | float *__restrict__ temp, int *__restrict__ idxs) { 78 | if (m <= 0) return; 79 | __shared__ float dists[block_size]; 80 | __shared__ int dists_i[block_size]; 81 | 82 | int batch_index = blockIdx.x; 83 | dataset += batch_index * n * 3; 84 | temp += batch_index * n; 85 | idxs += batch_index * m; 86 | 87 | int tid = threadIdx.x; 88 | const int stride = block_size; 89 | 90 | int old = 0; 91 | if (threadIdx.x == 0) idxs[0] = old; 92 | 93 | __syncthreads(); 94 | for (int j = 1; j < m; j++) { 95 | int besti = 0; 96 | float best = -1; 97 | float x1 = dataset[old * 3 + 0]; 98 | float y1 = dataset[old * 3 + 1]; 99 | float z1 = dataset[old * 3 + 2]; 100 | for (int k = tid; k < n; k += stride) { 101 | float x2, y2, z2; 102 | x2 = dataset[k * 3 + 0]; 103 | y2 = dataset[k * 3 + 1]; 104 | z2 = dataset[k * 3 + 2]; 105 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 106 | if (mag <= 1e-3) continue; 107 | 108 | float d = 109 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 110 | 111 | float d2 = min(d, temp[k]); 112 | temp[k] = d2; 113 | besti = d2 > best ? k : besti; 114 | best = d2 > best ? d2 : best; 115 | } 116 | dists[tid] = best; 117 | dists_i[tid] = besti; 118 | __syncthreads(); 119 | 120 | if (block_size >= 512) { 121 | if (tid < 256) { 122 | __update(dists, dists_i, tid, tid + 256); 123 | } 124 | __syncthreads(); 125 | } 126 | if (block_size >= 256) { 127 | if (tid < 128) { 128 | __update(dists, dists_i, tid, tid + 128); 129 | } 130 | __syncthreads(); 131 | } 132 | if (block_size >= 128) { 133 | if (tid < 64) { 134 | __update(dists, dists_i, tid, tid + 64); 135 | } 136 | __syncthreads(); 137 | } 138 | if (block_size >= 64) { 139 | if (tid < 32) { 140 | __update(dists, dists_i, tid, tid + 32); 141 | } 142 | __syncthreads(); 143 | } 144 | if (block_size >= 32) { 145 | if (tid < 16) { 146 | __update(dists, dists_i, tid, tid + 16); 147 | } 148 | __syncthreads(); 149 | } 150 | if (block_size >= 16) { 151 | if (tid < 8) { 152 | __update(dists, dists_i, tid, tid + 8); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 8) { 157 | if (tid < 4) { 158 | __update(dists, dists_i, tid, tid + 4); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 4) { 163 | if (tid < 2) { 164 | __update(dists, dists_i, tid, tid + 2); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 2) { 169 | if (tid < 1) { 170 | __update(dists, dists_i, tid, tid + 1); 171 | } 172 | __syncthreads(); 173 | } 174 | 175 | old = dists_i[0]; 176 | if (tid == 0) idxs[j] = old; 177 | } 178 | } 179 | 180 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 181 | const float *dataset, float *temp, 182 | int *idxs) { 183 | unsigned int n_threads = opt_n_threads(n); 184 | 185 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 186 | 187 | switch (n_threads) { 188 | case 512: 189 | furthest_point_sampling_kernel<512> 190 | <<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 256: 193 | furthest_point_sampling_kernel<256> 194 | <<>>(b, n, m, dataset, temp, idxs); 195 | break; 196 | case 128: 197 | furthest_point_sampling_kernel<128> 198 | <<>>(b, n, m, dataset, temp, idxs); 199 | break; 200 | case 64: 201 | furthest_point_sampling_kernel<64> 202 | <<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 32: 205 | 
furthest_point_sampling_kernel<32> 206 | <<>>(b, n, m, dataset, temp, idxs); 207 | break; 208 | case 16: 209 | furthest_point_sampling_kernel<16> 210 | <<>>(b, n, m, dataset, temp, idxs); 211 | break; 212 | case 8: 213 | furthest_point_sampling_kernel<8> 214 | <<>>(b, n, m, dataset, temp, idxs); 215 | break; 216 | case 4: 217 | furthest_point_sampling_kernel<4> 218 | <<>>(b, n, m, dataset, temp, idxs); 219 | break; 220 | case 2: 221 | furthest_point_sampling_kernel<2> 222 | <<>>(b, n, m, dataset, temp, idxs); 223 | break; 224 | case 1: 225 | furthest_point_sampling_kernel<1> 226 | <<>>(b, n, m, dataset, temp, idxs); 227 | break; 228 | default: 229 | furthest_point_sampling_kernel<512> 230 | <<>>(b, n, m, dataset, temp, idxs); 231 | } 232 | 233 | CUDA_CHECK_ERRORS(); 234 | } 235 | -------------------------------------------------------------------------------- /pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. ''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
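# Illustrative note: this module provides the small building blocks (SharedMLP, Conv1d/2d/3d,
# FC, BNMomentumScheduler) used by the PointNet++ layers. SharedMLP stacks 1x1 Conv2d
# (+ optional BN and ReLU) layers, so the same weights are applied to every point and every
# neighborhood of a (B, C, npoint, nsample) feature tensor. A minimal sketch (shapes are made
# up for the example):
#
#   mlp = SharedMLP([3, 64, 128], bn=True)    # 3 -> 64 -> 128 channels
#   grouped = torch.randn(8, 3, 1024, 32)     # (B, C, npoint, nsample)
#   out = mlp(grouped)                        # (8, 128, 1024, 32)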
5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | 12 | class SharedMLP(nn.Sequential): 13 | 14 | def __init__( 15 | self, 16 | args: List[int], 17 | *, 18 | bn: bool = False, 19 | activation=nn.ReLU(inplace=True), 20 | preact: bool = False, 21 | first: bool = False, 22 | name: str = "" 23 | ): 24 | super().__init__() 25 | 26 | for i in range(len(args) - 1): 27 | self.add_module( 28 | name + 'layer{}'.format(i), 29 | Conv2d( 30 | args[i], 31 | args[i + 1], 32 | bn=(not first or not preact or (i != 0)) and bn, 33 | activation=activation 34 | if (not first or not preact or (i != 0)) else None, 35 | preact=preact 36 | ) 37 | ) 38 | 39 | 40 | 41 | class _BNBase(nn.Sequential): 42 | 43 | def __init__(self, in_size, batch_norm=None, name=""): 44 | super().__init__() 45 | self.add_module(name + "bn", batch_norm(in_size)) 46 | 47 | nn.init.constant_(self[0].weight, 1.0) 48 | nn.init.constant_(self[0].bias, 0) 49 | 50 | 51 | class BatchNorm1d(_BNBase): 52 | 53 | def __init__(self, in_size: int, *, name: str = ""): 54 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 55 | 56 | 57 | class BatchNorm2d(_BNBase): 58 | 59 | def __init__(self, in_size: int, name: str = ""): 60 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 61 | 62 | 63 | class BatchNorm3d(_BNBase): 64 | 65 | def __init__(self, in_size: int, name: str = ""): 66 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 67 | 68 | 69 | class _ConvBase(nn.Sequential): 70 | 71 | def __init__( 72 | self, 73 | in_size, 74 | out_size, 75 | kernel_size, 76 | stride, 77 | padding, 78 | activation, 79 | bn, 80 | init, 81 | conv=None, 82 | batch_norm=None, 83 | bias=True, 84 | preact=False, 85 | name="" 86 | ): 87 | super().__init__() 88 | 89 | bias = bias and (not bn) 90 | conv_unit = conv( 91 | in_size, 92 | out_size, 93 | kernel_size=kernel_size, 94 | stride=stride, 95 | padding=padding, 96 | bias=bias 97 | ) 98 | init(conv_unit.weight) 99 | if bias: 100 | nn.init.constant_(conv_unit.bias, 0) 101 | 102 | if bn: 103 | if not preact: 104 | bn_unit = batch_norm(out_size) 105 | else: 106 | bn_unit = batch_norm(in_size) 107 | 108 | if preact: 109 | if bn: 110 | self.add_module(name + 'bn', bn_unit) 111 | 112 | if activation is not None: 113 | self.add_module(name + 'activation', activation) 114 | 115 | self.add_module(name + 'conv', conv_unit) 116 | 117 | if not preact: 118 | if bn: 119 | self.add_module(name + 'bn', bn_unit) 120 | 121 | if activation is not None: 122 | self.add_module(name + 'activation', activation) 123 | 124 | 125 | 126 | 127 | class Conv1d(_ConvBase): 128 | 129 | def __init__( 130 | self, 131 | in_size: int, 132 | out_size: int, 133 | *, 134 | kernel_size: int = 1, 135 | stride: int = 1, 136 | padding: int = 0, 137 | activation=nn.ReLU(inplace=True), 138 | bn: bool = False, 139 | init=nn.init.kaiming_normal_, 140 | bias: bool = True, 141 | preact: bool = False, 142 | name: str = "" 143 | ): 144 | super().__init__( 145 | in_size, 146 | out_size, 147 | kernel_size, 148 | stride, 149 | padding, 150 | activation, 151 | bn, 152 | init, 153 | conv=nn.Conv1d, 154 | batch_norm=BatchNorm1d, 155 | bias=bias, 156 | preact=preact, 157 | name=name 158 | ) 159 | 160 | 161 | class Conv2d(_ConvBase): 162 | 163 | def __init__( 164 | self, 165 | in_size: int, 166 | out_size: int, 167 | *, 168 | kernel_size: Tuple[int, int] = (1, 1), 169 | stride: Tuple[int, int] = (1, 1), 170 | 
padding: Tuple[int, int] = (0, 0), 171 | activation=nn.ReLU(inplace=True), 172 | bn: bool = False, 173 | init=nn.init.kaiming_normal_, 174 | bias: bool = True, 175 | preact: bool = False, 176 | name: str = "" 177 | ): 178 | super().__init__( 179 | in_size, 180 | out_size, 181 | kernel_size, 182 | stride, 183 | padding, 184 | activation, 185 | bn, 186 | init, 187 | conv=nn.Conv2d, 188 | batch_norm=BatchNorm2d, 189 | bias=bias, 190 | preact=preact, 191 | name=name 192 | ) 193 | 194 | 195 | 196 | 197 | 198 | class Conv3d(_ConvBase): 199 | 200 | def __init__( 201 | self, 202 | in_size: int, 203 | out_size: int, 204 | *, 205 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 206 | stride: Tuple[int, int, int] = (1, 1, 1), 207 | padding: Tuple[int, int, int] = (0, 0, 0), 208 | activation=nn.ReLU(inplace=True), 209 | bn: bool = False, 210 | init=nn.init.kaiming_normal_, 211 | bias: bool = True, 212 | preact: bool = False, 213 | name: str = "" 214 | ): 215 | super().__init__( 216 | in_size, 217 | out_size, 218 | kernel_size, 219 | stride, 220 | padding, 221 | activation, 222 | bn, 223 | init, 224 | conv=nn.Conv3d, 225 | batch_norm=BatchNorm3d, 226 | bias=bias, 227 | preact=preact, 228 | name=name 229 | ) 230 | 231 | 232 | class FC(nn.Sequential): 233 | 234 | def __init__( 235 | self, 236 | in_size: int, 237 | out_size: int, 238 | *, 239 | activation=nn.ReLU(inplace=True), 240 | bn: bool = False, 241 | init=None, 242 | preact: bool = False, 243 | name: str = "" 244 | ): 245 | super().__init__() 246 | 247 | fc = nn.Linear(in_size, out_size, bias=not bn) 248 | if init is not None: 249 | init(fc.weight) 250 | if not bn: 251 | nn.init.constant_(fc.bias, 0) 252 | 253 | if preact: 254 | if bn: 255 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 256 | 257 | if activation is not None: 258 | self.add_module(name + 'activation', activation) 259 | 260 | self.add_module(name + 'fc', fc) 261 | 262 | if not preact: 263 | if bn: 264 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 265 | 266 | if activation is not None: 267 | self.add_module(name + 'activation', activation) 268 | 269 | def set_bn_momentum_default(bn_momentum): 270 | 271 | def fn(m): 272 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 273 | m.momentum = bn_momentum 274 | 275 | return fn 276 | 277 | 278 | class BNMomentumScheduler(object): 279 | 280 | def __init__( 281 | self, model, bn_lambda, last_epoch=-1, 282 | setter=set_bn_momentum_default 283 | ): 284 | if not isinstance(model, nn.Module): 285 | raise RuntimeError( 286 | "Class '{}' is not a PyTorch nn Module".format( 287 | type(model).__name__ 288 | ) 289 | ) 290 | 291 | self.model = model 292 | self.setter = setter 293 | self.lmbd = bn_lambda 294 | 295 | self.step(last_epoch + 1) 296 | self.last_epoch = last_epoch 297 | 298 | def step(self, epoch=None): 299 | if epoch is None: 300 | epoch = self.last_epoch + 1 301 | 302 | self.last_epoch = epoch 303 | self.model.apply(self.setter(self.lmbd(epoch))) 304 | 305 | 306 | -------------------------------------------------------------------------------- /pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
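# Illustrative note: this builds the CUDA ops (ball query, grouping, three_nn /
# three_interpolate, furthest point sampling and gather) into the extension module
# pointnet2._ext, which pointnet2_utils.py exposes to the rest of the code. Once the build
# succeeds, the gradcheck script in this folder is a quick GPU sanity check of the compiled ops:
#
#   python pointnet2_test.py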
5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | 10 | _ext_src_root = "_ext_src" 11 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 12 | "{}/src/*.cu".format(_ext_src_root) 13 | ) 14 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 15 | 16 | setup( 17 | name='pointnet2', 18 | ext_modules=[ 19 | CUDAExtension( 20 | name='pointnet2._ext', 21 | sources=_ext_sources, 22 | extra_compile_args={ 23 | "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 24 | "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 25 | }, 26 | ) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /scannet/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare ScanNet Data 2 | 3 | 1. Download ScanNet v2 data [HERE](https://github.com/ScanNet/ScanNet). Move/link the `scans` folder such that under `scans` there should be folders with names such as `scene0001_01`. 4 | 5 | 2. Extract point clouds and annotations (semantic seg, instance seg etc.) by running `python batch_load_scannet_data.py`, which will create a folder named `scannet_train_detection_data` here. 6 | -------------------------------------------------------------------------------- /scannet/batch_load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Batch mode in loading Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | 9 | Usage example: python ./batch_load_scannet_data.py 10 | """ 11 | import os 12 | import sys 13 | import datetime 14 | import numpy as np 15 | from load_scannet_data import export 16 | import pdb 17 | 18 | SCANNET_DIR = 'scans' 19 | TRAIN_SCAN_NAMES = [line.rstrip() for line in open('meta_data/scannet_train.txt')] 20 | LABEL_MAP_FILE = 'meta_data/scannetv2-labels.combined.tsv' 21 | DONOTCARE_CLASS_IDS = np.array([]) 22 | OBJ_CLASS_IDS = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 23 | MAX_NUM_POINT = 50000 24 | OUTPUT_FOLDER = './scannet_train_detection_data' 25 | 26 | def export_one_scan(scan_name, output_filename_prefix): 27 | mesh_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.ply') 28 | agg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.aggregation.json') 29 | seg_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '_vh_clean_2.0.010000.segs.json') 30 | meta_file = os.path.join(SCANNET_DIR, scan_name, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
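    # For each scan, four arrays are written under OUTPUT_FOLDER by the np.save calls below:
    # <scan>_vert.npy (XYZ + RGB vertices), <scan>_sem_label.npy (NYU40 semantic ids),
    # <scan>_ins_label.npy (1-indexed instance ids) and
    # <scan>_bbox.npy (cx, cy, cz, dx, dy, dz, semantic_label axis-aligned boxes).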
31 | mesh_vertices, semantic_labels, instance_labels, instance_bboxes, instance2semantic = \ 32 | export(mesh_file, agg_file, seg_file, meta_file, LABEL_MAP_FILE, None) 33 | 34 | mask = np.logical_not(np.in1d(semantic_labels, DONOTCARE_CLASS_IDS)) 35 | mesh_vertices = mesh_vertices[mask,:] 36 | semantic_labels = semantic_labels[mask] 37 | instance_labels = instance_labels[mask] 38 | 39 | num_instances = len(np.unique(instance_labels)) 40 | print('Num of instances: ', num_instances) 41 | 42 | bbox_mask = np.in1d(instance_bboxes[:,-1], OBJ_CLASS_IDS) 43 | instance_bboxes = instance_bboxes[bbox_mask,:] 44 | print('Num of care instances: ', instance_bboxes.shape[0]) 45 | 46 | N = mesh_vertices.shape[0] 47 | if N > MAX_NUM_POINT: 48 | choices = np.random.choice(N, MAX_NUM_POINT, replace=False) 49 | mesh_vertices = mesh_vertices[choices, :] 50 | semantic_labels = semantic_labels[choices] 51 | instance_labels = instance_labels[choices] 52 | 53 | np.save(output_filename_prefix+'_vert.npy', mesh_vertices) 54 | np.save(output_filename_prefix+'_sem_label.npy', semantic_labels) 55 | np.save(output_filename_prefix+'_ins_label.npy', instance_labels) 56 | np.save(output_filename_prefix+'_bbox.npy', instance_bboxes) 57 | 58 | def batch_export(): 59 | if not os.path.exists(OUTPUT_FOLDER): 60 | print('Creating new data folder: {}'.format(OUTPUT_FOLDER)) 61 | os.mkdir(OUTPUT_FOLDER) 62 | 63 | for scan_name in TRAIN_SCAN_NAMES: 64 | print('-'*20+'begin') 65 | print(datetime.datetime.now()) 66 | print(scan_name) 67 | output_filename_prefix = os.path.join(OUTPUT_FOLDER, scan_name) 68 | if os.path.isfile(output_filename_prefix+'_vert.npy'): 69 | print('File already exists. skipping.') 70 | print('-'*20+'done') 71 | continue 72 | try: 73 | export_one_scan(scan_name, output_filename_prefix) 74 | except: 75 | print('Failed export scan: %s'%(scan_name)) 76 | print('-'*20+'done') 77 | 78 | if __name__=='__main__': 79 | batch_export() 80 | -------------------------------------------------------------------------------- /scannet/data_viz.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
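# Illustrative note: a small inspection script for the extracted ScanNet data. It expects the
# arrays written by batch_load_scannet_data.py (edit scene_name below to any scan you have
# extracted; the default is scene0002_00) and dumps colored point clouds of the raw scene, the
# instance labels and the semantic labels into data_viz_dump/ for viewing in a mesh viewer:
#
#   python data_viz.py    # run from the scannet/ folder after data extraction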
5 | 6 | import sys 7 | import os 8 | 9 | BASE_DIR = os.path.dirname(__file__) 10 | sys.path.append(BASE_DIR) 11 | 12 | import numpy as np 13 | import pc_util 14 | 15 | scene_name = 'scannet_train_detection_data/scene0002_00' 16 | output_folder = 'data_viz_dump' 17 | 18 | data = np.load(scene_name+'_vert.npy') 19 | scene_points = data[:,0:3] 20 | colors = data[:,3:] 21 | instance_labels = np.load(scene_name+'_ins_label.npy') 22 | semantic_labels = np.load(scene_name+'_sem_label.npy') 23 | instance_bboxes = np.load(scene_name+'_bbox.npy') 24 | 25 | print(np.unique(instance_labels)) 26 | print(np.unique(semantic_labels)) 27 | input() 28 | if not os.path.exists(output_folder): 29 | os.mkdir(output_folder) 30 | 31 | # Write scene as OBJ file for visualization 32 | pc_util.write_ply_rgb(scene_points, colors, os.path.join(output_folder, 'scene.obj')) 33 | pc_util.write_ply_color(scene_points, instance_labels, os.path.join(output_folder, 'scene_instance.obj')) 34 | pc_util.write_ply_color(scene_points, semantic_labels, os.path.join(output_folder, 'scene_semantic.obj')) 35 | 36 | from model_util_scannet import ScannetDatasetConfig 37 | DC = ScannetDatasetConfig() 38 | print(instance_bboxes.shape) 39 | -------------------------------------------------------------------------------- /scannet/load_scannet_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Load Scannet scenes with vertices and ground truth labels 7 | for semantic and instance segmentations 8 | """ 9 | 10 | # python imports 11 | import math 12 | import os, sys, argparse 13 | import inspect 14 | import json 15 | import pdb 16 | 17 | try: 18 | import numpy as np 19 | except: 20 | print("Failed to import numpy package.") 21 | sys.exit(-1) 22 | 23 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 24 | import scannet_utils 25 | 26 | def read_aggregation(filename): 27 | assert os.path.isfile(filename) 28 | object_id_to_segs = {} 29 | label_to_segs = {} 30 | with open(filename) as f: 31 | data = json.load(f) 32 | num_objects = len(data['segGroups']) 33 | for i in range(num_objects): 34 | object_id = data['segGroups'][i]['objectId'] + 1 # instance ids should be 1-indexed 35 | label = data['segGroups'][i]['label'] 36 | segs = data['segGroups'][i]['segments'] 37 | object_id_to_segs[object_id] = segs 38 | if label in label_to_segs: 39 | label_to_segs[label].extend(segs) 40 | else: 41 | label_to_segs[label] = segs 42 | return object_id_to_segs, label_to_segs 43 | 44 | 45 | def read_segmentation(filename): 46 | assert os.path.isfile(filename) 47 | seg_to_verts = {} 48 | with open(filename) as f: 49 | data = json.load(f) 50 | num_verts = len(data['segIndices']) 51 | for i in range(num_verts): 52 | seg_id = data['segIndices'][i] 53 | if seg_id in seg_to_verts: 54 | seg_to_verts[seg_id].append(i) 55 | else: 56 | seg_to_verts[seg_id] = [i] 57 | return seg_to_verts, num_verts 58 | 59 | 60 | def export(mesh_file, agg_file, seg_file, meta_file, label_map_file, output_file=None): 61 | """ points are XYZ RGB (RGB in 0-255), 62 | semantic label as nyu40 ids, 63 | instance label as 1-#instance, 64 | box as (cx,cy,cz,dx,dy,dz,semantic_label) 65 | """ 66 | label_map = scannet_utils.read_label_mapping(label_map_file, 67 | label_from='raw_category', label_to='nyu40id') 68 | 
mesh_vertices = scannet_utils.read_mesh_vertices_rgb(mesh_file) 69 | 70 | # Load scene axis alignment matrix 71 | lines = open(meta_file).readlines() 72 | for line in lines: 73 | if 'axisAlignment' in line: 74 | axis_align_matrix = [float(x) \ 75 | for x in line.rstrip().strip('axisAlignment = ').split(' ')] 76 | break 77 | axis_align_matrix = np.array(axis_align_matrix).reshape((4,4)) 78 | pts = np.ones((mesh_vertices.shape[0], 4)) 79 | pts[:,0:3] = mesh_vertices[:,0:3] 80 | pts = np.dot(pts, axis_align_matrix.transpose()) # Nx4 81 | mesh_vertices[:,0:3] = pts[:,0:3] 82 | 83 | # Load semantic and instance labels 84 | object_id_to_segs, label_to_segs = read_aggregation(agg_file) 85 | seg_to_verts, num_verts = read_segmentation(seg_file) 86 | label_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 87 | object_id_to_label_id = {} 88 | for label, segs in label_to_segs.items(): 89 | label_id = label_map[label] 90 | for seg in segs: 91 | verts = seg_to_verts[seg] 92 | label_ids[verts] = label_id 93 | instance_ids = np.zeros(shape=(num_verts), dtype=np.uint32) # 0: unannotated 94 | num_instances = len(np.unique(list(object_id_to_segs.keys()))) 95 | for object_id, segs in object_id_to_segs.items(): 96 | for seg in segs: 97 | verts = seg_to_verts[seg] 98 | instance_ids[verts] = object_id 99 | if object_id not in object_id_to_label_id: 100 | object_id_to_label_id[object_id] = label_ids[verts][0] 101 | instance_bboxes = np.zeros((num_instances,7)) 102 | for obj_id in object_id_to_segs: 103 | label_id = object_id_to_label_id[obj_id] 104 | obj_pc = mesh_vertices[instance_ids==obj_id, 0:3] 105 | if len(obj_pc) == 0: continue 106 | # Compute axis aligned box 107 | # An axis aligned bounding box is parameterized by 108 | # (cx,cy,cz) and (dx,dy,dz) and label id 109 | # where (cx,cy,cz) is the center point of the box, 110 | # dx is the x-axis length of the box. 111 | xmin = np.min(obj_pc[:,0]) 112 | ymin = np.min(obj_pc[:,1]) 113 | zmin = np.min(obj_pc[:,2]) 114 | xmax = np.max(obj_pc[:,0]) 115 | ymax = np.max(obj_pc[:,1]) 116 | zmax = np.max(obj_pc[:,2]) 117 | bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, 118 | xmax-xmin, ymax-ymin, zmax-zmin, label_id]) 119 | # NOTE: this assumes obj_id is in 1,2,3,.,,,.NUM_INSTANCES 120 | instance_bboxes[obj_id-1,:] = bbox 121 | 122 | if output_file is not None: 123 | np.save(output_file+'_vert.npy', mesh_vertices) 124 | np.save(output_file+'_sem_label.npy', label_ids) 125 | np.save(output_file+'_ins_label.npy', instance_ids) 126 | np.save(output_file+'_bbox.npy', instance_bboxes) 127 | 128 | return mesh_vertices, label_ids, instance_ids,\ 129 | instance_bboxes, object_id_to_label_id 130 | 131 | def main(): 132 | parser = argparse.ArgumentParser() 133 | parser.add_argument('--scan_path', required=True, help='path to scannet scene (e.g., data/ScanNet/v2/scene0000_00') 134 | parser.add_argument('--output_file', required=True, help='output file') 135 | parser.add_argument('--label_map_file', required=True, help='path to scannetv2-labels.combined.tsv') 136 | opt = parser.parse_args() 137 | 138 | scan_name = os.path.split(opt.scan_path)[-1] 139 | mesh_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.ply') 140 | agg_file = os.path.join(opt.scan_path, scan_name + '.aggregation.json') 141 | seg_file = os.path.join(opt.scan_path, scan_name + '_vh_clean_2.0.010000.segs.json') 142 | meta_file = os.path.join(opt.scan_path, scan_name + '.txt') # includes axisAlignment info for the train set scans. 
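    # Example single-scene invocation (paths are placeholders; run from the scannet/ folder):
    #   python load_scannet_data.py --scan_path data/ScanNet/v2/scene0000_00 \
    #       --output_file scene0000_00 --label_map_file meta_data/scannetv2-labels.combined.tsv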
143 | export(mesh_file, agg_file, seg_file, meta_file, opt.label_map_file, opt.output_file) 144 | 145 | if __name__ == '__main__': 146 | main() 147 | -------------------------------------------------------------------------------- /scannet/meta_data/scannet_means.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AsahiLiu/PointDetectron/634ffadfbc50d2f0dea4434e11d5977640f34004/scannet/meta_data/scannet_means.npz -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_test.txt: -------------------------------------------------------------------------------- 1 | scene0707_00 2 | scene0708_00 3 | scene0709_00 4 | scene0710_00 5 | scene0711_00 6 | scene0712_00 7 | scene0713_00 8 | scene0714_00 9 | scene0715_00 10 | scene0716_00 11 | scene0717_00 12 | scene0718_00 13 | scene0719_00 14 | scene0720_00 15 | scene0721_00 16 | scene0722_00 17 | scene0723_00 18 | scene0724_00 19 | scene0725_00 20 | scene0726_00 21 | scene0727_00 22 | scene0728_00 23 | scene0729_00 24 | scene0730_00 25 | scene0731_00 26 | scene0732_00 27 | scene0733_00 28 | scene0734_00 29 | scene0735_00 30 | scene0736_00 31 | scene0737_00 32 | scene0738_00 33 | scene0739_00 34 | scene0740_00 35 | scene0741_00 36 | scene0742_00 37 | scene0743_00 38 | scene0744_00 39 | scene0745_00 40 | scene0746_00 41 | scene0747_00 42 | scene0748_00 43 | scene0749_00 44 | scene0750_00 45 | scene0751_00 46 | scene0752_00 47 | scene0753_00 48 | scene0754_00 49 | scene0755_00 50 | scene0756_00 51 | scene0757_00 52 | scene0758_00 53 | scene0759_00 54 | scene0760_00 55 | scene0761_00 56 | scene0762_00 57 | scene0763_00 58 | scene0764_00 59 | scene0765_00 60 | scene0766_00 61 | scene0767_00 62 | scene0768_00 63 | scene0769_00 64 | scene0770_00 65 | scene0771_00 66 | scene0772_00 67 | scene0773_00 68 | scene0774_00 69 | scene0775_00 70 | scene0776_00 71 | scene0777_00 72 | scene0778_00 73 | scene0779_00 74 | scene0780_00 75 | scene0781_00 76 | scene0782_00 77 | scene0783_00 78 | scene0784_00 79 | scene0785_00 80 | scene0786_00 81 | scene0787_00 82 | scene0788_00 83 | scene0789_00 84 | scene0790_00 85 | scene0791_00 86 | scene0792_00 87 | scene0793_00 88 | scene0794_00 89 | scene0795_00 90 | scene0796_00 91 | scene0797_00 92 | scene0798_00 93 | scene0799_00 94 | scene0800_00 95 | scene0801_00 96 | scene0802_00 97 | scene0803_00 98 | scene0804_00 99 | scene0805_00 100 | scene0806_00 101 | -------------------------------------------------------------------------------- /scannet/meta_data/scannetv2_val.txt: -------------------------------------------------------------------------------- 1 | scene0568_00 2 | scene0568_01 3 | scene0568_02 4 | scene0304_00 5 | scene0488_00 6 | scene0488_01 7 | scene0412_00 8 | scene0412_01 9 | scene0217_00 10 | scene0019_00 11 | scene0019_01 12 | scene0414_00 13 | scene0575_00 14 | scene0575_01 15 | scene0575_02 16 | scene0426_00 17 | scene0426_01 18 | scene0426_02 19 | scene0426_03 20 | scene0549_00 21 | scene0549_01 22 | scene0578_00 23 | scene0578_01 24 | scene0578_02 25 | scene0665_00 26 | scene0665_01 27 | scene0050_00 28 | scene0050_01 29 | scene0050_02 30 | scene0257_00 31 | scene0025_00 32 | scene0025_01 33 | scene0025_02 34 | scene0583_00 35 | scene0583_01 36 | scene0583_02 37 | scene0701_00 38 | scene0701_01 39 | scene0701_02 40 | scene0580_00 41 | scene0580_01 42 | scene0565_00 43 | scene0169_00 44 | scene0169_01 45 | scene0655_00 46 | scene0655_01 47 | scene0655_02 48 | scene0063_00 49 | 
scene0221_00 50 | scene0221_01 51 | scene0591_00 52 | scene0591_01 53 | scene0591_02 54 | scene0678_00 55 | scene0678_01 56 | scene0678_02 57 | scene0462_00 58 | scene0427_00 59 | scene0595_00 60 | scene0193_00 61 | scene0193_01 62 | scene0164_00 63 | scene0164_01 64 | scene0164_02 65 | scene0164_03 66 | scene0598_00 67 | scene0598_01 68 | scene0598_02 69 | scene0599_00 70 | scene0599_01 71 | scene0599_02 72 | scene0328_00 73 | scene0300_00 74 | scene0300_01 75 | scene0354_00 76 | scene0458_00 77 | scene0458_01 78 | scene0423_00 79 | scene0423_01 80 | scene0423_02 81 | scene0307_00 82 | scene0307_01 83 | scene0307_02 84 | scene0606_00 85 | scene0606_01 86 | scene0606_02 87 | scene0432_00 88 | scene0432_01 89 | scene0608_00 90 | scene0608_01 91 | scene0608_02 92 | scene0651_00 93 | scene0651_01 94 | scene0651_02 95 | scene0430_00 96 | scene0430_01 97 | scene0689_00 98 | scene0357_00 99 | scene0357_01 100 | scene0574_00 101 | scene0574_01 102 | scene0574_02 103 | scene0329_00 104 | scene0329_01 105 | scene0329_02 106 | scene0153_00 107 | scene0153_01 108 | scene0616_00 109 | scene0616_01 110 | scene0671_00 111 | scene0671_01 112 | scene0618_00 113 | scene0382_00 114 | scene0382_01 115 | scene0490_00 116 | scene0621_00 117 | scene0607_00 118 | scene0607_01 119 | scene0149_00 120 | scene0695_00 121 | scene0695_01 122 | scene0695_02 123 | scene0695_03 124 | scene0389_00 125 | scene0377_00 126 | scene0377_01 127 | scene0377_02 128 | scene0342_00 129 | scene0139_00 130 | scene0629_00 131 | scene0629_01 132 | scene0629_02 133 | scene0496_00 134 | scene0633_00 135 | scene0633_01 136 | scene0518_00 137 | scene0652_00 138 | scene0406_00 139 | scene0406_01 140 | scene0406_02 141 | scene0144_00 142 | scene0144_01 143 | scene0494_00 144 | scene0278_00 145 | scene0278_01 146 | scene0316_00 147 | scene0609_00 148 | scene0609_01 149 | scene0609_02 150 | scene0609_03 151 | scene0084_00 152 | scene0084_01 153 | scene0084_02 154 | scene0696_00 155 | scene0696_01 156 | scene0696_02 157 | scene0351_00 158 | scene0351_01 159 | scene0643_00 160 | scene0644_00 161 | scene0645_00 162 | scene0645_01 163 | scene0645_02 164 | scene0081_00 165 | scene0081_01 166 | scene0081_02 167 | scene0647_00 168 | scene0647_01 169 | scene0535_00 170 | scene0353_00 171 | scene0353_01 172 | scene0353_02 173 | scene0559_00 174 | scene0559_01 175 | scene0559_02 176 | scene0593_00 177 | scene0593_01 178 | scene0246_00 179 | scene0653_00 180 | scene0653_01 181 | scene0064_00 182 | scene0064_01 183 | scene0356_00 184 | scene0356_01 185 | scene0356_02 186 | scene0030_00 187 | scene0030_01 188 | scene0030_02 189 | scene0222_00 190 | scene0222_01 191 | scene0338_00 192 | scene0338_01 193 | scene0338_02 194 | scene0378_00 195 | scene0378_01 196 | scene0378_02 197 | scene0660_00 198 | scene0553_00 199 | scene0553_01 200 | scene0553_02 201 | scene0527_00 202 | scene0663_00 203 | scene0663_01 204 | scene0663_02 205 | scene0664_00 206 | scene0664_01 207 | scene0664_02 208 | scene0334_00 209 | scene0334_01 210 | scene0334_02 211 | scene0046_00 212 | scene0046_01 213 | scene0046_02 214 | scene0203_00 215 | scene0203_01 216 | scene0203_02 217 | scene0088_00 218 | scene0088_01 219 | scene0088_02 220 | scene0088_03 221 | scene0086_00 222 | scene0086_01 223 | scene0086_02 224 | scene0670_00 225 | scene0670_01 226 | scene0256_00 227 | scene0256_01 228 | scene0256_02 229 | scene0249_00 230 | scene0441_00 231 | scene0658_00 232 | scene0704_00 233 | scene0704_01 234 | scene0187_00 235 | scene0187_01 236 | scene0131_00 237 | scene0131_01 238 | scene0131_02 
239 | scene0207_00 240 | scene0207_01 241 | scene0207_02 242 | scene0461_00 243 | scene0011_00 244 | scene0011_01 245 | scene0343_00 246 | scene0251_00 247 | scene0077_00 248 | scene0077_01 249 | scene0684_00 250 | scene0684_01 251 | scene0550_00 252 | scene0686_00 253 | scene0686_01 254 | scene0686_02 255 | scene0208_00 256 | scene0500_00 257 | scene0500_01 258 | scene0552_00 259 | scene0552_01 260 | scene0648_00 261 | scene0648_01 262 | scene0435_00 263 | scene0435_01 264 | scene0435_02 265 | scene0435_03 266 | scene0690_00 267 | scene0690_01 268 | scene0693_00 269 | scene0693_01 270 | scene0693_02 271 | scene0700_00 272 | scene0700_01 273 | scene0700_02 274 | scene0699_00 275 | scene0231_00 276 | scene0231_01 277 | scene0231_02 278 | scene0697_00 279 | scene0697_01 280 | scene0697_02 281 | scene0697_03 282 | scene0474_00 283 | scene0474_01 284 | scene0474_02 285 | scene0474_03 286 | scene0474_04 287 | scene0474_05 288 | scene0355_00 289 | scene0355_01 290 | scene0146_00 291 | scene0146_01 292 | scene0146_02 293 | scene0196_00 294 | scene0702_00 295 | scene0702_01 296 | scene0702_02 297 | scene0314_00 298 | scene0277_00 299 | scene0277_01 300 | scene0277_02 301 | scene0095_00 302 | scene0095_01 303 | scene0015_00 304 | scene0100_00 305 | scene0100_01 306 | scene0100_02 307 | scene0558_00 308 | scene0558_01 309 | scene0558_02 310 | scene0685_00 311 | scene0685_01 312 | scene0685_02 313 | -------------------------------------------------------------------------------- /scannet/model_util_scannet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | from box_util import get_3d_box 14 | 15 | class ScannetDatasetConfig(object): 16 | def __init__(self): 17 | self.num_class = 18 18 | self.num_heading_bin = 1 19 | self.num_size_cluster = 18 20 | 21 | self.type2class = {'cabinet':0, 'bed':1, 'chair':2, 'sofa':3, 'table':4, 'door':5, 22 | 'window':6,'bookshelf':7,'picture':8, 'counter':9, 'desk':10, 'curtain':11, 23 | 'refrigerator':12, 'showercurtrain':13, 'toilet':14, 'sink':15, 'bathtub':16, 'garbagebin':17} 24 | self.class2type = {self.type2class[t]:t for t in self.type2class} 25 | self.nyu40ids = np.array([3,4,5,6,7,8,9,10,11,12,14,16,24,28,33,34,36,39]) 26 | self.nyu40id2class = {nyu40id: i for i,nyu40id in enumerate(list(self.nyu40ids))} 27 | self.mean_size_arr = np.load(os.path.join(ROOT_DIR,'scannet/meta_data/scannet_means.npz'))['arr_0'] 28 | self.type_mean_size = {} 29 | for i in range(self.num_size_cluster): 30 | self.type_mean_size[self.class2type[i]] = self.mean_size_arr[i,:] 31 | 32 | def angle2class(self, angle): 33 | ''' Convert continuous angle to discrete class 34 | [optinal] also small regression number from 35 | class center angle to current angle. 36 | 37 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 38 | return is class of int32 of 0,1,...,N-1 and a number such that 39 | class*(2pi/N) + number = angle 40 | 41 | NOT USED. 42 | ''' 43 | assert(False) 44 | 45 | def class2angle(self, pred_cls, residual, to_label_format=True): 46 | ''' Inverse function to angle2class. 
47 | 48 | As ScanNet only has axis-alined boxes so angles are always 0. ''' 49 | return 0 50 | 51 | def size2class(self, size, type_name): 52 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 53 | size_class = self.type2class[type_name] 54 | size_residual = size - self.type_mean_size[type_name] 55 | return size_class, size_residual 56 | 57 | def class2size(self, pred_cls, residual): 58 | ''' Inverse function to size2class ''' 59 | return self.mean_size_arr[pred_cls, :] + residual 60 | 61 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 62 | heading_angle = self.class2angle(heading_class, heading_residual) 63 | box_size = self.class2size(int(size_class), size_residual) 64 | obb = np.zeros((7,)) 65 | obb[0:3] = center 66 | obb[3:6] = box_size 67 | obb[6] = heading_angle*-1 68 | return obb 69 | 70 | def rotate_aligned_boxes(input_boxes, rot_mat): 71 | centers, lengths = input_boxes[:,0:3], input_boxes[:,3:6] 72 | new_centers = np.dot(centers, np.transpose(rot_mat)) 73 | 74 | dx, dy = lengths[:,0]/2.0, lengths[:,1]/2.0 75 | new_x = np.zeros((dx.shape[0], 4)) 76 | new_y = np.zeros((dx.shape[0], 4)) 77 | 78 | for i, crnr in enumerate([(-1,-1), (1, -1), (1, 1), (-1, 1)]): 79 | crnrs = np.zeros((dx.shape[0], 3)) 80 | crnrs[:,0] = crnr[0]*dx 81 | crnrs[:,1] = crnr[1]*dy 82 | crnrs = np.dot(crnrs, np.transpose(rot_mat)) 83 | new_x[:,i] = crnrs[:,0] 84 | new_y[:,i] = crnrs[:,1] 85 | 86 | 87 | new_dx = 2.0*np.max(new_x, 1) 88 | new_dy = 2.0*np.max(new_y, 1) 89 | new_lengths = np.stack((new_dx, new_dy, lengths[:,2]), axis=1) 90 | 91 | return np.concatenate([new_centers, new_lengths], axis=1) 92 | -------------------------------------------------------------------------------- /scannet/scannet_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Ref: https://github.com/ScanNet/ScanNet/blob/master/BenchmarkScripts ''' 7 | import os 8 | import sys 9 | import json 10 | import csv 11 | 12 | try: 13 | import numpy as np 14 | except: 15 | print("Failed to import numpy package.") 16 | sys.exit(-1) 17 | 18 | try: 19 | from plyfile import PlyData, PlyElement 20 | except: 21 | print("Please install the module 'plyfile' for PLY i/o, e.g.") 22 | print("pip install plyfile") 23 | sys.exit(-1) 24 | 25 | def represents_int(s): 26 | ''' if string s represents an int. ''' 27 | try: 28 | int(s) 29 | return True 30 | except ValueError: 31 | return False 32 | 33 | 34 | def read_label_mapping(filename, label_from='raw_category', label_to='nyu40id'): 35 | assert os.path.isfile(filename) 36 | mapping = dict() 37 | with open(filename) as csvfile: 38 | reader = csv.DictReader(csvfile, delimiter='\t') 39 | for row in reader: 40 | mapping[row[label_from]] = int(row[label_to]) 41 | if represents_int(list(mapping.keys())[0]): 42 | mapping = {int(k):v for k,v in mapping.items()} 43 | return mapping 44 | 45 | def read_mesh_vertices(filename): 46 | """ read XYZ for each vertex. 
47 | """ 48 | assert os.path.isfile(filename) 49 | with open(filename, 'rb') as f: 50 | plydata = PlyData.read(f) 51 | num_verts = plydata['vertex'].count 52 | vertices = np.zeros(shape=[num_verts, 3], dtype=np.float32) 53 | vertices[:,0] = plydata['vertex'].data['x'] 54 | vertices[:,1] = plydata['vertex'].data['y'] 55 | vertices[:,2] = plydata['vertex'].data['z'] 56 | return vertices 57 | 58 | def read_mesh_vertices_rgb(filename): 59 | """ read XYZ RGB for each vertex. 60 | Note: RGB values are in 0-255 61 | """ 62 | assert os.path.isfile(filename) 63 | with open(filename, 'rb') as f: 64 | plydata = PlyData.read(f) 65 | num_verts = plydata['vertex'].count 66 | vertices = np.zeros(shape=[num_verts, 6], dtype=np.float32) 67 | vertices[:,0] = plydata['vertex'].data['x'] 68 | vertices[:,1] = plydata['vertex'].data['y'] 69 | vertices[:,2] = plydata['vertex'].data['z'] 70 | vertices[:,3] = plydata['vertex'].data['red'] 71 | vertices[:,4] = plydata['vertex'].data['green'] 72 | vertices[:,5] = plydata['vertex'].data['blue'] 73 | return vertices 74 | 75 | 76 | -------------------------------------------------------------------------------- /sunrgbd/README.md: -------------------------------------------------------------------------------- 1 | ### Prepare SUN RGB-D Data 2 | 3 | 1. Download SUNRGBD v2 data [HERE](http://rgbd.cs.princeton.edu/data/) (SUNRGBD.zip, SUNRGBDMeta2DBB_v2.mat, SUNRGBDMeta3DBB_v2.mat) and the toolkits (SUNRGBDtoolbox.zip). Move all the downloaded files under OFFICIAL_SUNRGBD. Unzip the zip files. 4 | 5 | 2. Extract point clouds and annotations (class, v2 2D -- xmin,ymin,xmax,ymax, and 3D bounding boxes -- centroids, size, 2D heading) by running `extract_split.m`, `extract_rgbd_data_v2.m` and `extract_rgbd_data_v1.m` under the `matlab` folder. 6 | 7 | 3. Prepare data by running `python sunrgbd_data.py --gen_v1_data` 8 | 9 | You can also examine and visualize the data with `python sunrgbd_data.py --viz` and use MeshLab to view the generated PLY files at `data_viz_dump`. 10 | 11 | NOTE: SUNRGBDtoolbox.zip should have MD5 hash `18d22e1761d36352f37232cba102f91f` (you can check the hash with `md5 SUNRGBDtoolbox.zip` on Mac OS or `md5sum SUNRGBDtoolbox.zip` on Linux) 12 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_rgbd_data_v1.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Extract using V1 labels. 11 | % 12 | % Author: Charles R. 
Qi 13 | % 14 | clear; close all; clc; 15 | addpath(genpath('.')) 16 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 17 | %% V1 2D&3D BB and Seg masks 18 | load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/Metadata/SUNRGBDMeta.mat') 19 | % load('./Metadata/SUNRGBD2Dseg.mat') 20 | 21 | %% Create folders 22 | det_label_folder = '../sunrgbd_trainval/label_v1/'; 23 | mkdir(det_label_folder); 24 | %% Read 25 | for imageId = 1:10335 26 | imageId 27 | try 28 | data = SUNRGBDMeta(imageId); 29 | data.depthpath(1:16) = ''; 30 | data.depthpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.depthpath); 31 | data.rgbpath(1:16) = ''; 32 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD/SUNRGBD', data.rgbpath); 33 | 34 | % MAT files are 3x smaller than TXT files. In Python we can use 35 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 36 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 37 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 38 | 39 | % Write 2D and 3D box label 40 | data2d = data; 41 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 42 | for j = 1:length(data.groundtruth3DBB) 43 | centroid = data.groundtruth3DBB(j).centroid; 44 | classname = data.groundtruth3DBB(j).classname; 45 | orientation = data.groundtruth3DBB(j).orientation; 46 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 47 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 48 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 49 | end 50 | fclose(fid); 51 | 52 | catch 53 | end 54 | 55 | end 56 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_rgbd_data_v2.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump SUNRGBD data to our format 7 | % for each sample, we have RGB image, 2d boxes. 8 | % point cloud (in camera coordinate), calibration and 3d boxes. 9 | % 10 | % Compared to extract_rgbd_data.m in frustum_pointents, use v2 2D and 3D 11 | % bboxes. 12 | % 13 | % Author: Charles R. 
Qi 14 | % 15 | clear; close all; clc; 16 | addpath(genpath('.')) 17 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/readData') 18 | %% V1 2D&3D BB and Seg masks 19 | % load('./Metadata/SUNRGBDMeta.mat') 20 | % load('./Metadata/SUNRGBD2Dseg.mat') 21 | 22 | %% V2 3DBB annotations (overwrites SUNRGBDMeta) 23 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 24 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta2DBB_v2.mat'); 25 | %% Create folders 26 | depth_folder = '../sunrgbd_trainval/depth/'; 27 | image_folder = '../sunrgbd_trainval/image/'; 28 | calib_folder = '../sunrgbd_trainval/calib/'; 29 | det_label_folder = '../sunrgbd_trainval/label/'; 30 | seg_label_folder = '../sunrgbd_trainval/seg_label/'; 31 | mkdir(depth_folder); 32 | mkdir(image_folder); 33 | mkdir(calib_folder); 34 | mkdir(det_label_folder); 35 | mkdir(seg_label_folder); 36 | %% Read 37 | parfor imageId = 1:10335 38 | imageId 39 | try 40 | data = SUNRGBDMeta(imageId); 41 | data.depthpath(1:16) = ''; 42 | data.depthpath = strcat('../OFFICIAL_SUNRGBD', data.depthpath); 43 | data.rgbpath(1:16) = ''; 44 | data.rgbpath = strcat('../OFFICIAL_SUNRGBD', data.rgbpath); 45 | 46 | % Write point cloud in depth map 47 | [rgb,points3d,depthInpaint,imsize]=read3dPoints(data); 48 | rgb(isnan(points3d(:,1)),:) = []; 49 | points3d(isnan(points3d(:,1)),:) = []; 50 | points3d_rgb = [points3d, rgb]; 51 | 52 | % MAT files are 3x smaller than TXT files. In Python we can use 53 | % scipy.io.loadmat('xxx.mat')['points3d_rgb'] to load the data. 54 | mat_filename = strcat(num2str(imageId,'%06d'), '.mat'); 55 | txt_filename = strcat(num2str(imageId,'%06d'), '.txt'); 56 | parsave(strcat(depth_folder, mat_filename), points3d_rgb); 57 | 58 | % Write images 59 | copyfile(data.rgbpath, sprintf('%s/%06d.jpg', image_folder, imageId)); 60 | 61 | % Write calibration 62 | dlmwrite(strcat(calib_folder, txt_filename), data.Rtilt(:)', 'delimiter', ' '); 63 | dlmwrite(strcat(calib_folder, txt_filename), data.K(:)', 'delimiter', ' ', '-append'); 64 | 65 | % Write 2D and 3D box label 66 | data2d = SUNRGBDMeta2DBB(imageId); 67 | fid = fopen(strcat(det_label_folder, txt_filename), 'w'); 68 | for j = 1:length(data.groundtruth3DBB) 69 | centroid = data.groundtruth3DBB(j).centroid; 70 | classname = data.groundtruth3DBB(j).classname; 71 | orientation = data.groundtruth3DBB(j).orientation; 72 | coeffs = abs(data.groundtruth3DBB(j).coeffs); 73 | box2d = data2d.groundtruth2DBB(j).gtBb2D; 74 | assert(strcmp(data2d.groundtruth2DBB(j).classname, classname)); 75 | fprintf(fid, '%s %d %d %d %d %f %f %f %f %f %f %f %f\n', classname, box2d(1), box2d(2), box2d(3), box2d(4), centroid(1), centroid(2), centroid(3), coeffs(1), coeffs(2), coeffs(3), orientation(1), orientation(2)); 76 | end 77 | fclose(fid); 78 | 79 | catch 80 | end 81 | 82 | end 83 | 84 | function parsave(filename, instance) 85 | save(filename, 'instance'); 86 | end 87 | -------------------------------------------------------------------------------- /sunrgbd/matlab/extract_split.m: -------------------------------------------------------------------------------- 1 | % Copyright (c) Facebook, Inc. and its affiliates. 2 | % 3 | % This source code is licensed under the MIT license found in the 4 | % LICENSE file in the root directory of this source tree. 5 | 6 | %% Dump train/val split. 7 | % Author: Charles R. 
Qi 8 | 9 | addpath('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox') 10 | 11 | %% Construct Hash Map 12 | hash_train = java.util.Hashtable; 13 | hash_val = java.util.Hashtable; 14 | 15 | split = load('../OFFICIAL_SUNRGBD/SUNRGBDtoolbox/traintestSUNRGBD/allsplit.mat'); 16 | 17 | N_train = length(split.alltrain); 18 | N_val = length(split.alltest); 19 | 20 | for i = 1:N_train 21 | folder_path = split.alltrain{i}; 22 | folder_path(1:16) = ''; 23 | hash_train.put(folder_path,0); 24 | end 25 | for i = 1:N_val 26 | folder_path = split.alltest{i}; 27 | folder_path(1:16) = ''; 28 | hash_val.put(folder_path,0); 29 | end 30 | 31 | %% Map data to train or val set. 32 | load('../OFFICIAL_SUNRGBD/SUNRGBDMeta3DBB_v2.mat'); 33 | 34 | fid_train = fopen('../sunrgbd_trainval/train_data_idx.txt', 'w'); 35 | fid_val = fopen('../sunrgbd_trainval/val_data_idx.txt', 'w'); 36 | 37 | for imageId = 1:10335 38 | data = SUNRGBDMeta(imageId); 39 | depthpath = data.depthpath; 40 | depthpath(1:16) = ''; 41 | [filepath,name,ext] = fileparts(depthpath); 42 | [filepath,name,ext] = fileparts(filepath); 43 | if hash_train.containsKey(filepath) 44 | fprintf(fid_train, '%d\n', imageId); 45 | elseif hash_val.containsKey(filepath) 46 | fprintf(fid_val, '%d\n', imageId); 47 | else 48 | a = 1; 49 | end 50 | end 51 | fclose(fid_train); 52 | fclose(fid_val); 53 | -------------------------------------------------------------------------------- /sunrgbd/model_util_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | class SunrgbdDatasetConfig(object): 15 | def __init__(self): 16 | self.num_class = 10 17 | self.num_heading_bin = 12 18 | self.num_size_cluster = 10 19 | 20 | self.type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | self.class2type = {self.type2class[t]:t for t in self.type2class} 22 | self.type2onehotclass={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 23 | self.type_mean_size = {'bathtub': np.array([0.765840,1.398258,0.472728]), 24 | 'bed': np.array([2.114256,1.620300,0.927272]), 25 | 'bookshelf': np.array([0.404671,1.071108,1.688889]), 26 | 'chair': np.array([0.591958,0.552978,0.827272]), 27 | 'desk': np.array([0.695190,1.346299,0.736364]), 28 | 'dresser': np.array([0.528526,1.002642,1.172878]), 29 | 'night_stand': np.array([0.500618,0.632163,0.683424]), 30 | 'sofa': np.array([0.923508,1.867419,0.845495]), 31 | 'table': np.array([0.791118,1.279516,0.718182]), 32 | 'toilet': np.array([0.699104,0.454178,0.756250])} 33 | 34 | self.mean_size_arr = np.zeros((self.num_size_cluster, 3)) 35 | for i in range(self.num_size_cluster): 36 | self.mean_size_arr[i,:] = self.type_mean_size[self.class2type[i]] 37 | 38 | def size2class(self, size, type_name): 39 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 40 | size_class = self.type2class[type_name] 41 | size_residual = size - self.type_mean_size[type_name] 42 | return size_class, size_residual 43 | 44 | def class2size(self, pred_cls, 
residual): 45 | ''' Inverse function to size2class ''' 46 | mean_size = self.type_mean_size[self.class2type[pred_cls]] 47 | return mean_size + residual 48 | 49 | def angle2class(self, angle): 50 | ''' Convert continuous angle to discrete class 51 | [optional] also returns a small regression number from 52 | class center angle to current angle. 53 | 54 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N) 55 | return is class of int32 of 0,1,...,N-1 and a number such that 56 | class*(2pi/N) + number = angle 57 | ''' 58 | num_class = self.num_heading_bin 59 | angle = angle%(2*np.pi) 60 | assert(angle>=0 and angle<=2*np.pi) 61 | angle_per_class = 2*np.pi/float(num_class) 62 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 63 | class_id = int(shifted_angle/angle_per_class) 64 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 65 | return class_id, residual_angle 66 | 67 | def class2angle(self, pred_cls, residual, to_label_format=True): 68 | ''' Inverse function to angle2class ''' 69 | num_class = self.num_heading_bin 70 | angle_per_class = 2*np.pi/float(num_class) 71 | angle_center = pred_cls * angle_per_class 72 | angle = angle_center + residual 73 | if to_label_format and angle>np.pi: 74 | angle = angle - 2*np.pi 75 | return angle 76 | 77 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 78 | heading_angle = self.class2angle(heading_class, heading_residual) 79 | box_size = self.class2size(int(size_class), size_residual) 80 | obb = np.zeros((7,)) 81 | obb[0:3] = center 82 | obb[3:6] = box_size 83 | obb[6] = heading_angle*-1 84 | return obb 85 | 86 | 87 | -------------------------------------------------------------------------------- /train.bash: -------------------------------------------------------------------------------- 1 | # Training 2 | ##### 3 | #SUN-RGBD 4 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset sunrgbd --log_dir log_sunrgbd 5 | 6 | 7 | python eval.py --dataset sunrgbd --checkpoint_path log_sunrgbd/checkpoint.tar --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 8 | 9 | # ScanNet 10 | CUDA_VISIBLE_DEVICES=0 python train.py --dataset scannet --log_dir log_scannet --num_point 40000 11 | 12 | python eval.py --dataset scannet --checkpoint_path log_scannet/checkpoint.tar --dump_dir eval_scannet --num_point 40000 --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 13 | 14 | -------------------------------------------------------------------------------- /utils/eval_det.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recall and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | 24 | def voc_ap(rec, prec, use_07_metric=False): 25 | """ ap = voc_ap(rec, prec, [use_07_metric]) 26 | Compute VOC AP given precision and recall. 27 | If use_07_metric is true, uses the 28 | VOC 07 11 point method (default:False).
29 | """ 30 | if use_07_metric: 31 | # 11 point metric 32 | ap = 0. 33 | for t in np.arange(0., 1.1, 0.1): 34 | if np.sum(rec >= t) == 0: 35 | p = 0 36 | else: 37 | p = np.max(prec[rec >= t]) 38 | ap = ap + p / 11. 39 | else: 40 | # correct AP calculation 41 | # first append sentinel values at the end 42 | mrec = np.concatenate(([0.], rec, [1.])) 43 | mpre = np.concatenate(([0.], prec, [0.])) 44 | 45 | # compute the precision envelope 46 | for i in range(mpre.size - 1, 0, -1): 47 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 48 | 49 | # to calculate area under PR curve, look for points 50 | # where X axis (recall) changes value 51 | i = np.where(mrec[1:] != mrec[:-1])[0] 52 | 53 | # and sum (\Delta recall) * prec 54 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 55 | return ap 56 | 57 | import os 58 | import sys 59 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 60 | from metric_util import calc_iou # axis-aligned 3D box IoU 61 | def get_iou(bb1, bb2): 62 | """ Compute IoU of two bounding boxes. 63 | ** Define your box IoU function HERE ** 64 | """ 65 | #pass 66 | iou3d = calc_iou(bb1, bb2) 67 | return iou3d 68 | 69 | from box_util import box3d_iou 70 | def get_iou_obb(bb1,bb2): 71 | iou3d, iou2d = box3d_iou(bb1,bb2) 72 | return iou3d 73 | 74 | def get_iou_main(get_iou_func, args): 75 | return get_iou_func(*args) 76 | 77 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 78 | """ Generic functions to compute precision/recall for object detection 79 | for a single class. 80 | Input: 81 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 82 | gt: map of {img_id: [bbox]} 83 | ovthresh: scalar, iou threshold 84 | use_07_metric: bool, if True use VOC07 11 point method 85 | Output: 86 | rec: numpy array of length nd 87 | prec: numpy array of length nd 88 | ap: scalar, average precision 89 | """ 90 | 91 | # construct gt objects 92 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 93 | npos = 0 94 | for img_id in gt.keys(): 95 | bbox = np.array(gt[img_id]) 96 | det = [False] * len(bbox) 97 | npos += len(bbox) 98 | class_recs[img_id] = {'bbox': bbox, 'det': det} 99 | # pad empty list to all other imgids 100 | for img_id in pred.keys(): 101 | if img_id not in gt: 102 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 103 | 104 | # construct dets 105 | image_ids = [] 106 | confidence = [] 107 | BB = [] 108 | for img_id in pred.keys(): 109 | for box,score in pred[img_id]: 110 | image_ids.append(img_id) 111 | confidence.append(score) 112 | BB.append(box) 113 | confidence = np.array(confidence) 114 | BB = np.array(BB) # (nd,4 or 8,3 or 6) 115 | 116 | # sort by confidence 117 | sorted_ind = np.argsort(-confidence) 118 | sorted_scores = np.sort(-confidence) 119 | BB = BB[sorted_ind, ...] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | 122 | # go down dets and mark TPs and FPs 123 | nd = len(image_ids) 124 | tp = np.zeros(nd) 125 | fp = np.zeros(nd) 126 | for d in range(nd): 127 | #if d%100==0: print(d) 128 | R = class_recs[image_ids[d]] 129 | bb = BB[d,...].astype(float) 130 | ovmax = -np.inf 131 | BBGT = R['bbox'].astype(float) 132 | 133 | if BBGT.size > 0: 134 | # compute overlaps 135 | for j in range(BBGT.shape[0]): 136 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...])) 137 | if iou > ovmax: 138 | ovmax = iou 139 | jmax = j 140 | 141 | #print d, ovmax 142 | if ovmax > ovthresh: 143 | if not R['det'][jmax]: 144 | tp[d] = 1. 145 | R['det'][jmax] = 1 146 | else: 147 | fp[d] = 1.
148 | else: 149 | fp[d] = 1. 150 | 151 | # compute precision recall 152 | fp = np.cumsum(fp) 153 | tp = np.cumsum(tp) 154 | rec = tp / float(npos) 155 | #print('NPOS: ', npos) 156 | # avoid divide by zero in case the first detection matches a difficult 157 | # ground truth 158 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 159 | ap = voc_ap(rec, prec, use_07_metric) 160 | 161 | return rec, prec, ap 162 | 163 | def eval_det_cls_wrapper(arguments): 164 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 165 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 166 | return (rec, prec, ap) 167 | 168 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 169 | """ Generic functions to compute precision/recall for object detection 170 | for multiple classes. 171 | Input: 172 | pred_all: map of {img_id: [(classname, bbox, score)]} 173 | gt_all: map of {img_id: [(classname, bbox)]} 174 | ovthresh: scalar, iou threshold 175 | use_07_metric: bool, if true use VOC07 11 point method 176 | Output: 177 | rec: {classname: rec} 178 | prec: {classname: prec_all} 179 | ap: {classname: scalar} 180 | """ 181 | pred = {} # map {classname: pred} 182 | gt = {} # map {classname: gt} 183 | for img_id in pred_all.keys(): 184 | for classname, bbox, score in pred_all[img_id]: 185 | if classname not in pred: pred[classname] = {} 186 | if img_id not in pred[classname]: 187 | pred[classname][img_id] = [] 188 | if classname not in gt: gt[classname] = {} 189 | if img_id not in gt[classname]: 190 | gt[classname][img_id] = [] 191 | pred[classname][img_id].append((bbox,score)) 192 | for img_id in gt_all.keys(): 193 | for classname, bbox in gt_all[img_id]: 194 | if classname not in gt: gt[classname] = {} 195 | if img_id not in gt[classname]: 196 | gt[classname][img_id] = [] 197 | gt[classname][img_id].append(bbox) 198 | 199 | rec = {} 200 | prec = {} 201 | ap = {} 202 | for classname in gt.keys(): 203 | print('Computing AP for class: ', classname) 204 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) 205 | print(classname, ap[classname]) 206 | 207 | return rec, prec, ap 208 | 209 | from multiprocessing import Pool 210 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 211 | """ Generic functions to compute precision/recall for object detection 212 | for multiple classes. 
213 | Input: 214 | pred_all: map of {img_id: [(classname, bbox, score)]} 215 | gt_all: map of {img_id: [(classname, bbox)]} 216 | ovthresh: scalar, iou threshold 217 | use_07_metric: bool, if true use VOC07 11 point method 218 | Output: 219 | rec: {classname: rec} 220 | prec: {classname: prec_all} 221 | ap: {classname: scalar} 222 | """ 223 | pred = {} # map {classname: pred} 224 | gt = {} # map {classname: gt} 225 | for img_id in pred_all.keys(): 226 | for classname, bbox, score in pred_all[img_id]: 227 | if classname not in pred: pred[classname] = {} 228 | if img_id not in pred[classname]: 229 | pred[classname][img_id] = [] 230 | if classname not in gt: gt[classname] = {} 231 | if img_id not in gt[classname]: 232 | gt[classname][img_id] = [] 233 | pred[classname][img_id].append((bbox,score)) 234 | for img_id in gt_all.keys(): 235 | for classname, bbox in gt_all[img_id]: 236 | if classname not in gt: gt[classname] = {} 237 | if img_id not in gt[classname]: 238 | gt[classname][img_id] = [] 239 | gt[classname][img_id].append(bbox) 240 | 241 | rec = {} 242 | prec = {} 243 | ap = {} 244 | p = Pool(processes=10) 245 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred]) 246 | p.close() 247 | for i, classname in enumerate(gt.keys()): 248 | if classname in pred: 249 | rec[classname], prec[classname], ap[classname] = ret_values[i] 250 | else: 251 | rec[classname] = 0 252 | prec[classname] = 0 253 | ap[classname] = 0 254 | print(classname, ap[classname]) 255 | 256 | return rec, prec, ap 257 | -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. Qi 9 | """ 10 | 11 | import os 12 | import sys 13 | import torch 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | 17 | import numpy as np 18 | 19 | # Mesh IO 20 | import trimesh 21 | 22 | 23 | # ---------------------------------------- 24 | # Precision and Recall 25 | # ---------------------------------------- 26 | 27 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 28 | ''' 29 | Args: 30 | labels: (B, N, 6) 31 | pred: (B, M, 6) 32 | iou_thresh: scalar 33 | conf_thresh: scalar 34 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 35 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 
36 | Returns: 37 | TP,FP,FN,Precision,Recall 38 | ''' 39 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 40 | # of True/False array as in numpy 41 | assert(not torch.is_tensor(label_mask)) 42 | assert(not torch.is_tensor(pred_mask)) 43 | TP, FP, FN = 0, 0, 0 44 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 45 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 46 | for batch_idx in range(labels.shape[0]): 47 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :], 48 | pred[batch_idx, pred_mask[batch_idx,:]==1, :], 49 | iou_thresh, conf_thresh) 50 | TP += TP_i 51 | FP += FP_i 52 | FN += FN_i 53 | 54 | return TP, FP, FN, precision_recall(TP, FP, FN) 55 | 56 | 57 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 58 | """Compute P and R for predicted bounding boxes. Ignores classes! 59 | Args: 60 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 61 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 62 | Returns: 63 | TP, FP, FN 64 | """ 65 | 66 | 67 | # for each pred box with high conf (C), compute IoU with all gt boxes. 68 | # TP = number of times IoU > th ; FP = C - TP 69 | # FN - number of scene objects without good match 70 | 71 | gt_bboxes = labels[:, :6] 72 | 73 | num_scene_bboxes = gt_bboxes.shape[0] 74 | conf = pred[:, 6] 75 | 76 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 77 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 78 | 79 | # init an array to keep iou between generated and scene bboxes 80 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 81 | for g_idx in range(num_conf_pred_bboxes): 82 | for s_idx in range(num_scene_bboxes): 83 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :]) 84 | 85 | 86 | good_match_arr = (iou_arr >= iou_thresh) 87 | 88 | TP = good_match_arr.any(axis=1).sum() 89 | FP = num_conf_pred_bboxes - TP 90 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 91 | 92 | return TP, FP, FN 93 | 94 | 95 | def precision_recall(TP, FP, FN): 96 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0 97 | Rec = 1.0 * TP / (TP + FN) 98 | return Prec, Rec 99 | 100 | 101 | def calc_iou(box_a, box_b): 102 | """Computes IoU of two axis aligned bboxes. 
103 | Args: 104 | box_a, box_b: 6D of center and lengths 105 | Returns: 106 | iou 107 | """ 108 | 109 | max_a = box_a[0:3] + box_a[3:6]/2 110 | max_b = box_b[0:3] + box_b[3:6]/2 111 | min_max = np.array([max_a, max_b]).min(0) 112 | 113 | min_a = box_a[0:3] - box_a[3:6]/2 114 | min_b = box_b[0:3] - box_b[3:6]/2 115 | max_min = np.array([min_a, min_b]).max(0) 116 | if not ((min_max > max_min).all()): 117 | return 0.0 118 | 119 | intersection = (min_max - max_min).prod() 120 | vol_a = box_a[3:6].prod() 121 | vol_b = box_b[3:6].prod() 122 | union = vol_a + vol_b - intersection 123 | return 1.0*intersection / union 124 | 125 | 126 | if __name__ == '__main__': 127 | print('running some tests') 128 | 129 | ############ 130 | ## Test IoU 131 | ############ 132 | box_a = np.array([0,0,0,1,1,1]) 133 | box_b = np.array([0,0,0,2,2,2]) 134 | expected_iou = 1.0/8 135 | pred_iou = calc_iou(box_a, box_b) 136 | assert expected_iou == pred_iou, 'function returned wrong IoU' 137 | 138 | box_a = np.array([0,0,0,1,1,1]) 139 | box_b = np.array([10,10,10,2,2,2]) 140 | expected_iou = 0.0 141 | pred_iou = calc_iou(box_a, box_b) 142 | assert expected_iou == pred_iou, 'function returned wrong IoU' 143 | 144 | print('IoU test -- PASSED') 145 | 146 | ######################### 147 | ## Test Precision Recall 148 | ######################### 149 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]]) 150 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 2 and FP == 0 and FN == 0 153 | assert precision_recall(TP, FP, FN) == (1, 1) 154 | 155 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]]) 156 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 157 | assert TP == 1 and FP == 0 and FN == 1 158 | assert precision_recall(TP, FP, FN) == (1, 0.5) 159 | 160 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]]) 161 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 162 | assert TP == 1 and FP == 1 and FN == 1 163 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 164 | 165 | # wrong box has low confidence 166 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]]) 167 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 168 | assert TP == 1 and FP == 0 and FN == 1 169 | assert precision_recall(TP, FP, FN) == (1, 0.5) 170 | 171 | print('Precision Recall test -- PASSED') 172 | 173 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree.
5 | 6 | import numpy as np 7 | from pc_util import bbox_corner_dist_measure 8 | 9 | # boxes are axis aligned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 10 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 11 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 12 | ''' 13 | def nms_2d(boxes, overlap_threshold): 14 | x1 = boxes[:,0] 15 | y1 = boxes[:,1] 16 | x2 = boxes[:,2] 17 | y2 = boxes[:,3] 18 | score = boxes[:,4] 19 | area = (x2-x1)*(y2-y1) 20 | 21 | I = np.argsort(score) 22 | pick = [] 23 | while (I.size!=0): 24 | last = I.size 25 | i = I[-1] 26 | pick.append(i) 27 | suppress = [last-1] 28 | for pos in range(last-1): 29 | j = I[pos] 30 | xx1 = max(x1[i],x1[j]) 31 | yy1 = max(y1[i],y1[j]) 32 | xx2 = min(x2[i],x2[j]) 33 | yy2 = min(y2[i],y2[j]) 34 | w = xx2-xx1 35 | h = yy2-yy1 36 | if (w>0 and h>0): 37 | o = w*h/area[j] 38 | print('Overlap is', o) 39 | if (o>overlap_threshold): 40 | suppress.append(pos) 41 | I = np.delete(I,suppress) 42 | return pick 43 | 44 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 45 | x1 = boxes[:,0] 46 | y1 = boxes[:,1] 47 | x2 = boxes[:,2] 48 | y2 = boxes[:,3] 49 | score = boxes[:,4] 50 | area = (x2-x1)*(y2-y1) 51 | 52 | I = np.argsort(score) 53 | pick = [] 54 | while (I.size!=0): 55 | last = I.size 56 | i = I[-1] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 60 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 61 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 62 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 63 | 64 | w = np.maximum(0, xx2-xx1) 65 | h = np.maximum(0, yy2-yy1) 66 | 67 | if old_type: 68 | o = (w*h)/area[I[:last-1]] 69 | else: 70 | inter = w*h 71 | o = inter / (area[i] + area[I[:last-1]] - inter) 72 | 73 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 74 | 75 | return pick 76 | 77 | def nms_3d_faster(boxes, overlap_threshold, old_type=False): 78 | x1 = boxes[:,0] 79 | y1 = boxes[:,1] 80 | z1 = boxes[:,2] 81 | x2 = boxes[:,3] 82 | y2 = boxes[:,4] 83 | z2 = boxes[:,5] 84 | score = boxes[:,6] 85 | area = (x2-x1)*(y2-y1)*(z2-z1) 86 | 87 | I = np.argsort(score) 88 | pick = [] 89 | while (I.size!=0): 90 | last = I.size 91 | i = I[-1] 92 | pick.append(i) 93 | 94 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 95 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 96 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 97 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 98 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 99 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 100 | 101 | l = np.maximum(0, xx2-xx1) 102 | w = np.maximum(0, yy2-yy1) 103 | h = np.maximum(0, zz2-zz1) 104 | 105 | if old_type: 106 | o = (l*w*h)/area[I[:last-1]] 107 | else: 108 | inter = l*w*h 109 | o = inter / (area[i] + area[I[:last-1]] - inter) 110 | 111 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 112 | 113 | return pick 114 | 115 | def nms_3d_faster_samecls(boxes, overlap_threshold, old_type=False): 116 | x1 = boxes[:,0] 117 | y1 = boxes[:,1] 118 | z1 = boxes[:,2] 119 | x2 = boxes[:,3] 120 | y2 = boxes[:,4] 121 | z2 = boxes[:,5] 122 | score = boxes[:,6] 123 | cls = boxes[:,7] 124 | area = (x2-x1)*(y2-y1)*(z2-z1) 125 | 126 | I = np.argsort(score) 127 | pick = [] 128 | while (I.size!=0): 129 | last = I.size 130 | i = I[-1] 131 | pick.append(i) 132 | 133 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 134 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 135 | zz1 = np.maximum(z1[i], z1[I[:last-1]]) 136 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 137 | yy2 = np.minimum(y2[i],
y2[I[:last-1]]) 138 | zz2 = np.minimum(z2[i], z2[I[:last-1]]) 139 | cls1 = cls[i] 140 | cls2 = cls[I[:last-1]] 141 | 142 | l = np.maximum(0, xx2-xx1) 143 | w = np.maximum(0, yy2-yy1) 144 | h = np.maximum(0, zz2-zz1) 145 | 146 | if old_type: 147 | o = (l*w*h)/area[I[:last-1]] 148 | else: 149 | inter = l*w*h 150 | o = inter / (area[i] + area[I[:last-1]] - inter) 151 | o = o * (cls1==cls2) 152 | 153 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 154 | 155 | return pick 156 | 157 | 158 | def nms_crnr_dist(boxes, conf, overlap_threshold): 159 | 160 | I = np.argsort(conf) 161 | pick = [] 162 | while (I.size!=0): 163 | last = I.size 164 | i = I[-1] 165 | pick.append(i) 166 | 167 | scores = [] 168 | for ind in I[:-1]: 169 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 170 | 171 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 172 | 173 | return pick 174 | 175 | if __name__=='__main__': 176 | a = np.random.random((100,5)) 177 | print(nms_2d(a,0.9)) 178 | print(nms_2d_faster(a,0.9)) 179 | -------------------------------------------------------------------------------- /utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. Qi 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | def huber_loss(error, delta=1.0): 16 | """ 17 | Args: 18 | error: Torch tensor (d1,d2,...,dk) 19 | Returns: 20 | loss: Torch tensor (d1,d2,...,dk) 21 | 22 | x = error = pred - gt or dist(pred,gt) 23 | 0.5 * |x|^2 if |x|<=d 24 | 0.5 * d^2 + d * (|x|-d) if |x|>d 25 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 26 | """ 27 | abs_error = torch.abs(error) 28 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 29 | quadratic = torch.clamp(abs_error, max=delta) 30 | linear = (abs_error - quadratic) 31 | loss = 0.5 * quadratic**2 + delta * linear 32 | return loss 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | def demo_nn_distance(): 64 | np.random.seed(0) 65 | pc1arr = np.random.random((1,5,3)) 66 | pc2arr = np.random.random((1,6,3)) 67 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 68 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 69 | dist1, idx1, dist2, idx2 = 
nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | 15 | class Logger(object): 16 | 17 | def __init__(self, log_dir): 18 | """Create a summary writer logging to log_dir.""" 19 | self.writer = tf.summary.FileWriter(log_dir) 20 | 21 | def scalar_summary(self, tag, value, step): 22 | """Log a scalar variable.""" 23 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 24 | self.writer.add_summary(summary, step) 25 | 26 | def image_summary(self, tag, images, step): 27 | """Log a list of images.""" 28 | 29 | img_summaries = [] 30 | for i, img in enumerate(images): 31 | # Write the image to a string 32 | try: 33 | s = StringIO() 34 | except: 35 | s = BytesIO() 36 | scipy.misc.toimage(img).save(s, format="png") 37 | 38 | # Create an Image object 39 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 40 | height=img.shape[0], 41 | width=img.shape[1]) 42 | # Create a Summary value 43 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 44 | 45 | # Create and write Summary 46 | summary = tf.Summary(value=img_summaries) 47 | self.writer.add_summary(summary, step) 48 | 49 | def histo_summary(self, tag, values, step, bins=1000): 50 | """Log a histogram of the tensor of values.""" 51 | 52 | # Create a histogram using numpy 53 | counts, bin_edges = np.histogram(values, bins=bins) 54 | 55 | # Fill the fields of the histogram proto 56 | hist = tf.HistogramProto() 57 | hist.min = float(np.min(values)) 58 | hist.max = float(np.max(values)) 59 | hist.num = int(np.prod(values.shape)) 60 | hist.sum = float(np.sum(values)) 61 | hist.sum_squares = float(np.sum(values**2)) 62 | 63 | # Drop the start of the first bin 64 | bin_edges = bin_edges[1:] 65 | 66 | # Add bin edges and counts 67 | for edge in bin_edges: 68 | hist.bucket_limit.append(edge) 69 | for c in counts: 70 | hist.bucket.append(c) 71 | 72 | # Create and write Summary 73 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 74 | self.writer.add_summary(summary, step) 75 | self.writer.flush() 76 | -------------------------------------------------------------------------------- /utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 7 | import os 8 | import time 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | import sys 11 | sys.path.append(BASE_DIR) 12 | import tf_logger 13 | 14 | 15 | class Visualizer(): 16 | def __init__(self, opt, name='train'): 17 | # self.opt = opt 18 | #self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 19 | #self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 20 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 21 | self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt') 22 | with open(self.log_name, "a") as log_file: 23 | now = time.strftime("%c") 24 | log_file.write('================ Training Loss (%s) ================\n' % now) 25 | 26 | # |visuals|: dictionary of images to save 27 | def log_images(self, visuals, step): 28 | for label, image_numpy in visuals.items(): 29 | self.logger.image_summary( 30 | label, [image_numpy], step) 31 | 32 | # scalars: dictionary of scalar labels and values 33 | def log_scalars(self, scalars, step): 34 | for label, val in scalars.items(): 35 | self.logger.scalar_summary(label, val, step) 36 | 37 | # scatter plots 38 | def plot_current_points(self, points, disp_offset=10): 39 | pass 40 | 41 | # scalars: same format as |scalars| of plot_current_scalars 42 | def print_current_scalars(self, epoch, i, scalars): 43 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 44 | for k, v in scalars.items(): 45 | message += '%s: %.3f ' % (k, v) 46 | 47 | print(message) 48 | with open(self.log_name, "a") as log_file: 49 | log_file.write('%s\n' % message) 50 | --------------------------------------------------------------------------------
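Usage note: the `Visualizer` above is a thin wrapper around `tf_logger.Logger` that writes TensorBoard summaries plus a plain-text loss log. Below is a minimal, hypothetical sketch of wiring it into a training loop; the argparse namespace `opt` and its `--log_dir` option mirror the flag used in `train.bash`, and it assumes `utils/` is on the Python path. It is an illustration, not code from the repository.

    # Hypothetical example: drive utils/tf_visualizer.Visualizer from a training loop
    import argparse
    from tf_visualizer import Visualizer

    parser = argparse.ArgumentParser()
    parser.add_argument('--log_dir', default='log_sunrgbd')  # same flag train.py takes in train.bash
    opt = parser.parse_args()

    viz = Visualizer(opt, name='train')  # TensorBoard events go to <log_dir>/train
    for epoch in range(10):
        # in real training these would be the averaged losses for the epoch/iteration
        scalars = {'loss/total_loss': 1.0 / (epoch + 1)}
        viz.log_scalars(scalars, epoch)               # scalar summaries for TensorBoard
        viz.print_current_scalars(epoch, 0, scalars)  # also appended to tf_visualizer_log.txt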