├── 3D_Object_Detection.ipynb ├── CONTRIBUTING.md ├── Deep Learning Approach To RGB-D Object Detection.pdf ├── LICENSE ├── README.md ├── checkpoint_sunrgbd.tar ├── conversion_point_cloud.py ├── demo_files ├── input_pc_sunrgbd.ply └── sunrgbd_results │ ├── 000000_aggregated_vote_pc.ply │ ├── 000000_confident_proposal_pc.ply │ ├── 000000_pc.ply │ ├── 000000_pred_bbox.ply │ ├── 000000_pred_confident_bbox.ply │ ├── 000000_pred_confident_nms_bbox.ply │ ├── 000000_pred_map_cls.txt │ ├── 000000_pred_nms_bbox.ply │ ├── 000000_proposal_pc.ply │ ├── 000000_seed_pc.ply │ └── 000000_vgen_pc.ply ├── eval.py ├── log ├── checkpoint.tar ├── log_train.txt ├── tf_visualizer_log.txt └── train │ └── events.out.tfevents.1598432199.2a9a360a68aa ├── log_sunrgbd ├── log_train.txt ├── test │ ├── events.out.tfevents.1597829517.29e4cd465605 │ └── events.out.tfevents.1597855641.13c5136526d7 ├── tf_visualizer_log.txt └── train │ ├── events.out.tfevents.1597776640.e3d3ae681f94 │ ├── events.out.tfevents.1597821535.29e4cd465605 │ ├── events.out.tfevents.1597841030.13c5136526d7 │ └── events.out.tfevents.1598298644.a8025551b9df ├── models ├── __pycache__ │ ├── ap_helper.cpython-35.pyc │ ├── ap_helper.cpython-36.pyc │ ├── backbone_module.cpython-35.pyc │ ├── backbone_module.cpython-36.pyc │ ├── detectnet.cpython-35.pyc │ ├── detectnet.cpython-36.pyc │ ├── dump_helper.cpython-35.pyc │ ├── dump_helper.cpython-36.pyc │ ├── loss_helper.cpython-35.pyc │ ├── loss_helper.cpython-36.pyc │ ├── proposal_module.cpython-35.pyc │ ├── proposal_module.cpython-36.pyc │ ├── results_save.cpython-36.pyc │ ├── votenet.cpython-35.pyc │ ├── voting.cpython-36.pyc │ ├── voting_module.cpython-35.pyc │ └── voting_module.cpython-36.pyc ├── ap_helper.py ├── backbone_module.py ├── detectnet.py ├── loss_helper.py ├── loss_helper_boxnet.py ├── proposal_module.py ├── results_save.py ├── voting.py └── voting_module.py ├── pointnet2 ├── __pycache__ │ ├── pointnet2_modules.cpython-35.pyc │ ├── pointnet2_modules.cpython-36.pyc │ ├── pointnet2_utils.cpython-35.pyc │ ├── pointnet2_utils.cpython-36.pyc │ ├── pytorch_utils.cpython-35.pyc │ └── pytorch_utils.cpython-36.pyc ├── _ext_src │ ├── include │ │ ├── ball_query.h │ │ ├── cuda_utils.h │ │ ├── group_points.h │ │ ├── interpolate.h │ │ ├── sampling.h │ │ └── utils.h │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── bindings.cpp │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── sampling.cpp │ │ └── sampling_gpu.cu ├── build │ ├── lib.linux-x86_64-3.6 │ │ └── pointnet2 │ │ │ └── _ext.cpython-36m-x86_64-linux-gnu.so │ └── temp.linux-x86_64-3.6 │ │ └── _ext_src │ │ └── src │ │ ├── ball_query.o │ │ ├── ball_query_gpu.o │ │ ├── bindings.o │ │ ├── group_points.o │ │ ├── group_points_gpu.o │ │ ├── interpolate.o │ │ ├── interpolate_gpu.o │ │ ├── sampling.o │ │ └── sampling_gpu.o ├── dist │ └── pointnet2-0.0.0-py3.6-linux-x86_64.egg ├── pointnet2.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt ├── pointnet2_modules.py ├── pointnet2_test.py ├── pointnet2_utils.py ├── pytorch_utils.py └── setup.py ├── sunrgbd ├── drive-download-20200818T081036Z-001.zip ├── drive-download-20200818T081133Z-001.zip ├── model_util_sunrgbd.py ├── sunrgbd_data.py ├── sunrgbd_detection_dataset.py └── sunrgbd_utils.py ├── test_run.py ├── train.py ├── utils ├── __pycache__ │ ├── box_util.cpython-35.pyc │ ├── box_util.cpython-36.pyc │ ├── eval_det.cpython-35.pyc │ ├── eval_det.cpython-36.pyc │ ├── metric_util.cpython-35.pyc │ ├── 
metric_util.cpython-36.pyc │ ├── nms.cpython-36.pyc │ ├── nn_distance.cpython-35.pyc │ ├── nn_distance.cpython-36.pyc │ ├── pc_util.cpython-35.pyc │ ├── pc_util.cpython-36.pyc │ ├── tf_logger.cpython-35.pyc │ ├── tf_logger.cpython-36.pyc │ ├── tf_visualizer.cpython-35.pyc │ └── tf_visualizer.cpython-36.pyc ├── box_util.py ├── eval_det.py ├── metric_util.py ├── nms.py ├── nn_distance.py ├── pc_util.py ├── tf_logger.py └── tf_visualizer.py └── visualize_ply.py /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to votenet 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to votenet, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /Deep Learning Approach To RGB-D Object Detection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/Deep Learning Approach To RGB-D Object Detection.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D-object-detection 2 | 1. I used the preprocessed data from https://drive.google.com/file/d/1P_uFQcvVFf10TLxjIaFMfjto8ZHON-N2/view?usp=sharing 3 | which contains the preprocessed training and validation data. 4 | 5 | 2. This data is used for training via train.py 6 | 7 | 3. A trained model is saved and, after evaluation, the eval_sunrgbd folder contains results on the validation data. 8 | You need three files from the eval_sunrgbd folder on the drive: 9 | • *_pc.ply (the input point cloud) 10 | • *_pred_confident_nms_bbox.ply (the predicted bounding boxes) 11 | • *_pred_map_cls.txt (the predicted class of each detection) 12 | Suppose *_pred_map_cls.txt contains: 13 | 3 14 | 0--------0.9844583 15 | 3 16 | 0.35467-------277 17 | 1 18 | 3.55---0.45 19 | Here, 3, 3 and 1 are class indices into ['bed','table','sofa','chair','toilet','desk','dresser','night_stand','bookshelf','bathtub'], 20 | 21 | which means 3 is chair and 1 is table, etc. 22 | 23 | 4. The output is always a .ply file, which can be visualized in MeshLab. 24 | 25 | 5. test_run.py takes an input stream of .ply files from a folder and saves the corresponding results to a directory. 26 | 27 | 6. Google Drive folder containing all the project files used in Google Colab: 28 | https://drive.google.com/drive/folders/1ScYig5Jx61cnWL7RnpE5L3qQyNgA_OiR?usp=sharing 29 | 30 | The training and validation datasets can be downloaded from the Google Drive folders 31 | /sunrgbd/sunrgbd_pc_bbox_votes_50k_v1_train and /sunrgbd/sunrgbd_pc_bbox_votes_50k_v1_val; 32 | place them in your sunrgbd folder to run on your host computer. 33 | 34 | 35 | Steps for Google Colab are available in 3D_Object_Detection.ipynb: 36 | 1. Download and install CUDA 10. 37 | 38 | 2. Go to the pointnet2 directory. 39 | 40 | 3. Compile the CUDA layers for the PointNet++ backbone: 41 | !python setup.py install 42 | 43 | 4. Go to the /3d-object-detection folder and train the model with: 44 | !python train.py --log_dir log 45 | 46 | 5.
test and evaluate with the checkpoint_sunrgbd.tar 47 | !python eval.py 48 | 49 | you can also run demo by 50 | !python demo.py 51 | 52 | Please Refer to the project paper here 53 | https://github.com/tayoshittu/3D-Object-Detection/blob/main/Deep%20Learning%20Approach%20To%20RGB-D%20Object%20Detection.pdf 54 | -------------------------------------------------------------------------------- /checkpoint_sunrgbd.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/checkpoint_sunrgbd.tar -------------------------------------------------------------------------------- /conversion_point_cloud.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import numpy as np 4 | from PIL import Image 5 | import imageio 6 | import OpenEXR 7 | import struct 8 | import os 9 | 10 | def get_pointcloud(color_image,depth_image,camera_intrinsics): 11 | """ creates 3D point cloud of rgb images by taking depth information 12 | 13 | input : color image: numpy array[h,w,c], dtype= uint8 14 | depth image: numpy array[h,w] values of all channels will be same 15 | 16 | output : camera_points, color_points - both of shape(no. of pixels, 3) 17 | """ 18 | 19 | image_height = depth_image.shape[0] 20 | image_width = depth_image.shape[1] 21 | pixel_x,pixel_y = np.meshgrid(np.linspace(0,image_width-1,image_width), 22 | np.linspace(0,image_height-1,image_height)) 23 | camera_points_x = np.multiply(pixel_x-camera_intrinsics[0,2],depth_image/camera_intrinsics[0,0]) 24 | camera_points_y = np.multiply(pixel_y-camera_intrinsics[1,2],depth_image/camera_intrinsics[1,1]) 25 | camera_points_z = depth_image 26 | camera_points = np.array([camera_points_x,camera_points_y,camera_points_z]).transpose(1,2,0).reshape(-1,3) 27 | 28 | color_points = color_image.reshape(-1,3) 29 | 30 | # Remove invalid 3D points (where depth == 0) 31 | valid_depth_ind = np.where(depth_image.flatten() > 0)[0] 32 | camera_points = camera_points[valid_depth_ind,:] 33 | color_points = color_points[valid_depth_ind,:] 34 | print(camera_points) 35 | return camera_points,color_points 36 | 37 | def write_pointcloud(filename,xyz_points,rgb_points=None): 38 | 39 | """ creates a .pkl file of the point clouds generated 40 | 41 | """ 42 | 43 | assert xyz_points.shape[1] == 3,'Input XYZ points should be Nx3 float array' 44 | if rgb_points is None: 45 | rgb_points = np.ones(xyz_points.shape).astype(np.uint8)*255 46 | assert xyz_points.shape == rgb_points.shape,'Input RGB colors should be Nx3 float array and have same size as input XYZ points' 47 | 48 | # Write header of .ply file 49 | fid = open(filename,'wb') 50 | fid.write(bytes('ply\n', 'utf-8')) 51 | fid.write(bytes('format binary_little_endian 1.0\n', 'utf-8')) 52 | fid.write(bytes('element vertex %d\n'%xyz_points.shape[0], 'utf-8')) 53 | fid.write(bytes('property float x\n', 'utf-8')) 54 | fid.write(bytes('property float y\n', 'utf-8')) 55 | fid.write(bytes('property float z\n', 'utf-8')) 56 | fid.write(bytes('property uchar red\n', 'utf-8')) 57 | fid.write(bytes('property uchar green\n', 'utf-8')) 58 | fid.write(bytes('property uchar blue\n', 'utf-8')) 59 | fid.write(bytes('end_header\n', 'utf-8')) 60 | # Write 3D points to .ply file 61 | for i in range(xyz_points.shape[0]): 62 | fid.write(bytearray(struct.pack("fffccc", xyz_points[i, 0], xyz_points[i, 1], xyz_points[i, 2], 63 | rgb_points[i, 0].tostring(), rgb_points[i, 
1].tostring(), 64 | rgb_points[i, 2].tostring()))) 65 | fid.close() 66 | 67 | 68 | 69 | 70 | ############################################################ 71 | # Main 72 | ############################################################ 73 | 74 | if __name__ == '__main__': 75 | import argparse 76 | 77 | # Parse command line arguments 78 | parser = argparse.ArgumentParser( 79 | description='create point cloud from depth and rgb image.') 80 | parser.add_argument('--rgb_filename', required=False,default='/media/zirsha/New Volume/votenet-master/depth/table_chair_rgb.png', 81 | help='path to the rgb image') 82 | parser.add_argument('--depth_filename', required=False,default='/media/zirsha/New Volume/votenet-master/depth/table_chair.png', 83 | help="path to the depth image ") 84 | parser.add_argument('--output_directory', required=False,default='sky', 85 | help="directory to save the point cloud file") 86 | parser.add_argument('--fx', required=False, type=float,default='1.5', 87 | help="focal length along x-axis (longer side) in pixels") 88 | parser.add_argument('--fy', required=False, type=float,default='2.1', 89 | help="focal length along y-axis (shorter side) in pixels") 90 | parser.add_argument('--cx', required=False, type=float,default='3.2', 91 | help="centre of image along x-axis") 92 | parser.add_argument('--cy', required=False, type=float,default='2.3', 93 | help="centre of image along y-axis") 94 | 95 | args = parser.parse_args() 96 | 97 | color_data = imageio.imread(args.rgb_filename) 98 | # color_data = np.asarray(im_color, dtype = "uint8") 99 | 100 | if os.path.splitext(os.path.basename(args.depth_filename))[1] == '.npy': 101 | depth_data = np.load(args.depth_filename) 102 | else: 103 | im_depth = imageio.imread(args.depth_filename) 104 | depth_data = im_depth[:,:,0] # values of all channels are equal 105 | 106 | 107 | # camera_intrinsics = [[fx 0 cx], 108 | # [0 fy cy], 109 | # [0 0 1]] 110 | args.fx=550.0 111 | args.cx=150.0 112 | args.fy=401.0 113 | args.cy=150.0 114 | camera_intrinsics = np.asarray([[args.fx, 0, args.cx], [0, args.fy, args.cy], [0, 0, 1]]) 115 | 116 | filename = os.path.basename(args.rgb_filename)[:9] + '-pointCloud.ply' 117 | output_filename = os.path.join(args.output_directory, filename) 118 | 119 | print("Creating the point Cloud file at : ", output_filename ) 120 | camera_points, color_points = get_pointcloud(color_data, depth_data, camera_intrinsics) 121 | 122 | write_pointcloud(output_filename, camera_points) 123 | -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_confident_proposal_pc.ply: -------------------------------------------------------------------------------- 1 | ply 2 | format ascii 1.0 3 | element vertex 7 4 | comment vertices 5 | property float x 6 | property float y 7 | property float z 8 | end_header 9 | 0.956275641918182373 3.91149473190307617 -0.67634814977645874 10 | 1.01935124397277832 4.07627439498901367 -0.612929821014404297 11 | 1.03549671173095703 4.04654788970947266 -0.698067724704742432 12 | 1.17398190498352051 4.04927158355712891 -0.570743322372436523 13 | 1.04493403434753418 3.9962763786315918 -0.691671848297119141 14 | 1.06223893165588379 4.02136898040771484 -0.622945129871368408 15 | 1.05127692222595215 4.0289306640625 -0.635803282260894775 16 | -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_pred_bbox.ply: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/demo_files/sunrgbd_results/000000_pred_bbox.ply -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_pred_confident_bbox.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/demo_files/sunrgbd_results/000000_pred_confident_bbox.ply -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_pred_confident_nms_bbox.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/demo_files/sunrgbd_results/000000_pred_confident_nms_bbox.ply -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_pred_map_cls.txt: -------------------------------------------------------------------------------- 1 | 0 1.755361943906632,1.2037526707277297,2.833021677387803,-0.0540469532732184,1.2037526707277297,3.1092142208586964,0.2833405440389247,1.2037526707277297,5.319527112590224,2.092749441218775,1.2037526707277297,5.043334569119331,1.755361943906632,0.02210697130107886,2.833021677387803,-0.0540469532732184,0.02210697130107886,3.1092142208586964,0.2833405440389247,0.02210697130107886,5.319527112590224,2.092749441218775,0.02210697130107886,5.043334569119331 0.65244097 2 | -------------------------------------------------------------------------------- /demo_files/sunrgbd_results/000000_pred_nms_bbox.ply: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/demo_files/sunrgbd_results/000000_pred_nms_bbox.ply -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Evaluation routine for 3D object detection with SUN RGB-D and ScanNet. 7 | """ 8 | # --dataset sunrgbd --checkpoint_path log_sunrgbd/checkpoint.tar --dump_dir eval_sunrgbd --cluster_sampling seed_fps --use_3d_nms --use_cls_nms --per_class_proposal 9 | import os 10 | import sys 11 | import numpy as np 12 | from datetime import datetime 13 | import argparse 14 | import importlib 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | from torch.utils.data import DataLoader 19 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 20 | ROOT_DIR = BASE_DIR 21 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 22 | from ap_helper import AP_Measurement, parse_predictions, parse_groundtruths 23 | from results_save import save_results 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument('--model', default='detectnet', help='Model file name [default: votenet]') 26 | parser.add_argument('--dataset', default='sunrgbd', help='Dataset name. sunrgbd or scannet. 
[default: sunrgbd]') 27 | parser.add_argument('--checkpoint_path', default='checkpoint_sunrgbd.tar', help='Model checkpoint path [default: None]') 28 | parser.add_argument('--dump_dir', default='eval_sunrgbd', help='Dump dir to save sample outputs [default: None]') 29 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 30 | parser.add_argument('--num_target', type=int, default=256, help='Point Number [default: 256]') 31 | parser.add_argument('--batch_size', type=int, default=8, help='Batch Size during training [default: 8]') 32 | parser.add_argument('--vote_factor', type=int, default=1, help='Number of votes generated from each seed [default: 1]') 33 | parser.add_argument('--cluster_sampling', default='seed_fps', help='Sampling strategy for vote clusters: vote_fps, seed_fps, random [default: vote_fps]') 34 | parser.add_argument('--ap_iou_thresholds', default='0.25,0.5', help='A list of AP IoU thresholds [default: 0.25,0.5]') 35 | parser.add_argument('--no_height', action='store_true', help='Do NOT use height signal in input.') 36 | parser.add_argument('--use_color', action='store_true', help='Use RGB color in input.') 37 | parser.add_argument('--use_sunrgbd_v2', action='store_true', help='Use SUN RGB-D V2 box labels.') 38 | parser.add_argument('--use_3d_nms', action='store_true', help='Use 3D NMS instead of 2D NMS.') 39 | parser.add_argument('--use_cls_nms', action='store_true', help='Use per class NMS.') 40 | parser.add_argument('--use_old_type_nms', action='store_true', help='Use old type of NMS, IoBox2Area.') 41 | parser.add_argument('--per_class_proposal', action='store_true', help='Duplicate each proposal num_class times.') 42 | parser.add_argument('--nms_iou', type=float, default=0.25, help='NMS IoU threshold. [default: 0.25]') 43 | parser.add_argument('--conf_thresh', type=float, default=0.05, help='Filter out predictions with obj prob less than it. 
[default: 0.05]') 44 | parser.add_argument('--faster_eval', action='store_true', help='Faster evaluation by skippling empty bounding box removal.') 45 | parser.add_argument('--shuffle_dataset', action='store_true', help='Shuffle the dataset (random order).') 46 | FLAGS = parser.parse_args() 47 | 48 | if FLAGS.use_cls_nms: 49 | assert(FLAGS.use_3d_nms) 50 | 51 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 52 | BATCH_SIZE = FLAGS.batch_size 53 | NUM_POINT = FLAGS.num_point 54 | DUMP_DIR = FLAGS.dump_dir 55 | CHECKPOINT_PATH = FLAGS.checkpoint_path 56 | assert(CHECKPOINT_PATH is not None) 57 | FLAGS.DUMP_DIR = DUMP_DIR 58 | AP_IOU_THRESHOLDS = [float(x) for x in FLAGS.ap_iou_thresholds.split(',')] 59 | 60 | # Prepare DUMP_DIR 61 | if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) 62 | DUMP_FOUT = open(os.path.join(DUMP_DIR, 'log_eval.txt'), 'w') 63 | DUMP_FOUT.write(str(FLAGS)+'\n') 64 | def log_string(out_str): 65 | DUMP_FOUT.write(out_str+'\n') 66 | DUMP_FOUT.flush() 67 | print(out_str) 68 | 69 | # Init datasets and dataloaders 70 | def my_worker_init_fn(worker_id): 71 | np.random.seed(np.random.get_state()[1][0] + worker_id) 72 | 73 | if FLAGS.dataset == 'sunrgbd': 74 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 75 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, MAX_NUM_OBJ 76 | from model_util_sunrgbd import SunrgbdDatasetConfig 77 | DATASET_CONFIG = SunrgbdDatasetConfig() 78 | TEST_DATASET = SunrgbdDetectionVotesDataset('val', num_points=NUM_POINT, 79 | augment=False, use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 80 | use_v1=(not FLAGS.use_sunrgbd_v2)) 81 | elif FLAGS.dataset == 'scannet': 82 | sys.path.append(os.path.join(ROOT_DIR, 'scannet')) 83 | from scannet_detection_dataset import ScannetDetectionDataset, MAX_NUM_OBJ 84 | from model_util_scannet import ScannetDatasetConfig 85 | DATASET_CONFIG = ScannetDatasetConfig() 86 | TEST_DATASET = ScannetDetectionDataset('val', num_points=NUM_POINT, 87 | augment=False, 88 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height)) 89 | else: 90 | print('Unknown dataset %s. 
Exiting...'%(FLAGS.dataset)) 91 | exit(-1) 92 | print(len(TEST_DATASET)) 93 | TEST_DATALOADER = DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, 94 | shuffle=FLAGS.shuffle_dataset, num_workers=4, worker_init_fn=my_worker_init_fn) 95 | 96 | # Init the model and optimzier 97 | MODEL = importlib.import_module(FLAGS.model) # import network module 98 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 99 | num_input_channel = int(FLAGS.use_color)*3 + int(not FLAGS.no_height)*1 100 | 101 | if FLAGS.model == 'boxnet': 102 | Detector = MODEL.BoxNet 103 | else: 104 | Detector = MODEL.DetectNet 105 | 106 | net = Detector(num_class=DATASET_CONFIG.num_class, 107 | num_heading_bin=DATASET_CONFIG.num_heading_bin, 108 | num_size_cluster=DATASET_CONFIG.num_size_cluster, 109 | mean_size_arr=DATASET_CONFIG.mean_size_arr, 110 | num_proposal=FLAGS.num_target, 111 | input_feature_dim=num_input_channel, 112 | vote_factor=FLAGS.vote_factor, 113 | sampling=FLAGS.cluster_sampling) 114 | net.to(device) 115 | criterion = MODEL.get_loss 116 | 117 | # Load the Adam optimizer 118 | optimizer = optim.Adam(net.parameters(), lr=0.001) 119 | 120 | # Load checkpoint if there is any 121 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 122 | checkpoint = torch.load(CHECKPOINT_PATH) 123 | net.load_state_dict(checkpoint['model_state_dict']) 124 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 125 | epoch = checkpoint['epoch'] 126 | log_string("Loaded checkpoint %s (epoch: %d)"%(CHECKPOINT_PATH, epoch)) 127 | 128 | # Used for AP calculation 129 | CONFIG_DICT = {'remove_empty_box': (not FLAGS.faster_eval), 'use_3d_nms': FLAGS.use_3d_nms, 'nms_iou': FLAGS.nms_iou, 130 | 'use_old_type_nms': FLAGS.use_old_type_nms, 'cls_nms': FLAGS.use_cls_nms, 'per_class_proposal': FLAGS.per_class_proposal, 131 | 'conf_thresh': FLAGS.conf_thresh, 'dataset_config':DATASET_CONFIG} 132 | # ------------------------------------------------------------------------- GLOBAL CONFIG END 133 | 134 | def evaluate_one_epoch(): 135 | stat_dict = {} 136 | ap_calculator_list = [AP_Measurement(iou_thresh, DATASET_CONFIG.class2type) \ 137 | for iou_thresh in AP_IOU_THRESHOLDS] 138 | net.eval() # set model to eval mode (for bn and dp) 139 | for batch_idx, batch_data_label in enumerate(TEST_DATALOADER): 140 | if batch_idx % 10 == 0: 141 | print('Eval batch: %d'%(batch_idx)) 142 | 143 | for key in batch_data_label: 144 | batch_data_label[key] = batch_data_label[key].to(device) 145 | 146 | # Forward pass 147 | inputs = {'point_clouds': batch_data_label['point_clouds']} 148 | with torch.no_grad(): 149 | end_points = net(inputs) 150 | 151 | # Compute loss 152 | for key in batch_data_label: 153 | assert(key not in end_points) 154 | end_points[key] = batch_data_label[key] 155 | loss, end_points = criterion(end_points, DATASET_CONFIG) 156 | 157 | # Accumulate statistics and print out 158 | for key in end_points: 159 | if 'loss' in key or 'acc' in key or 'ratio' in key: 160 | if key not in stat_dict: stat_dict[key] = 0 161 | stat_dict[key] += end_points[key].item() 162 | 163 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT) 164 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 165 | for ap_calculator in ap_calculator_list: 166 | ap_calculator.step(batch_pred_map_cls, batch_gt_map_cls) 167 | 168 | # Dump evaluation results for visualization 169 | if batch_idx% 500 == 0: 170 | save_results(end_points, DUMP_DIR, DATASET_CONFIG) 171 | 172 | # Log statistics 173 | for key in 
sorted(stat_dict.keys()): 174 | log_string('eval mean %s: %f'%(key, stat_dict[key]/(float(batch_idx+1)))) 175 | 176 | # Evaluate average precision 177 | for i, ap_calculator in enumerate(ap_calculator_list): 178 | print('-'*10, 'iou_thresh: %f'%(AP_IOU_THRESHOLDS[i]), '-'*10) 179 | metrics_dict = ap_calculator.compute_metrics() 180 | for key in metrics_dict: 181 | log_string('eval %s: %f'%(key, metrics_dict[key])) 182 | 183 | mean_loss = stat_dict['loss']/float(batch_idx+1) 184 | return mean_loss 185 | 186 | 187 | def eval(): 188 | log_string(str(datetime.now())) 189 | # Reset numpy seed. 190 | # REF: https://github.com/pytorch/pytorch/issues/5059 191 | np.random.seed() 192 | loss = evaluate_one_epoch() 193 | 194 | if __name__=='__main__': 195 | eval() 196 | -------------------------------------------------------------------------------- /log/checkpoint.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log/checkpoint.tar -------------------------------------------------------------------------------- /log/log_train.txt: -------------------------------------------------------------------------------- 1 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 2 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 3 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=190, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 4 | -------------------------------------------------------------------------------- /log/tf_visualizer_log.txt: -------------------------------------------------------------------------------- 1 | ================ Training Loss (Wed Aug 26 08:54:33 2020) ================ 2 | ================ Training Loss (Wed Aug 26 08:54:33 2020) ================ 3 | ================ Training Loss (Wed Aug 26 08:55:23 2020) ================ 4 | ================ Training Loss (Wed Aug 26 08:55:23 2020) ================ 5 | -------------------------------------------------------------------------------- /log/train/events.out.tfevents.1598432199.2a9a360a68aa: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log/train/events.out.tfevents.1598432199.2a9a360a68aa 
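For reference: each line of the *_pred_map_cls.txt files described in the README (see demo_files/sunrgbd_results/000000_pred_map_cls.txt above) holds one detection: a class index, 24 comma-separated values giving the eight box-corner coordinates, and a confidence score. Below is a minimal parsing sketch under that assumption; read_pred_map_cls and CLASS_NAMES are illustrative names, not part of this repository, and the class order follows the README.

import numpy as np

# Class order as listed in the README (SUN RGB-D, 10 classes)
CLASS_NAMES = ['bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub']

def read_pred_map_cls(path):
    """Read a *_pred_map_cls.txt file into (class_name, corners, score) tuples.

    Each line is assumed to be: "<class_idx> <x1,y1,z1,...,x8,y8,z8> <confidence>".
    """
    detections = []
    with open(path) as f:
        for line in f:
            tokens = line.split()
            if len(tokens) != 3:
                continue  # skip blank or malformed lines
            cls_idx = int(tokens[0])
            # 24 comma-separated values -> 8 box corners with (x, y, z) each
            corners = np.array([float(v) for v in tokens[1].split(',')]).reshape(8, 3)
            score = float(tokens[2])
            detections.append((CLASS_NAMES[cls_idx], corners, score))
    return detections

Applied to the demo file above, this yields a single detection of class 'bed' (index 0) with confidence about 0.65.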
-------------------------------------------------------------------------------- /log_sunrgbd/log_train.txt: -------------------------------------------------------------------------------- 1 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log_sunrgbd', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log_sunrgbd', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 2 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log_sunrgbd', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log_sunrgbd', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 3 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log_sunrgbd', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log_sunrgbd', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 4 | Namespace(DUMP_DIR='/content/drive/My Drive/3d_detector/log_sunrgbd', ap_iou_thresh=0.25, batch_size=8, bn_decay_rate=0.5, bn_decay_step=20, checkpoint_path=None, cluster_sampling='vote_fps', dump_dir=None, dump_results=False, learning_rate=0.001, log_dir='log_sunrgbd', lr_decay_rates='0.1,0.1,0.1', lr_decay_steps='80,120,160', max_epoch=180, no_height=False, num_point=20000, num_target=256, use_color=False, use_sunrgbd_v2=False, vote_factor=1, weight_decay=0) 5 | -------------------------------------------------------------------------------- /log_sunrgbd/test/events.out.tfevents.1597829517.29e4cd465605: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/test/events.out.tfevents.1597829517.29e4cd465605 -------------------------------------------------------------------------------- /log_sunrgbd/test/events.out.tfevents.1597855641.13c5136526d7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/test/events.out.tfevents.1597855641.13c5136526d7 -------------------------------------------------------------------------------- /log_sunrgbd/tf_visualizer_log.txt: -------------------------------------------------------------------------------- 1 | ================ Training Loss (Tue Aug 18 18:48:28 2020) ================ 2 | ================ Training Loss (Tue Aug 18 18:48:28 2020) ================ 3 | ================ Training Loss (Wed Aug 19 07:17:23 2020) ================ 4 | ================ Training Loss (Wed Aug 19 07:17:24 2020) ================ 5 | ================ Training Loss (Wed Aug 19 12:42:42 2020) ================ 6 | ================ Training Loss (Wed Aug 19 12:42:43 2020) ================ 7 | ================ 
Training Loss (Mon Aug 24 19:49:20 2020) ================ 8 | ================ Training Loss (Mon Aug 24 19:49:21 2020) ================ 9 | -------------------------------------------------------------------------------- /log_sunrgbd/train/events.out.tfevents.1597776640.e3d3ae681f94: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/train/events.out.tfevents.1597776640.e3d3ae681f94 -------------------------------------------------------------------------------- /log_sunrgbd/train/events.out.tfevents.1597821535.29e4cd465605: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/train/events.out.tfevents.1597821535.29e4cd465605 -------------------------------------------------------------------------------- /log_sunrgbd/train/events.out.tfevents.1597841030.13c5136526d7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/train/events.out.tfevents.1597841030.13c5136526d7 -------------------------------------------------------------------------------- /log_sunrgbd/train/events.out.tfevents.1598298644.a8025551b9df: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/log_sunrgbd/train/events.out.tfevents.1598298644.a8025551b9df -------------------------------------------------------------------------------- /models/__pycache__/ap_helper.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/ap_helper.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/ap_helper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/ap_helper.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/backbone_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/backbone_module.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/backbone_module.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/backbone_module.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/detectnet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/detectnet.cpython-35.pyc 
-------------------------------------------------------------------------------- /models/__pycache__/detectnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/detectnet.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/dump_helper.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/dump_helper.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/dump_helper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/dump_helper.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/loss_helper.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/loss_helper.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/loss_helper.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/loss_helper.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/proposal_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/proposal_module.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/proposal_module.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/proposal_module.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/results_save.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/results_save.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/votenet.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/votenet.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/voting.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/voting.cpython-36.pyc -------------------------------------------------------------------------------- /models/__pycache__/voting_module.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/voting_module.cpython-35.pyc -------------------------------------------------------------------------------- /models/__pycache__/voting_module.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/models/__pycache__/voting_module.cpython-36.pyc -------------------------------------------------------------------------------- /models/backbone_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import sys 11 | import os 12 | 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | ROOT_DIR = os.path.dirname(BASE_DIR) 15 | sys.path.append(ROOT_DIR) 16 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 17 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 18 | 19 | from pointnet2_modules import PointnetSAModuleVotes, PointnetFPModule 20 | 21 | class Pointnet2Backbone(nn.Module): 22 | r""" 23 | Backbone network for point cloud feature learning. 24 | Based on Pointnet++ single-scale grouping network. 25 | 26 | Parameters 27 | ---------- 28 | input_feature_dim: int 29 | Number of input channels in the feature descriptor for each point. 30 | e.g. 3 for RGB. 31 | """ 32 | def __init__(self, input_feature_dim=0): 33 | super().__init__() 34 | 35 | self.sa1 = PointnetSAModuleVotes( 36 | npoint=2048, 37 | radius=0.2, 38 | nsample=64, 39 | mlp=[input_feature_dim, 64, 64, 128], 40 | use_xyz=True, 41 | normalize_xyz=True 42 | ) 43 | 44 | self.sa2 = PointnetSAModuleVotes( 45 | npoint=1024, 46 | radius=0.4, 47 | nsample=32, 48 | mlp=[128, 128, 128, 256], 49 | use_xyz=True, 50 | normalize_xyz=True 51 | ) 52 | 53 | self.sa3 = PointnetSAModuleVotes( 54 | npoint=512, 55 | radius=0.8, 56 | nsample=16, 57 | mlp=[256, 128, 128, 256], 58 | use_xyz=True, 59 | normalize_xyz=True 60 | ) 61 | 62 | self.sa4 = PointnetSAModuleVotes( 63 | npoint=256, 64 | radius=1.2, 65 | nsample=16, 66 | mlp=[256, 128, 128, 256], 67 | use_xyz=True, 68 | normalize_xyz=True 69 | ) 70 | 71 | self.fp1 = PointnetFPModule(mlp=[256+256,256,256]) 72 | self.fp2 = PointnetFPModule(mlp=[256+256,256,256]) 73 | 74 | def _break_up_pc(self, pc): 75 | xyz = pc[..., 0:3].contiguous() 76 | features = ( 77 | pc[..., 3:].transpose(1, 2).contiguous() 78 | if pc.size(-1) > 3 else None 79 | ) 80 | 81 | return xyz, features 82 | 83 | def forward(self, pointcloud: torch.cuda.FloatTensor, end_points=None): 84 | r""" 85 | Forward pass of the network 86 | 87 | Parameters 88 | ---------- 89 | pointcloud: Variable(torch.cuda.FloatTensor) 90 | (B, N, 3 + input_feature_dim) tensor 91 | Point cloud to run predicts on 92 | Each point in the point-cloud MUST 93 | be formated as (x, y, z, features...) 
94 | 95 | Returns 96 | ---------- 97 | end_points: {XXX_xyz, XXX_features, XXX_inds} 98 | XXX_xyz: float32 Tensor of shape (B,K,3) 99 | XXX_features: float32 Tensor of shape (B,K,D) 100 | XXX-inds: int64 Tensor of shape (B,K) values in [0,N-1] 101 | """ 102 | if not end_points: end_points = {} 103 | batch_size = pointcloud.shape[0] 104 | 105 | xyz, features = self._break_up_pc(pointcloud) 106 | 107 | # --------- 4 SET ABSTRACTION LAYERS --------- 108 | xyz, features, fps_inds = self.sa1(xyz, features) 109 | end_points['sa1_inds'] = fps_inds 110 | end_points['sa1_xyz'] = xyz 111 | end_points['sa1_features'] = features 112 | 113 | xyz, features, fps_inds = self.sa2(xyz, features) # this fps_inds is just 0,1,...,1023 114 | end_points['sa2_inds'] = fps_inds 115 | end_points['sa2_xyz'] = xyz 116 | end_points['sa2_features'] = features 117 | 118 | xyz, features, fps_inds = self.sa3(xyz, features) # this fps_inds is just 0,1,...,511 119 | end_points['sa3_xyz'] = xyz 120 | end_points['sa3_features'] = features 121 | 122 | xyz, features, fps_inds = self.sa4(xyz, features) # this fps_inds is just 0,1,...,255 123 | end_points['sa4_xyz'] = xyz 124 | end_points['sa4_features'] = features 125 | 126 | # --------- 2 FEATURE UPSAMPLING LAYERS -------- 127 | features = self.fp1(end_points['sa3_xyz'], end_points['sa4_xyz'], end_points['sa3_features'], end_points['sa4_features']) 128 | features = self.fp2(end_points['sa2_xyz'], end_points['sa3_xyz'], end_points['sa2_features'], features) 129 | end_points['fp2_features'] = features 130 | end_points['fp2_xyz'] = end_points['sa2_xyz'] 131 | num_seed = end_points['fp2_xyz'].shape[1] 132 | end_points['fp2_inds'] = end_points['sa1_inds'][:,0:num_seed] # indices among the entire input point clouds 133 | return end_points 134 | 135 | 136 | if __name__=='__main__': 137 | backbone_net = Pointnet2Backbone(input_feature_dim=3).cuda() 138 | print(backbone_net) 139 | backbone_net.eval() 140 | out = backbone_net(torch.rand(16,20000,6).cuda()) 141 | for key in sorted(out.keys()): 142 | print(key, '\t', out[key].shape) 143 | -------------------------------------------------------------------------------- /models/detectnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Deep hough voting network for 3D object detection in point clouds. 7 | 8 | Author: Charles R. Qi and Or Litany 9 | """ 10 | 11 | import torch 12 | import torch.nn as nn 13 | import numpy as np 14 | import sys 15 | import os 16 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 17 | ROOT_DIR = os.path.dirname(BASE_DIR) 18 | sys.path.append(BASE_DIR) 19 | from backbone_module import Pointnet2Backbone 20 | from voting import VotingModule 21 | from proposal_module import ProposalModule 22 | 23 | from loss_helper import get_loss 24 | 25 | 26 | class DetectNet(nn.Module): 27 | r""" 28 | A deep neural network for 3D object detection with end-to-end optimizable hough voting. 29 | 30 | Parameters 31 | ---------- 32 | num_class: int 33 | Number of semantics classes to predict over -- size of softmax classifier 34 | num_heading_bin: int 35 | num_size_cluster: int 36 | input_feature_dim: (default: 0) 37 | Input dim in the feature descriptor for each point. 
If the point cloud is Nx9, this 38 | value should be 6 as in an Nx9 point cloud, 3 of the channels are xyz, and 6 are feature descriptors 39 | num_proposal: int (default: 128) 40 | Number of proposals/detections generated from the network. Each proposal is a 3D OBB with a semantic class. 41 | vote_factor: (default: 1) 42 | Number of votes generated from each seed point. 43 | """ 44 | 45 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, 46 | input_feature_dim=0, num_proposal=128, vote_factor=1, sampling='vote_fps'): 47 | super().__init__() 48 | 49 | self.num_class = num_class 50 | self.num_heading_bin = num_heading_bin 51 | self.num_size_cluster = num_size_cluster 52 | self.mean_size_arr = mean_size_arr 53 | assert(mean_size_arr.shape[0] == self.num_size_cluster) 54 | self.input_feature_dim = input_feature_dim 55 | self.num_proposal = num_proposal 56 | self.vote_factor = vote_factor 57 | self.sampling=sampling 58 | 59 | # Backbone point feature learning 60 | self.backbone_net = Pointnet2Backbone(input_feature_dim=self.input_feature_dim) 61 | 62 | # Hough voting 63 | self.vgen = VotingModule(self.vote_factor, 256) 64 | 65 | # Vote aggregation and detection 66 | self.pnet = ProposalModule(num_class, num_heading_bin, num_size_cluster, 67 | mean_size_arr, num_proposal, sampling) 68 | 69 | def forward(self, inputs): 70 | """ Forward pass of the network 71 | 72 | Args: 73 | inputs: dict 74 | {point_clouds} 75 | 76 | point_clouds: Variable(torch.cuda.FloatTensor) 77 | (B, N, 3 + input_channels) tensor 78 | Point cloud to run predicts on 79 | Each point in the point-cloud MUST 80 | be formated as (x, y, z, features...) 81 | Returns: 82 | end_points: dict 83 | """ 84 | end_points = {} 85 | batch_size = inputs['point_clouds'].shape[0] 86 | 87 | end_points = self.backbone_net(inputs['point_clouds'], end_points) 88 | 89 | # --------- HOUGH VOTING --------- 90 | xyz = end_points['fp2_xyz'] 91 | features = end_points['fp2_features'] 92 | end_points['seed_inds'] = end_points['fp2_inds'] 93 | end_points['seed_xyz'] = xyz 94 | end_points['seed_features'] = features 95 | 96 | xyz, features = self.vgen(xyz, features) 97 | features_norm = torch.norm(features, p=2, dim=1) 98 | features = features.div(features_norm.unsqueeze(1)) 99 | end_points['vote_xyz'] = xyz 100 | end_points['vote_features'] = features 101 | 102 | end_points = self.pnet(xyz, features, end_points) 103 | 104 | return end_points 105 | 106 | 107 | -------------------------------------------------------------------------------- /models/loss_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | from nn_distance import nn_distance, huber_loss 15 | 16 | FAR_THRESHOLD = 0.6 17 | NEAR_THRESHOLD = 0.3 18 | GT_VOTE_FACTOR = 3 # number of GT votes per point 19 | OBJECTNESS_CLS_WEIGHTS = [0.2,0.8] # put larger weights on positive objectness 20 | 21 | def compute_vote_loss(end_points): 22 | """ Compute vote loss: Match predicted votes to GT votes. 
23 | 24 | Args: 25 | end_points: dict (read-only) 26 | 27 | Returns: 28 | vote_loss: scalar Tensor 29 | 30 | Overall idea: 31 | If the seed point belongs to an object (votes_label_mask == 1), 32 | then we require it to vote for the object center. 33 | 34 | Each seed point may vote for multiple translations v1,v2,v3 35 | A seed point may also be in the boxes of multiple objects: 36 | o1,o2,o3 with corresponding GT votes c1,c2,c3 37 | 38 | Then the loss for this seed point is: 39 | min(d(v_i,c_j)) for i=1,2,3 and j=1,2,3 40 | """ 41 | 42 | # Load ground truth votes and assign them to seed points 43 | batch_size = end_points['seed_xyz'].shape[0] 44 | num_seed = end_points['seed_xyz'].shape[1] # B,num_seed,3 45 | vote_xyz = end_points['vote_xyz'] # B,num_seed*vote_factor,3 46 | seed_inds = end_points['seed_inds'].long() # B,num_seed in [0,num_points-1] 47 | 48 | # Get groundtruth votes for the seed points 49 | # vote_label_mask: Use gather to select B,num_seed from B,num_point 50 | # non-object point has no GT vote mask = 0, object point has mask = 1 51 | # vote_label: Use gather to select B,num_seed,9 from B,num_point,9 52 | # with inds in shape B,num_seed,9 and 9 = GT_VOTE_FACTOR * 3 53 | seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1, seed_inds) 54 | seed_inds_expand = seed_inds.view(batch_size,num_seed,1).repeat(1,1,3*GT_VOTE_FACTOR) 55 | seed_gt_votes = torch.gather(end_points['vote_label'], 1, seed_inds_expand) 56 | seed_gt_votes += end_points['seed_xyz'].repeat(1,1,3) 57 | 58 | # Compute the min of min of distance 59 | vote_xyz_reshape = vote_xyz.view(batch_size*num_seed, -1, 3) # from B,num_seed*vote_factor,3 to B*num_seed,vote_factor,3 60 | seed_gt_votes_reshape = seed_gt_votes.view(batch_size*num_seed, GT_VOTE_FACTOR, 3) # from B,num_seed,3*GT_VOTE_FACTOR to B*num_seed,GT_VOTE_FACTOR,3 61 | # A predicted vote to no where is not penalized as long as there is a good vote near the GT vote. 62 | dist1, _, dist2, _ = nn_distance(vote_xyz_reshape, seed_gt_votes_reshape, l1=True) 63 | votes_dist, _ = torch.min(dist2, dim=1) # (B*num_seed,vote_factor) to (B*num_seed,) 64 | votes_dist = votes_dist.view(batch_size, num_seed) 65 | vote_loss = torch.sum(votes_dist*seed_gt_votes_mask.float())/(torch.sum(seed_gt_votes_mask.float())+1e-6) 66 | return vote_loss 67 | 68 | def compute_objectness_loss(end_points): 69 | """ Compute objectness loss for the proposals. 
70 | 71 | Args: 72 | end_points: dict (read-only) 73 | 74 | Returns: 75 | objectness_loss: scalar Tensor 76 | objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 77 | objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 78 | object_assignment: (batch_size, num_seed) Tensor with long int 79 | within [0,num_gt_object-1] 80 | """ 81 | # Associate proposal and GT objects by point-to-point distances 82 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'] 83 | gt_center = end_points['center_label'][:,:,0:3] 84 | B = gt_center.shape[0] 85 | K = aggregated_vote_xyz.shape[1] 86 | K2 = gt_center.shape[1] 87 | dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 88 | 89 | # Generate objectness label and mask 90 | # objectness_label: 1 if pred object center is within NEAR_THRESHOLD of any GT object 91 | # objectness_mask: 0 if pred object center is in gray zone (DONOTCARE), 1 otherwise 92 | euclidean_dist1 = torch.sqrt(dist1+1e-6) 93 | objectness_label = torch.zeros((B,K), dtype=torch.long).cuda() 94 | objectness_mask = torch.zeros((B,K)).cuda() 95 | objectness_label[euclidean_dist1<NEAR_THRESHOLD] = 1 96 | objectness_mask[euclidean_dist1<NEAR_THRESHOLD] = 1 97 | objectness_mask[euclidean_dist1>FAR_THRESHOLD] = 1 98 | 99 | # Compute objectness loss 100 | objectness_scores = end_points['objectness_scores'] 101 | criterion = nn.CrossEntropyLoss(torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') 102 | objectness_loss = criterion(objectness_scores.transpose(2,1), objectness_label) 103 | objectness_loss = torch.sum(objectness_loss * objectness_mask)/(torch.sum(objectness_mask)+1e-6) 104 | 105 | # Set assignment 106 | object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 107 | 108 | return objectness_loss, objectness_label, objectness_mask, object_assignment 109 | 110 | def compute_box_and_sem_cls_loss(end_points, config): 111 | """ Compute 3D bounding box and semantic classification loss.
112 | 113 | Args: 114 | end_points: dict (read-only) 115 | 116 | Returns: 117 | center_loss 118 | heading_cls_loss 119 | heading_reg_loss 120 | size_cls_loss 121 | size_reg_loss 122 | sem_cls_loss 123 | """ 124 | 125 | num_heading_bin = config.num_heading_bin 126 | num_size_cluster = config.num_size_cluster 127 | num_class = config.num_class 128 | mean_size_arr = config.mean_size_arr 129 | 130 | object_assignment = end_points['object_assignment'] 131 | batch_size = object_assignment.shape[0] 132 | 133 | # Compute center loss 134 | pred_center = end_points['center'] 135 | gt_center = end_points['center_label'][:,:,0:3] 136 | dist1, ind1, dist2, _ = nn_distance(pred_center, gt_center) # dist1: BxK, dist2: BxK2 137 | box_label_mask = end_points['box_label_mask'] 138 | objectness_label = end_points['objectness_label'].float() 139 | centroid_reg_loss1 = \ 140 | torch.sum(dist1*objectness_label)/(torch.sum(objectness_label)+1e-6) 141 | centroid_reg_loss2 = \ 142 | torch.sum(dist2*box_label_mask)/(torch.sum(box_label_mask)+1e-6) 143 | center_loss = centroid_reg_loss1 + centroid_reg_loss2 144 | 145 | # Compute heading loss 146 | heading_class_label = torch.gather(end_points['heading_class_label'], 1, object_assignment) # select (B,K) from (B,K2) 147 | criterion_heading_class = nn.CrossEntropyLoss(reduction='none') 148 | heading_class_loss = criterion_heading_class(end_points['heading_scores'].transpose(2,1), heading_class_label) # (B,K) 149 | heading_class_loss = torch.sum(heading_class_loss * objectness_label)/(torch.sum(objectness_label)+1e-6) 150 | 151 | heading_residual_label = torch.gather(end_points['heading_residual_label'], 1, object_assignment) # select (B,K) from (B,K2) 152 | heading_residual_normalized_label = heading_residual_label / (np.pi/num_heading_bin) 153 | 154 | # Ref: https://discuss.pytorch.org/t/convert-int-into-one-hot-format/507/3 155 | heading_label_one_hot = torch.cuda.FloatTensor(batch_size, heading_class_label.shape[1], num_heading_bin).zero_() 156 | heading_label_one_hot.scatter_(2, heading_class_label.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_heading_bin) 157 | heading_residual_normalized_loss = huber_loss(torch.sum(end_points['heading_residuals_normalized']*heading_label_one_hot, -1) - heading_residual_normalized_label, delta=1.0) # (B,K) 158 | heading_residual_normalized_loss = torch.sum(heading_residual_normalized_loss*objectness_label)/(torch.sum(objectness_label)+1e-6) 159 | 160 | # Compute size loss 161 | size_class_label = torch.gather(end_points['size_class_label'], 1, object_assignment) # select (B,K) from (B,K2) 162 | criterion_size_class = nn.CrossEntropyLoss(reduction='none') 163 | size_class_loss = criterion_size_class(end_points['size_scores'].transpose(2,1), size_class_label) # (B,K) 164 | size_class_loss = torch.sum(size_class_loss * objectness_label)/(torch.sum(objectness_label)+1e-6) 165 | 166 | size_residual_label = torch.gather(end_points['size_residual_label'], 1, object_assignment.unsqueeze(-1).repeat(1,1,3)) # select (B,K,3) from (B,K2,3) 167 | size_label_one_hot = torch.cuda.FloatTensor(batch_size, size_class_label.shape[1], num_size_cluster).zero_() 168 | size_label_one_hot.scatter_(2, size_class_label.unsqueeze(-1), 1) # src==1 so it's *one-hot* (B,K,num_size_cluster) 169 | size_label_one_hot_tiled = size_label_one_hot.unsqueeze(-1).repeat(1,1,1,3) # (B,K,num_size_cluster,3) 170 | predicted_size_residual_normalized = torch.sum(end_points['size_residuals_normalized']*size_label_one_hot_tiled, 2) # (B,K,3) 171 | 172 | 
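    # Normalize the ground-truth size residual by the mean (anchor) size of its assigned
    # size cluster, so the Huber regression below compares scale-free residuals for
    # large and small object classes alike.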
mean_size_arr_expanded = torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0) # (1,1,num_size_cluster,3) 173 | mean_size_label = torch.sum(size_label_one_hot_tiled * mean_size_arr_expanded, 2) # (B,K,3) 174 | size_residual_label_normalized = size_residual_label / mean_size_label # (B,K,3) 175 | size_residual_normalized_loss = torch.mean(huber_loss(predicted_size_residual_normalized - size_residual_label_normalized, delta=1.0), -1) # (B,K,3) -> (B,K) 176 | size_residual_normalized_loss = torch.sum(size_residual_normalized_loss*objectness_label)/(torch.sum(objectness_label)+1e-6) 177 | 178 | # 3.4 Semantic cls loss 179 | sem_cls_label = torch.gather(end_points['sem_cls_label'], 1, object_assignment) # select (B,K) from (B,K2) 180 | criterion_sem_cls = nn.CrossEntropyLoss(reduction='none') 181 | sem_cls_loss = criterion_sem_cls(end_points['sem_cls_scores'].transpose(2,1), sem_cls_label) # (B,K) 182 | sem_cls_loss = torch.sum(sem_cls_loss * objectness_label)/(torch.sum(objectness_label)+1e-6) 183 | 184 | return center_loss, heading_class_loss, heading_residual_normalized_loss, size_class_loss, size_residual_normalized_loss, sem_cls_loss 185 | 186 | def get_loss(end_points, config): 187 | """ Loss functions 188 | 189 | Args: 190 | end_points: dict 191 | { 192 | seed_xyz, seed_inds, vote_xyz, 193 | center, 194 | heading_scores, heading_residuals_normalized, 195 | size_scores, size_residuals_normalized, 196 | sem_cls_scores, #seed_logits,# 197 | center_label, 198 | heading_class_label, heading_residual_label, 199 | size_class_label, size_residual_label, 200 | sem_cls_label, 201 | box_label_mask, 202 | vote_label, vote_label_mask 203 | } 204 | config: dataset config instance 205 | Returns: 206 | loss: pytorch scalar tensor 207 | end_points: dict 208 | """ 209 | 210 | # Vote loss 211 | vote_loss = compute_vote_loss(end_points) 212 | end_points['vote_loss'] = vote_loss 213 | 214 | # Obj loss 215 | objectness_loss, objectness_label, objectness_mask, object_assignment = \ 216 | compute_objectness_loss(end_points) 217 | end_points['objectness_loss'] = objectness_loss 218 | end_points['objectness_label'] = objectness_label 219 | end_points['objectness_mask'] = objectness_mask 220 | end_points['object_assignment'] = object_assignment 221 | total_num_proposal = objectness_label.shape[0]*objectness_label.shape[1] 222 | end_points['pos_ratio'] = \ 223 | torch.sum(objectness_label.float().cuda())/float(total_num_proposal) 224 | end_points['neg_ratio'] = \ 225 | torch.sum(objectness_mask.float())/float(total_num_proposal) - end_points['pos_ratio'] 226 | 227 | # Box loss and sem cls loss 228 | center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = \ 229 | compute_box_and_sem_cls_loss(end_points, config) 230 | end_points['center_loss'] = center_loss 231 | end_points['heading_cls_loss'] = heading_cls_loss 232 | end_points['heading_reg_loss'] = heading_reg_loss 233 | end_points['size_cls_loss'] = size_cls_loss 234 | end_points['size_reg_loss'] = size_reg_loss 235 | end_points['sem_cls_loss'] = sem_cls_loss 236 | box_loss = center_loss + 0.1*heading_cls_loss + heading_reg_loss + 0.1*size_cls_loss + size_reg_loss 237 | end_points['box_loss'] = box_loss 238 | 239 | # Final loss function 240 | loss = vote_loss + 0.5*objectness_loss + box_loss + 0.1*sem_cls_loss 241 | loss *= 10 242 | end_points['loss'] = loss 243 | 244 | # -------------------------------------------- 245 | # Some other statistics 246 | obj_pred_val = 
torch.argmax(end_points['objectness_scores'], 2) # B,K 247 | obj_acc = torch.sum((obj_pred_val==objectness_label.long()).float()*objectness_mask)/(torch.sum(objectness_mask)+1e-6) 248 | end_points['obj_acc'] = obj_acc 249 | 250 | return loss, end_points 251 | -------------------------------------------------------------------------------- /models/loss_helper_boxnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import numpy as np 9 | import sys 10 | import os 11 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 12 | ROOT_DIR = os.path.dirname(BASE_DIR) 13 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 14 | from nn_distance import nn_distance, huber_loss 15 | sys.path.append(BASE_DIR) 16 | from loss_helper import compute_box_and_sem_cls_loss 17 | 18 | OBJECTNESS_CLS_WEIGHTS = [0.2,0.8] # put larger weights on positive objectness 19 | 20 | def compute_objectness_loss(end_points): 21 | """ Compute objectness loss for the proposals. 22 | 23 | Args: 24 | end_points: dict (read-only) 25 | 26 | Returns: 27 | objectness_loss: scalar Tensor 28 | objectness_label: (batch_size, num_seed) Tensor with value 0 or 1 29 | objectness_mask: (batch_size, num_seed) Tensor with value 0 or 1 30 | object_assignment: (batch_size, num_seed) Tensor with long int 31 | within [0,num_gt_object-1] 32 | """ 33 | # Associate proposal and GT objects by point-to-point distances 34 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'] 35 | gt_center = end_points['center_label'][:,:,0:3] 36 | B = gt_center.shape[0] 37 | K = aggregated_vote_xyz.shape[1] 38 | K2 = gt_center.shape[1] 39 | dist1, ind1, dist2, _ = nn_distance(aggregated_vote_xyz, gt_center) # dist1: BxK, dist2: BxK2 40 | 41 | # Generate objectness label and mask 42 | # NOTE: Different from VoteNet, here we use seed label as objectness label. 
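    # In other words: vote_label_mask is 1 for input points that belong to a ground-truth object,
    # so gathering it at seed_inds (per seed) and then at aggregated_vote_inds (per proposal)
    # marks a proposal as positive exactly when the seed it was grown from lies on an object.
    # There is no NEAR/FAR gray zone in this variant, which is why objectness_mask below is
    # simply all ones.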
43 | seed_inds = end_points['seed_inds'].long() # B,num_seed in [0,num_points-1] 44 | seed_gt_votes_mask = torch.gather(end_points['vote_label_mask'], 1, seed_inds) 45 | end_points['seed_labels'] = seed_gt_votes_mask 46 | aggregated_vote_inds = end_points['aggregated_vote_inds'] 47 | objectness_label = torch.gather(end_points['seed_labels'], 1, aggregated_vote_inds.long()) # select (B,K) from (B,1024) 48 | objectness_mask = torch.ones((objectness_label.shape[0], objectness_label.shape[1])).cuda() # no ignore zone anymore 49 | 50 | # Compute objectness loss 51 | objectness_scores = end_points['objectness_scores'] 52 | criterion = nn.CrossEntropyLoss(torch.Tensor(OBJECTNESS_CLS_WEIGHTS).cuda(), reduction='none') 53 | objectness_loss = criterion(objectness_scores.transpose(2,1), objectness_label) 54 | objectness_loss = torch.sum(objectness_loss * objectness_mask)/(torch.sum(objectness_mask)+1e-6) 55 | 56 | # Set assignment 57 | object_assignment = ind1 # (B,K) with values in 0,1,...,K2-1 58 | 59 | return objectness_loss, objectness_label, objectness_mask, object_assignment 60 | 61 | 62 | def get_loss(end_points, config): 63 | """ Loss functions 64 | 65 | Args: 66 | end_points: dict 67 | { 68 | seed_xyz, seed_inds, 69 | center, 70 | heading_scores, heading_residuals_normalized, 71 | size_scores, size_residuals_normalized, 72 | sem_cls_scores, #seed_logits,# 73 | center_label, 74 | heading_class_label, heading_residual_label, 75 | size_class_label, size_residual_label, 76 | sem_cls_label, 77 | box_label_mask, 78 | vote_label, vote_label_mask 79 | } 80 | config: dataset config instance 81 | Returns: 82 | loss: pytorch scalar tensor 83 | end_points: dict 84 | """ 85 | 86 | # Obj loss 87 | objectness_loss, objectness_label, objectness_mask, object_assignment = \ 88 | compute_objectness_loss(end_points) 89 | end_points['objectness_loss'] = objectness_loss 90 | end_points['objectness_label'] = objectness_label 91 | end_points['objectness_mask'] = objectness_mask 92 | end_points['object_assignment'] = object_assignment 93 | total_num_proposal = objectness_label.shape[0]*objectness_label.shape[1] 94 | end_points['pos_ratio'] = \ 95 | torch.sum(objectness_label.float().cuda())/float(total_num_proposal) 96 | end_points['neg_ratio'] = \ 97 | torch.sum(objectness_mask.float())/float(total_num_proposal) - end_points['pos_ratio'] 98 | 99 | # Box loss and sem cls loss 100 | center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = \ 101 | compute_box_and_sem_cls_loss(end_points, config) 102 | end_points['center_loss'] = center_loss 103 | end_points['heading_cls_loss'] = heading_cls_loss 104 | end_points['heading_reg_loss'] = heading_reg_loss 105 | end_points['size_cls_loss'] = size_cls_loss 106 | end_points['size_reg_loss'] = size_reg_loss 107 | end_points['sem_cls_loss'] = sem_cls_loss 108 | box_loss = center_loss + 0.1*heading_cls_loss + heading_reg_loss + 0.1*size_cls_loss + size_reg_loss 109 | end_points['box_loss'] = box_loss 110 | 111 | # Final loss function 112 | loss = 0.5*objectness_loss + box_loss + 0.1*sem_cls_loss 113 | loss *= 10 114 | end_points['loss'] = loss 115 | 116 | # -------------------------------------------- 117 | # Some other statistics 118 | obj_pred_val = torch.argmax(end_points['objectness_scores'], 2) # B,K 119 | obj_acc = torch.sum((obj_pred_val==objectness_label.long()).float()*objectness_mask)/(torch.sum(objectness_mask)+1e-6) 120 | end_points['obj_acc'] = obj_acc 121 | 122 | return loss, end_points 123 | 
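Both loss helpers above use the same masked, class-weighted cross-entropy pattern for the objectness term. The following is a minimal CPU-only sketch with toy shapes (illustrative code, not part of the repository); it shows why reduction='none' is used: the per-proposal losses are kept so that gray-zone proposals (mask 0) can be dropped before averaging.

import torch
import torch.nn as nn

B, K = 2, 4                                       # toy batch: 2 scenes, 4 proposals each
objectness_scores = torch.randn(B, K, 2)          # raw logits: [not-object, object]
objectness_label = torch.randint(0, 2, (B, K))    # 1 = proposal matched to a GT object
objectness_mask = torch.tensor([[1., 1., 0., 1.],
                                [1., 0., 1., 1.]])  # 0 = "do not care" proposal

criterion = nn.CrossEntropyLoss(torch.Tensor([0.2, 0.8]), reduction='none')  # OBJECTNESS_CLS_WEIGHTS
loss = criterion(objectness_scores.transpose(2, 1), objectness_label)        # (B, K), one value per proposal
loss = torch.sum(loss * objectness_mask) / (torch.sum(objectness_mask) + 1e-6)
print(loss)  # scalar, averaged only over proposals outside the gray zone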
-------------------------------------------------------------------------------- /models/proposal_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | import os 11 | import sys 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 15 | from pointnet2_modules import PointnetSAModuleVotes 16 | import pointnet2_utils 17 | 18 | def decode_scores(net, end_points, num_class, num_heading_bin, num_size_cluster, mean_size_arr): 19 | net_transposed = net.transpose(2,1) # (batch_size, 1024, ..) 20 | batch_size = net_transposed.shape[0] 21 | num_proposal = net_transposed.shape[1] 22 | 23 | objectness_scores = net_transposed[:,:,0:2] 24 | end_points['objectness_scores'] = objectness_scores 25 | 26 | base_xyz = end_points['aggregated_vote_xyz'] # (batch_size, num_proposal, 3) 27 | center = base_xyz + net_transposed[:,:,2:5] # (batch_size, num_proposal, 3) 28 | end_points['center'] = center 29 | 30 | heading_scores = net_transposed[:,:,5:5+num_heading_bin] 31 | heading_residuals_normalized = net_transposed[:,:,5+num_heading_bin:5+num_heading_bin*2] 32 | end_points['heading_scores'] = heading_scores # Bxnum_proposalxnum_heading_bin 33 | end_points['heading_residuals_normalized'] = heading_residuals_normalized # Bxnum_proposalxnum_heading_bin (should be -1 to 1) 34 | end_points['heading_residuals'] = heading_residuals_normalized * (np.pi/num_heading_bin) # Bxnum_proposalxnum_heading_bin 35 | 36 | size_scores = net_transposed[:,:,5+num_heading_bin*2:5+num_heading_bin*2+num_size_cluster] 37 | size_residuals_normalized = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster:5+num_heading_bin*2+num_size_cluster*4].view([batch_size, num_proposal, num_size_cluster, 3]) # Bxnum_proposalxnum_size_clusterx3 38 | end_points['size_scores'] = size_scores 39 | end_points['size_residuals_normalized'] = size_residuals_normalized 40 | end_points['size_residuals'] = size_residuals_normalized * torch.from_numpy(mean_size_arr.astype(np.float32)).cuda().unsqueeze(0).unsqueeze(0) 41 | 42 | sem_cls_scores = net_transposed[:,:,5+num_heading_bin*2+num_size_cluster*4:] # Bxnum_proposalx10 43 | end_points['sem_cls_scores'] = sem_cls_scores 44 | return end_points 45 | 46 | 47 | class ProposalModule(nn.Module): 48 | def __init__(self, num_class, num_heading_bin, num_size_cluster, mean_size_arr, num_proposal, sampling, seed_feat_dim=256): 49 | super().__init__() 50 | 51 | self.num_class = num_class 52 | self.num_heading_bin = num_heading_bin 53 | self.num_size_cluster = num_size_cluster 54 | self.mean_size_arr = mean_size_arr 55 | self.num_proposal = num_proposal 56 | self.sampling = sampling 57 | self.seed_feat_dim = seed_feat_dim 58 | 59 | # Vote clustering 60 | self.vote_aggregation = PointnetSAModuleVotes( 61 | npoint=self.num_proposal, 62 | radius=0.3, 63 | nsample=16, 64 | mlp=[self.seed_feat_dim, 128, 128, 128], 65 | use_xyz=True, 66 | normalize_xyz=True 67 | ) 68 | 69 | # Object proposal/detection 70 | # Objectness scores (2), center residual (3), 71 | # heading class+residual (num_heading_bin*2), size class+residual(num_size_cluster*4) 72 | self.conv1 = torch.nn.Conv1d(128,128,1) 73 | 
self.conv2 = torch.nn.Conv1d(128,128,1) 74 | self.conv3 = torch.nn.Conv1d(128,2+3+num_heading_bin*2+num_size_cluster*4+self.num_class,1) 75 | self.bn1 = torch.nn.BatchNorm1d(128) 76 | self.bn2 = torch.nn.BatchNorm1d(128) 77 | 78 | def forward(self, xyz, features, end_points): 79 | """ 80 | Args: 81 | xyz: (B,K,3) 82 | features: (B,C,K) 83 | Returns: 84 | scores: (B,num_proposal,2+3+NH*2+NS*4) 85 | """ 86 | if self.sampling == 'vote_fps': 87 | # Farthest point sampling (FPS) on votes 88 | xyz, features, fps_inds = self.vote_aggregation(xyz, features) 89 | sample_inds = fps_inds 90 | elif self.sampling == 'seed_fps': 91 | # FPS on seed and choose the votes corresponding to the seeds 92 | # This gets us a slightly better coverage of *object* votes than vote_fps (which tends to get more cluster votes) 93 | sample_inds = pointnet2_utils.furthest_point_sample(end_points['seed_xyz'], self.num_proposal) 94 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 95 | elif self.sampling == 'random': 96 | # Random sampling from the votes 97 | num_seed = end_points['seed_xyz'].shape[1] 98 | batch_size = end_points['seed_xyz'].shape[0] 99 | sample_inds = torch.randint(0, num_seed, (batch_size, self.num_proposal), dtype=torch.int).cuda() 100 | xyz, features, _ = self.vote_aggregation(xyz, features, sample_inds) 101 | else: 102 | print('Unknown sampling strategy: %s. Exiting!'%(self.sampling)) 103 | exit() 104 | end_points['aggregated_vote_xyz'] = xyz # (batch_size, num_proposal, 3) 105 | end_points['aggregated_vote_inds'] = sample_inds # (batch_size, num_proposal,) # should be 0,1,2,...,num_proposal 106 | 107 | # --------- PROPOSAL GENERATION --------- 108 | net = F.relu(self.bn1(self.conv1(features))) 109 | net = F.relu(self.bn2(self.conv2(net))) 110 | net = self.conv3(net) # (batch_size, 2+3+num_heading_bin*2+num_size_cluster*4, num_proposal) 111 | 112 | end_points = decode_scores(net, end_points, self.num_class, self.num_heading_bin, self.num_size_cluster, self.mean_size_arr) 113 | return end_points 114 | 115 | -------------------------------------------------------------------------------- /models/results_save.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | import numpy as np 7 | import torch 8 | import os 9 | import sys 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | import pc_util 14 | 15 | DUMP_CONF_THRESH = 0.5 # Dump boxes with obj prob larger than that. 16 | 17 | def softmax(x): 18 | ''' Numpy function for softmax''' 19 | shape = x.shape 20 | probs = np.exp(x - np.max(x, axis=len(shape)-1, keepdims=True)) 21 | probs /= np.sum(probs, axis=len(shape)-1, keepdims=True) 22 | return probs 23 | 24 | def save_results(end_points, dump_dir, config, inference_switch=False): 25 | ''' Dump results. 
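For orientation, a toy sketch (illustrative values only, not part of the file) of how the softmax helper and DUMP_CONF_THRESH defined above are combined further down to keep only confident proposals:

import numpy as np
objectness_scores = np.array([[ 2.0, -1.0],    # proposal 0: low object probability
                              [-0.5,  1.5]])   # proposal 1: high object probability
probs = np.exp(objectness_scores - objectness_scores.max(axis=1, keepdims=True))
probs /= probs.sum(axis=1, keepdims=True)
objectness_prob = probs[:, 1]                  # P(object) per proposal, roughly [0.05, 0.88]
keep = objectness_prob > 0.5                   # DUMP_CONF_THRESH -> [False, True]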
26 | 27 | Args: 28 | end_points: dict 29 | {..., pred_mask} 30 | pred_mask is a binary mask array of size (batch_size, num_proposal) computed by running NMS and empty box removal 31 | Returns: 32 | None 33 | ''' 34 | if not os.path.exists(dump_dir): 35 | os.system('mkdir %s'%(dump_dir)) 36 | 37 | # INPUT 38 | point_clouds = end_points['point_clouds'].cpu().numpy() 39 | batch_size = point_clouds.shape[0] 40 | 41 | # NETWORK OUTPUTS 42 | seed_xyz = end_points['seed_xyz'].detach().cpu().numpy() # (B,num_seed,3) 43 | if 'vote_xyz' in end_points: 44 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 45 | vote_xyz = end_points['vote_xyz'].detach().cpu().numpy() # (B,num_seed,3) 46 | aggregated_vote_xyz = end_points['aggregated_vote_xyz'].detach().cpu().numpy() 47 | objectness_scores = end_points['objectness_scores'].detach().cpu().numpy() # (B,K,2) 48 | pred_center = end_points['center'].detach().cpu().numpy() # (B,K,3) 49 | pred_heading_class = torch.argmax(end_points['heading_scores'], -1) # B,num_proposal 50 | pred_heading_residual = torch.gather(end_points['heading_residuals'], 2, pred_heading_class.unsqueeze(-1)) # B,num_proposal,1 51 | pred_heading_class = pred_heading_class.detach().cpu().numpy() # B,num_proposal 52 | pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal 53 | pred_size_class = torch.argmax(end_points['size_scores'], -1) # B,num_proposal 54 | pred_size_residual = torch.gather(end_points['size_residuals'], 2, pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1,1,1,3)) # B,num_proposal,1,3 55 | pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy() # B,num_proposal,3 56 | 57 | # OTHERS 58 | pred_mask = end_points['pred_mask'] # B,num_proposal 59 | idx_beg = 0 60 | 61 | for i in range(batch_size): 62 | pc = point_clouds[i,:,:] 63 | objectness_prob = softmax(objectness_scores[i,:,:])[:,1] # (K,) 64 | 65 | # Dump various point clouds 66 | pc_util.write_ply(pc, os.path.join(dump_dir, '%06d_pc.ply'%(idx_beg+i))) 67 | pc_util.write_ply(seed_xyz[i,:,:], os.path.join(dump_dir, '%06d_seed_pc.ply'%(idx_beg+i))) 68 | if 'vote_xyz' in end_points: 69 | pc_util.write_ply(end_points['vote_xyz'][i,:,:], os.path.join(dump_dir, '%06d_vgen_pc.ply'%(idx_beg+i))) 70 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 71 | pc_util.write_ply(aggregated_vote_xyz[i,:,:], os.path.join(dump_dir, '%06d_aggregated_vote_pc.ply'%(idx_beg+i))) 72 | pc_util.write_ply(pred_center[i,:,0:3], os.path.join(dump_dir, '%06d_proposal_pc.ply'%(idx_beg+i))) 73 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 74 | pc_util.write_ply(pred_center[i,objectness_prob>DUMP_CONF_THRESH,0:3], os.path.join(dump_dir, '%06d_confident_proposal_pc.ply'%(idx_beg+i))) 75 | 76 | # Dump predicted bounding boxes 77 | if np.sum(objectness_prob>DUMP_CONF_THRESH)>0: 78 | num_proposal = pred_center.shape[1] 79 | obbs = [] 80 | for j in range(num_proposal): 81 | obb = config.param2obb(pred_center[i,j,0:3], pred_heading_class[i,j], pred_heading_residual[i,j], 82 | pred_size_class[i,j], pred_size_residual[i,j]) 83 | obbs.append(obb) 84 | if len(obbs)>0: 85 | obbs = np.vstack(tuple(obbs)) # (num_proposal, 7) 86 | pc_util.write_oriented_bbox(obbs[objectness_prob>DUMP_CONF_THRESH,:], os.path.join(dump_dir, '%06d_pred_confident_bbox.ply'%(idx_beg+i))) 87 | pc_util.write_oriented_bbox(obbs[np.logical_and(objectness_prob>DUMP_CONF_THRESH, pred_mask[i,:]==1),:], os.path.join(dump_dir, 
'%06d_pred_confident_nms_bbox.ply'%(idx_beg+i))) 88 | pc_util.write_oriented_bbox(obbs[pred_mask[i,:]==1,:], os.path.join(dump_dir, '%06d_pred_nms_bbox.ply'%(idx_beg+i))) 89 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_pred_bbox.ply'%(idx_beg+i))) 90 | if 'batch_pred_map_cls' in end_points: 91 | for ii in range(batch_size): 92 | fout = open(os.path.join(dump_dir, '%06d_pred_map_cls.txt' % (ii)), 'w') 93 | for t in end_points['batch_pred_map_cls'][ii]: 94 | fout.write(str(t[0]) + ' ') 95 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 96 | fout.write(' ' + str(t[2])) 97 | fout.write('\n') 98 | fout.close() 99 | # Return if it is at inference time. No dumping of groundtruths 100 | if inference_switch: 101 | return 102 | 103 | # LABELS 104 | gt_center = end_points['center_label'].cpu().numpy() # (B,MAX_NUM_OBJ,3) 105 | gt_mask = end_points['box_label_mask'].cpu().numpy() # B,K2 106 | gt_heading_class = end_points['heading_class_label'].cpu().numpy() # B,K2 107 | gt_heading_residual = end_points['heading_residual_label'].cpu().numpy() # B,K2 108 | gt_size_class = end_points['size_class_label'].cpu().numpy() # B,K2 109 | gt_size_residual = end_points['size_residual_label'].cpu().numpy() # B,K2,3 110 | objectness_label = end_points['objectness_label'].detach().cpu().numpy() # (B,K,) 111 | objectness_mask = end_points['objectness_mask'].detach().cpu().numpy() # (B,K,) 112 | 113 | for i in range(batch_size): 114 | if np.sum(objectness_label[i,:])>0: 115 | pc_util.write_ply(pred_center[i,objectness_label[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_positive_proposal_pc.ply'%(idx_beg+i))) 116 | if np.sum(objectness_mask[i,:])>0: 117 | pc_util.write_ply(pred_center[i,objectness_mask[i,:]>0,0:3], os.path.join(dump_dir, '%06d_gt_mask_proposal_pc.ply'%(idx_beg+i))) 118 | pc_util.write_ply(gt_center[i,:,0:3], os.path.join(dump_dir, '%06d_gt_centroid_pc.ply'%(idx_beg+i))) 119 | pc_util.write_ply_color(pred_center[i,:,0:3], objectness_label[i,:], os.path.join(dump_dir, '%06d_proposal_pc_objectness_label.obj'%(idx_beg+i))) 120 | 121 | # Dump GT bounding boxes 122 | obbs = [] 123 | for j in range(gt_center.shape[1]): 124 | if gt_mask[i,j] == 0: continue 125 | obb = config.param2obb(gt_center[i,j,0:3], gt_heading_class[i,j], gt_heading_residual[i,j], 126 | gt_size_class[i,j], gt_size_residual[i,j]) 127 | obbs.append(obb) 128 | if len(obbs)>0: 129 | obbs = np.vstack(tuple(obbs)) # (num_gt_objects, 7) 130 | pc_util.write_oriented_bbox(obbs, os.path.join(dump_dir, '%06d_gt_bbox.ply'%(idx_beg+i))) 131 | 132 | # OPTIONALL, also dump prediction and gt details 133 | if 'batch_pred_map_cls' in end_points: 134 | for ii in range(batch_size): 135 | fout = open(os.path.join(dump_dir, '%06d_pred_map_cls.txt'%(ii)), 'w') 136 | for t in end_points['batch_pred_map_cls'][ii]: 137 | fout.write(str(t[0])+' ') 138 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 139 | fout.write(' '+str(t[2])) 140 | fout.write('\n') 141 | fout.close() 142 | if 'batch_gt_map_cls' in end_points: 143 | for ii in range(batch_size): 144 | fout = open(os.path.join(dump_dir, '%06d_gt_map_cls.txt'%(ii)), 'w') 145 | for t in end_points['batch_gt_map_cls'][ii]: 146 | fout.write(str(t[0])+' ') 147 | fout.write(",".join([str(x) for x in list(t[1].flatten())])) 148 | fout.write('\n') 149 | fout.close() 150 | -------------------------------------------------------------------------------- /models/voting.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Votes generation from seed point features. 19 | 20 | Args: 21 | vote_facotr: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 35 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 36 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 37 | 38 | def forward(self, seed_xyz, seed_features): 39 | """ Forward pass. 40 | 41 | Arguments: 42 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 43 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 44 | Returns: 45 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 46 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 47 | """ 48 | batch_size = seed_xyz.shape[0] 49 | num_seed = seed_xyz.shape[1] 50 | num_vote = num_seed*self.vote_factor 51 | net = F.relu(self.bn1(self.conv1(seed_features))) 52 | net = F.relu(self.bn2(self.conv2(net))) 53 | net = self.conv3(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 54 | 55 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 56 | offset = net[:,:,:,0:3] 57 | vote_xyz = seed_xyz.unsqueeze(2) + offset 58 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 59 | 60 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 61 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 62 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 63 | vote_features = vote_features.transpose(2,1).contiguous() 64 | 65 | return vote_xyz, vote_features 66 | 67 | if __name__=='__main__': 68 | net = VotingModule(2, 256).cuda() 69 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 70 | print('xyz', xyz.shape) 71 | print('features', features.shape) 72 | -------------------------------------------------------------------------------- /models/voting_module.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Voting module: generate votes from XYZ and features of seed points. 7 | 8 | Date: July, 2019 9 | Author: Charles R. 
Qi and Or Litany 10 | ''' 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.functional as F 15 | 16 | class VotingModule(nn.Module): 17 | def __init__(self, vote_factor, seed_feature_dim): 18 | """ Votes generation from seed point features. 19 | 20 | Args: 21 | vote_facotr: int 22 | number of votes generated from each seed point 23 | seed_feature_dim: int 24 | number of channels of seed point features 25 | vote_feature_dim: int 26 | number of channels of vote features 27 | """ 28 | super().__init__() 29 | self.vote_factor = vote_factor 30 | self.in_dim = seed_feature_dim 31 | self.out_dim = self.in_dim # due to residual feature, in_dim has to be == out_dim 32 | self.conv1 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 33 | self.conv2 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 34 | self.conv3 = torch.nn.Conv1d(self.in_dim, self.in_dim, 1) 35 | self.conv4 = torch.nn.Conv1d(self.in_dim, (3+self.out_dim) * self.vote_factor, 1) 36 | self.bn1 = torch.nn.BatchNorm1d(self.in_dim) 37 | self.bn2 = torch.nn.BatchNorm1d(self.in_dim) 38 | 39 | def forward(self, seed_xyz, seed_features): 40 | """ Forward pass. 41 | 42 | Arguments: 43 | seed_xyz: (batch_size, num_seed, 3) Pytorch tensor 44 | seed_features: (batch_size, feature_dim, num_seed) Pytorch tensor 45 | Returns: 46 | vote_xyz: (batch_size, num_seed*vote_factor, 3) 47 | vote_features: (batch_size, vote_feature_dim, num_seed*vote_factor) 48 | """ 49 | batch_size = seed_xyz.shape[0] 50 | num_seed = seed_xyz.shape[1] 51 | num_vote = num_seed*self.vote_factor 52 | net = F.relu(self.bn1(self.conv1(seed_features))) 53 | net = F.relu(self.bn2(self.conv2(net))) 54 | net = self.conv4(net) # (batch_size, (3+out_dim)*vote_factor, num_seed) 55 | 56 | net = net.transpose(2,1).view(batch_size, num_seed, self.vote_factor, 3+self.out_dim) 57 | offset = net[:,:,:,0:3] 58 | vote_xyz = seed_xyz.unsqueeze(2) + offset 59 | vote_xyz = vote_xyz.contiguous().view(batch_size, num_vote, 3) 60 | 61 | residual_features = net[:,:,:,3:] # (batch_size, num_seed, vote_factor, out_dim) 62 | vote_features = seed_features.transpose(2,1).unsqueeze(2) + residual_features 63 | vote_features = vote_features.contiguous().view(batch_size, num_vote, self.out_dim) 64 | vote_features = vote_features.transpose(2,1).contiguous() 65 | 66 | return vote_xyz, vote_features 67 | 68 | if __name__=='__main__': 69 | net = VotingModule(2, 256).cuda() 70 | xyz, features = net(torch.rand(8,1024,3).cuda(), torch.rand(8,256,1024).cuda()) 71 | print('xyz', xyz.shape) 72 | print('features', features.shape) 73 | -------------------------------------------------------------------------------- /pointnet2/__pycache__/pointnet2_modules.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pointnet2_modules.cpython-35.pyc -------------------------------------------------------------------------------- /pointnet2/__pycache__/pointnet2_modules.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pointnet2_modules.cpython-36.pyc -------------------------------------------------------------------------------- /pointnet2/__pycache__/pointnet2_utils.cpython-35.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pointnet2_utils.cpython-35.pyc -------------------------------------------------------------------------------- /pointnet2/__pycache__/pointnet2_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pointnet2_utils.cpython-36.pyc -------------------------------------------------------------------------------- /pointnet2/__pycache__/pytorch_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pytorch_utils.cpython-35.pyc -------------------------------------------------------------------------------- /pointnet2/__pycache__/pytorch_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/__pycache__/pytorch_utils.cpython-36.pyc -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | #include 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | AT_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | AT_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | AT_CHECK(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | AT_CHECK(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.type().is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data(), 31 | xyz.data(), idx.data()); 32 | } else { 33 | AT_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
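One detail of query_ball_point_kernel above that is easy to miss: slots beyond the number of neighbours actually found inside the ball are pre-filled with the index of the first neighbour, and rows with no neighbour at all stay zero. A rough NumPy sketch of that rule (function name and code are illustrative only, not part of the extension):

import numpy as np

def ball_query_reference(new_xyz, xyz, radius, nsample):
    # new_xyz: (M, 3) query centres, xyz: (N, 3) candidate points
    idx = np.zeros((new_xyz.shape[0], nsample), dtype=np.int32)
    for j, centre in enumerate(new_xyz):
        cnt = 0
        for k, p in enumerate(xyz):
            if cnt == nsample:
                break
            if np.sum((centre - p) ** 2) < radius ** 2:
                if cnt == 0:
                    idx[j, :] = k      # pad every slot with the first neighbour found
                idx[j, cnt] = k
                cnt += 1
    return idx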
5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | 11 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 12 | m.def("gather_points", &gather_points); 13 | m.def("gather_points_grad", &gather_points_grad); 14 | m.def("furthest_point_sampling", &furthest_point_sampling); 15 | 16 | m.def("three_nn", &three_nn); 17 | m.def("three_interpolate", &three_interpolate); 18 | m.def("three_interpolate_grad", &three_interpolate_grad); 19 | 20 | m.def("ball_query", &ball_query); 21 | 22 | m.def("group_points", &group_points); 23 | m.def("group_points_grad", &group_points_grad); 24 | } 25 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.type().is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.type().is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | AT_CHECK(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 43 | CHECK_CONTIGUOUS(grad_out); 44 | CHECK_CONTIGUOUS(idx); 45 | CHECK_IS_FLOAT(grad_out); 46 | CHECK_IS_INT(idx); 47 | 48 | if (grad_out.type().is_cuda()) { 49 | CHECK_CUDA(idx); 50 | } 51 | 52 | at::Tensor output = 53 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 54 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 55 | 56 | if (grad_out.type().is_cuda()) { 57 | group_points_grad_kernel_wrapper( 58 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 59 | grad_out.data(), idx.data(), output.data()); 60 | } else { 61 | AT_CHECK(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
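The PYBIND11_MODULE block above is what pointnet2_utils.py wraps on the Python side. Assuming the extension has been built (for example with the setup.py in the pointnet2 directory) and a CUDA device is available, a minimal smoke test of one exposed op could look like this (a sketch, not a file from the repository):

import torch
import pointnet2_utils  # Python wrappers around the compiled _ext module

xyz = torch.rand(2, 1024, 3).cuda()                     # (B, N, 3) point coordinates
inds = pointnet2_utils.furthest_point_sample(xyz, 256)  # (B, 256) indices of the FPS subset
print(inds.shape, inds.dtype)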
5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
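The contract of group_points above (input points(b, c, n) and idx(b, npoints, nsample), output out(b, c, npoints, nsample)) can be reproduced on CPU with plain torch.gather. A small reference sketch (illustrative only) that is handy for sanity-checking the custom op:

import torch

B, C, N, npoints, nsample = 2, 4, 16, 5, 3
points = torch.randn(B, C, N)
idx = torch.randint(0, N, (B, npoints, nsample))
# out[b, c, j, s] = points[b, c, idx[b, j, s]]
idx_expanded = idx.unsqueeze(1).expand(B, C, npoints, nsample)
out = points.unsqueeze(2).expand(B, C, npoints, N).gather(3, idx_expanded)
print(out.shape)  # torch.Size([2, 4, 5, 3])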
5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.type().is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.type().is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data(), knows.data(), 39 | dist2.data(), idx.data()); 40 | } else { 41 | AT_CHECK(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.type().is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.type().is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data(), idx.data(), weight.data(), 69 | output.data()); 70 | } else { 71 | AT_CHECK(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.type().is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.type().is_cuda()) { 95 | three_interpolate_grad_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data(), idx.data(), weight.data(), 98 | output.data()); 99 | } else { 100 | AT_CHECK(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: unknown(b, n, 3) known(b, m, 3) 13 | // output: dist2(b, n, 3), idx(b, n, 3) 14 | __global__ void three_nn_kernel(int b, int n, int m, 15 | const float *__restrict__ unknown, 16 | const float *__restrict__ known, 17 | float *__restrict__ dist2, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | unknown += batch_index * n * 3; 21 | known += batch_index * m * 3; 22 | dist2 += batch_index * n * 3; 23 | idx += batch_index * n * 3; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | for (int j = index; j < n; j += stride) { 28 | float ux = unknown[j * 3 + 0]; 29 | float uy = unknown[j * 3 + 1]; 30 | float uz = unknown[j * 3 + 2]; 31 | 32 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 33 | int besti1 = 0, besti2 = 0, besti3 = 0; 34 | for (int k = 0; k < m; ++k) { 35 | float x = known[k * 3 + 0]; 36 | float y = known[k * 3 + 1]; 37 | float z = known[k * 3 + 2]; 38 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 39 | if (d < best1) { 40 | best3 = best2; 41 | besti3 = besti2; 42 | best2 = best1; 43 | besti2 = besti1; 44 | best1 = d; 45 | besti1 = k; 46 | } else if (d < best2) { 47 | best3 = best2; 48 | besti3 = besti2; 49 | best2 = d; 50 | besti2 = k; 51 | } else if (d < best3) { 52 | best3 = d; 53 | besti3 = k; 54 | } 55 | } 56 | dist2[j * 3 + 0] = best1; 57 | dist2[j * 3 + 1] = best2; 58 | dist2[j * 3 + 2] = best3; 59 | 60 | idx[j * 3 + 0] = besti1; 61 | idx[j * 3 + 1] = besti2; 62 | idx[j * 3 + 2] = besti3; 63 | } 64 | } 65 | 66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 67 | const float *known, float *dist2, int *idx) { 68 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 69 | three_nn_kernel<<>>(b, n, m, unknown, known, 70 | dist2, idx); 71 | 72 | CUDA_CHECK_ERRORS(); 73 | } 74 | 75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3) 76 | // output: out(b, c, n) 77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n, 78 | const float *__restrict__ points, 79 | const int *__restrict__ idx, 80 | const float *__restrict__ weight, 81 | float *__restrict__ out) { 82 | int batch_index = blockIdx.x; 83 | points += batch_index * m * c; 84 | 85 | idx += batch_index * n * 3; 86 | weight += batch_index * n * 3; 87 | 88 | out += batch_index * n * c; 89 | 90 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 91 | const int stride = blockDim.y * blockDim.x; 92 | for (int i = index; i < c * n; i += stride) { 93 | const int l = i / n; 94 | const int j = i % n; 95 | float w1 = weight[j * 3 + 0]; 96 | float w2 = weight[j * 3 + 1]; 97 | float w3 = weight[j * 3 + 2]; 98 | 99 | int i1 = idx[j * 3 + 0]; 100 | int i2 = idx[j * 3 + 1]; 101 | int i3 = idx[j * 3 + 2]; 102 | 103 | out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 + 104 | points[l * m + i3] * w3; 105 | } 106 | } 107 | 108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 109 | const float *points, const int *idx, 110 | const float *weight, float *out) { 111 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 112 | three_interpolate_kernel<<>>( 113 | b, c, m, n, points, idx, weight, out); 114 | 115 | CUDA_CHECK_ERRORS(); 116 | } 117 | 118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3) 119 | // output: grad_points(b, c, m) 120 | 121 | __global__ void three_interpolate_grad_kernel( 122 | int b, int c, int n, int m, const float *__restrict__ grad_out, 123 | const int 
*__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.type().is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.type().is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data(), 37 | idx.data(), output.data()); 38 | } else { 39 | AT_CHECK(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.type().is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.type().is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data(), 63 | idx.data(), output.data()); 64 | } else { 65 | AT_CHECK(false, "CPU not 
supported"); 66 | } 67 | 68 | return output; 69 | } 70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 71 | CHECK_CONTIGUOUS(points); 72 | CHECK_IS_FLOAT(points); 73 | 74 | at::Tensor output = 75 | torch::zeros({points.size(0), nsamples}, 76 | at::device(points.device()).dtype(at::ScalarType::Int)); 77 | 78 | at::Tensor tmp = 79 | torch::full({points.size(0), points.size(1)}, 1e10, 80 | at::device(points.device()).dtype(at::ScalarType::Float)); 81 | 82 | if (points.type().is_cuda()) { 83 | furthest_point_sampling_kernel_wrapper( 84 | points.size(0), points.size(1), nsamples, points.data(), 85 | tmp.data(), output.data()); 86 | } else { 87 | AT_CHECK(false, "CPU not supported"); 88 | } 89 | 90 | return output; 91 | } 92 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, m) 12 | // output: out(b, c, m) 13 | __global__ void gather_points_kernel(int b, int c, int n, int m, 14 | const float *__restrict__ points, 15 | const int *__restrict__ idx, 16 | float *__restrict__ out) { 17 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 18 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 19 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 20 | int a = idx[i * m + j]; 21 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 28 | const float *points, const int *idx, 29 | float *out) { 30 | gather_points_kernel<<>>(b, c, n, npoints, 32 | points, idx, out); 33 | 34 | CUDA_CHECK_ERRORS(); 35 | } 36 | 37 | // input: grad_out(b, c, m) idx(b, m) 38 | // output: grad_points(b, c, n) 39 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 40 | const float *__restrict__ grad_out, 41 | const int *__restrict__ idx, 42 | float *__restrict__ grad_points) { 43 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 44 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 45 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 46 | int a = idx[i * m + j]; 47 | atomicAdd(grad_points + (i * c + l) * n + a, 48 | grad_out[(i * c + l) * m + j]); 49 | } 50 | } 51 | } 52 | } 53 | 54 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 55 | const float *grad_out, const int *idx, 56 | float *grad_points) { 57 | gather_points_grad_kernel<<>>( 59 | b, c, n, npoints, grad_out, idx, grad_points); 60 | 61 | CUDA_CHECK_ERRORS(); 62 | } 63 | 64 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 65 | int idx1, int idx2) { 66 | const float v1 = dists[idx1], v2 = dists[idx2]; 67 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 68 | dists[idx1] = max(v1, v2); 69 | dists_i[idx1] = v2 > v1 ? 
i2 : i1; 70 | } 71 | 72 | // Input dataset: (b, n, 3), tmp: (b, n) 73 | // Ouput idxs (b, m) 74 | template 75 | __global__ void furthest_point_sampling_kernel( 76 | int b, int n, int m, const float *__restrict__ dataset, 77 | float *__restrict__ temp, int *__restrict__ idxs) { 78 | if (m <= 0) return; 79 | __shared__ float dists[block_size]; 80 | __shared__ int dists_i[block_size]; 81 | 82 | int batch_index = blockIdx.x; 83 | dataset += batch_index * n * 3; 84 | temp += batch_index * n; 85 | idxs += batch_index * m; 86 | 87 | int tid = threadIdx.x; 88 | const int stride = block_size; 89 | 90 | int old = 0; 91 | if (threadIdx.x == 0) idxs[0] = old; 92 | 93 | __syncthreads(); 94 | for (int j = 1; j < m; j++) { 95 | int besti = 0; 96 | float best = -1; 97 | float x1 = dataset[old * 3 + 0]; 98 | float y1 = dataset[old * 3 + 1]; 99 | float z1 = dataset[old * 3 + 2]; 100 | for (int k = tid; k < n; k += stride) { 101 | float x2, y2, z2; 102 | x2 = dataset[k * 3 + 0]; 103 | y2 = dataset[k * 3 + 1]; 104 | z2 = dataset[k * 3 + 2]; 105 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 106 | if (mag <= 1e-3) continue; 107 | 108 | float d = 109 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 110 | 111 | float d2 = min(d, temp[k]); 112 | temp[k] = d2; 113 | besti = d2 > best ? k : besti; 114 | best = d2 > best ? d2 : best; 115 | } 116 | dists[tid] = best; 117 | dists_i[tid] = besti; 118 | __syncthreads(); 119 | 120 | if (block_size >= 512) { 121 | if (tid < 256) { 122 | __update(dists, dists_i, tid, tid + 256); 123 | } 124 | __syncthreads(); 125 | } 126 | if (block_size >= 256) { 127 | if (tid < 128) { 128 | __update(dists, dists_i, tid, tid + 128); 129 | } 130 | __syncthreads(); 131 | } 132 | if (block_size >= 128) { 133 | if (tid < 64) { 134 | __update(dists, dists_i, tid, tid + 64); 135 | } 136 | __syncthreads(); 137 | } 138 | if (block_size >= 64) { 139 | if (tid < 32) { 140 | __update(dists, dists_i, tid, tid + 32); 141 | } 142 | __syncthreads(); 143 | } 144 | if (block_size >= 32) { 145 | if (tid < 16) { 146 | __update(dists, dists_i, tid, tid + 16); 147 | } 148 | __syncthreads(); 149 | } 150 | if (block_size >= 16) { 151 | if (tid < 8) { 152 | __update(dists, dists_i, tid, tid + 8); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 8) { 157 | if (tid < 4) { 158 | __update(dists, dists_i, tid, tid + 4); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 4) { 163 | if (tid < 2) { 164 | __update(dists, dists_i, tid, tid + 2); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 2) { 169 | if (tid < 1) { 170 | __update(dists, dists_i, tid, tid + 1); 171 | } 172 | __syncthreads(); 173 | } 174 | 175 | old = dists_i[0]; 176 | if (tid == 0) idxs[j] = old; 177 | } 178 | } 179 | 180 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 181 | const float *dataset, float *temp, 182 | int *idxs) { 183 | unsigned int n_threads = opt_n_threads(n); 184 | 185 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 186 | 187 | switch (n_threads) { 188 | case 512: 189 | furthest_point_sampling_kernel<512> 190 | <<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 256: 193 | furthest_point_sampling_kernel<256> 194 | <<>>(b, n, m, dataset, temp, idxs); 195 | break; 196 | case 128: 197 | furthest_point_sampling_kernel<128> 198 | <<>>(b, n, m, dataset, temp, idxs); 199 | break; 200 | case 64: 201 | furthest_point_sampling_kernel<64> 202 | <<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 32: 205 | 
furthest_point_sampling_kernel<32> 206 | <<>>(b, n, m, dataset, temp, idxs); 207 | break; 208 | case 16: 209 | furthest_point_sampling_kernel<16> 210 | <<>>(b, n, m, dataset, temp, idxs); 211 | break; 212 | case 8: 213 | furthest_point_sampling_kernel<8> 214 | <<>>(b, n, m, dataset, temp, idxs); 215 | break; 216 | case 4: 217 | furthest_point_sampling_kernel<4> 218 | <<>>(b, n, m, dataset, temp, idxs); 219 | break; 220 | case 2: 221 | furthest_point_sampling_kernel<2> 222 | <<>>(b, n, m, dataset, temp, idxs); 223 | break; 224 | case 1: 225 | furthest_point_sampling_kernel<1> 226 | <<>>(b, n, m, dataset, temp, idxs); 227 | break; 228 | default: 229 | furthest_point_sampling_kernel<512> 230 | <<>>(b, n, m, dataset, temp, idxs); 231 | } 232 | 233 | CUDA_CHECK_ERRORS(); 234 | } 235 | -------------------------------------------------------------------------------- /pointnet2/build/lib.linux-x86_64-3.6/pointnet2/_ext.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/lib.linux-x86_64-3.6/pointnet2/_ext.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/ball_query.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/ball_query.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/ball_query_gpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/ball_query_gpu.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/bindings.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/bindings.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/group_points.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/group_points.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/group_points_gpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/group_points_gpu.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/interpolate.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/interpolate.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/interpolate_gpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/interpolate_gpu.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/sampling.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/sampling.o -------------------------------------------------------------------------------- /pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/sampling_gpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/build/temp.linux-x86_64-3.6/_ext_src/src/sampling_gpu.o -------------------------------------------------------------------------------- /pointnet2/dist/pointnet2-0.0.0-py3.6-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/pointnet2/dist/pointnet2-0.0.0-py3.6-linux-x86_64.egg -------------------------------------------------------------------------------- /pointnet2/pointnet2.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: pointnet2 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /pointnet2/pointnet2.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | _ext_src/src/ball_query.cpp 3 | _ext_src/src/ball_query_gpu.cu 4 | _ext_src/src/bindings.cpp 5 | _ext_src/src/group_points.cpp 6 | _ext_src/src/group_points_gpu.cu 7 | _ext_src/src/interpolate.cpp 8 | _ext_src/src/interpolate_gpu.cu 9 | _ext_src/src/sampling.cpp 10 | _ext_src/src/sampling_gpu.cu 11 | pointnet2.egg-info/PKG-INFO 12 | pointnet2.egg-info/SOURCES.txt 13 | pointnet2.egg-info/dependency_links.txt 14 | pointnet2.egg-info/top_level.txt -------------------------------------------------------------------------------- /pointnet2/pointnet2.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pointnet2/pointnet2.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | pointnet2 2 | -------------------------------------------------------------------------------- /pointnet2/pointnet2_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Testing customized ops. ''' 7 | 8 | import torch 9 | from torch.autograd import gradcheck 10 | import numpy as np 11 | 12 | import os 13 | import sys 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | import pointnet2_utils 17 | 18 | def test_interpolation_grad(): 19 | batch_size = 1 20 | feat_dim = 2 21 | m = 4 22 | feats = torch.randn(batch_size, feat_dim, m, requires_grad=True).float().cuda() 23 | 24 | def interpolate_func(inputs): 25 | idx = torch.from_numpy(np.array([[[0,1,2],[1,2,3]]])).int().cuda() 26 | weight = torch.from_numpy(np.array([[[1,1,1],[2,2,2]]])).float().cuda() 27 | interpolated_feats = pointnet2_utils.three_interpolate(inputs, idx, weight) 28 | return interpolated_feats 29 | 30 | assert (gradcheck(interpolate_func, feats, atol=1e-1, rtol=1e-1)) 31 | 32 | if __name__=='__main__': 33 | test_interpolation_grad() 34 | -------------------------------------------------------------------------------- /pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | class SharedMLP(nn.Sequential): 12 | 13 | def __init__( 14 | self, 15 | args: List[int], 16 | *, 17 | bn: bool = False, 18 | activation=nn.ReLU(inplace=True), 19 | preact: bool = False, 20 | first: bool = False, 21 | name: str = "" 22 | ): 23 | super().__init__() 24 | 25 | for i in range(len(args) - 1): 26 | self.add_module( 27 | name + 'layer{}'.format(i), 28 | Conv2d( 29 | args[i], 30 | args[i + 1], 31 | bn=(not first or not preact or (i != 0)) and bn, 32 | activation=activation 33 | if (not first or not preact or (i != 0)) else None, 34 | preact=preact 35 | ) 36 | ) 37 | 38 | 39 | class _BNBase(nn.Sequential): 40 | 41 | def __init__(self, in_size, batch_norm=None, name=""): 42 | super().__init__() 43 | self.add_module(name + "bn", batch_norm(in_size)) 44 | 45 | nn.init.constant_(self[0].weight, 1.0) 46 | nn.init.constant_(self[0].bias, 0) 47 | 48 | 49 | class BatchNorm1d(_BNBase): 50 | 51 | def __init__(self, in_size: int, *, name: str = ""): 52 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 53 | 54 | 55 | class BatchNorm2d(_BNBase): 56 | 57 | def __init__(self, in_size: int, name: str = ""): 58 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 59 | 60 | 61 | class BatchNorm3d(_BNBase): 62 | 63 | def __init__(self, in_size: int, name: str = ""): 64 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 65 | 66 | 67 | class _ConvBase(nn.Sequential): 68 | 69 | def __init__( 70 | self, 71 | in_size, 72 | out_size, 73 | kernel_size, 74 | stride, 75 | padding, 76 | activation, 77 | bn, 78 | init, 79 | conv=None, 80 | batch_norm=None, 81 | bias=True, 82 | preact=False, 83 | name="" 84 | ): 85 | super().__init__() 86 | 87 | bias = bias and (not bn) 88 | conv_unit = conv( 89 | in_size, 90 | out_size, 91 | kernel_size=kernel_size, 92 | stride=stride, 93 | padding=padding, 94 | bias=bias 95 | ) 96 | init(conv_unit.weight) 97 | if bias: 98 | nn.init.constant_(conv_unit.bias, 0) 99 | 
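        # (editor's note) The block below assembles the sequential module in one of
        # two orders:
        #   preact=True  ->  BatchNorm(in_size) -> activation -> conv
        #   preact=False ->  conv -> BatchNorm(out_size) -> activation   (default)
        # Because bias was already forced off above whenever bn=True, a call such as
        # Conv1d(64, 128, bn=True) builds a bias-free 1x1 convolution followed by
        # BatchNorm1d and ReLU.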
100 | if bn: 101 | if not preact: 102 | bn_unit = batch_norm(out_size) 103 | else: 104 | bn_unit = batch_norm(in_size) 105 | 106 | if preact: 107 | if bn: 108 | self.add_module(name + 'bn', bn_unit) 109 | 110 | if activation is not None: 111 | self.add_module(name + 'activation', activation) 112 | 113 | self.add_module(name + 'conv', conv_unit) 114 | 115 | if not preact: 116 | if bn: 117 | self.add_module(name + 'bn', bn_unit) 118 | 119 | if activation is not None: 120 | self.add_module(name + 'activation', activation) 121 | 122 | 123 | class Conv1d(_ConvBase): 124 | 125 | def __init__( 126 | self, 127 | in_size: int, 128 | out_size: int, 129 | *, 130 | kernel_size: int = 1, 131 | stride: int = 1, 132 | padding: int = 0, 133 | activation=nn.ReLU(inplace=True), 134 | bn: bool = False, 135 | init=nn.init.kaiming_normal_, 136 | bias: bool = True, 137 | preact: bool = False, 138 | name: str = "" 139 | ): 140 | super().__init__( 141 | in_size, 142 | out_size, 143 | kernel_size, 144 | stride, 145 | padding, 146 | activation, 147 | bn, 148 | init, 149 | conv=nn.Conv1d, 150 | batch_norm=BatchNorm1d, 151 | bias=bias, 152 | preact=preact, 153 | name=name 154 | ) 155 | 156 | 157 | class Conv2d(_ConvBase): 158 | 159 | def __init__( 160 | self, 161 | in_size: int, 162 | out_size: int, 163 | *, 164 | kernel_size: Tuple[int, int] = (1, 1), 165 | stride: Tuple[int, int] = (1, 1), 166 | padding: Tuple[int, int] = (0, 0), 167 | activation=nn.ReLU(inplace=True), 168 | bn: bool = False, 169 | init=nn.init.kaiming_normal_, 170 | bias: bool = True, 171 | preact: bool = False, 172 | name: str = "" 173 | ): 174 | super().__init__( 175 | in_size, 176 | out_size, 177 | kernel_size, 178 | stride, 179 | padding, 180 | activation, 181 | bn, 182 | init, 183 | conv=nn.Conv2d, 184 | batch_norm=BatchNorm2d, 185 | bias=bias, 186 | preact=preact, 187 | name=name 188 | ) 189 | 190 | 191 | class Conv3d(_ConvBase): 192 | 193 | def __init__( 194 | self, 195 | in_size: int, 196 | out_size: int, 197 | *, 198 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 199 | stride: Tuple[int, int, int] = (1, 1, 1), 200 | padding: Tuple[int, int, int] = (0, 0, 0), 201 | activation=nn.ReLU(inplace=True), 202 | bn: bool = False, 203 | init=nn.init.kaiming_normal_, 204 | bias: bool = True, 205 | preact: bool = False, 206 | name: str = "" 207 | ): 208 | super().__init__( 209 | in_size, 210 | out_size, 211 | kernel_size, 212 | stride, 213 | padding, 214 | activation, 215 | bn, 216 | init, 217 | conv=nn.Conv3d, 218 | batch_norm=BatchNorm3d, 219 | bias=bias, 220 | preact=preact, 221 | name=name 222 | ) 223 | 224 | 225 | class FC(nn.Sequential): 226 | 227 | def __init__( 228 | self, 229 | in_size: int, 230 | out_size: int, 231 | *, 232 | activation=nn.ReLU(inplace=True), 233 | bn: bool = False, 234 | init=None, 235 | preact: bool = False, 236 | name: str = "" 237 | ): 238 | super().__init__() 239 | 240 | fc = nn.Linear(in_size, out_size, bias=not bn) 241 | if init is not None: 242 | init(fc.weight) 243 | if not bn: 244 | nn.init.constant_(fc.bias, 0) 245 | 246 | if preact: 247 | if bn: 248 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 249 | 250 | if activation is not None: 251 | self.add_module(name + 'activation', activation) 252 | 253 | self.add_module(name + 'fc', fc) 254 | 255 | if not preact: 256 | if bn: 257 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 258 | 259 | if activation is not None: 260 | self.add_module(name + 'activation', activation) 261 | 262 | def set_bn_momentum_default(bn_momentum): 263 | 264 | def fn(m): 265 | if 
isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 266 | m.momentum = bn_momentum 267 | 268 | return fn 269 | 270 | 271 | class BNMomentumScheduler(object): 272 | 273 | def __init__( 274 | self, model, bn_lambda, last_epoch=-1, 275 | setter=set_bn_momentum_default 276 | ): 277 | if not isinstance(model, nn.Module): 278 | raise RuntimeError( 279 | "Class '{}' is not a PyTorch nn Module".format( 280 | type(model).__name__ 281 | ) 282 | ) 283 | 284 | self.model = model 285 | self.setter = setter 286 | self.lmbd = bn_lambda 287 | 288 | self.step(last_epoch + 1) 289 | self.last_epoch = last_epoch 290 | 291 | def step(self, epoch=None): 292 | if epoch is None: 293 | epoch = self.last_epoch + 1 294 | 295 | self.last_epoch = epoch 296 | self.model.apply(self.setter(self.lmbd(epoch))) 297 | 298 | 299 | -------------------------------------------------------------------------------- /pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | 10 | _ext_src_root = "_ext_src" 11 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 12 | "{}/src/*.cu".format(_ext_src_root) 13 | ) 14 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 15 | 16 | setup( 17 | name='pointnet2', 18 | ext_modules=[ 19 | CUDAExtension( 20 | name='pointnet2._ext', 21 | sources=_ext_sources, 22 | extra_compile_args={ 23 | "cxx": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 24 | "nvcc": ["-O2", "-I{}".format("{}/include".format(_ext_src_root))], 25 | }, 26 | ) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | } 31 | ) 32 | -------------------------------------------------------------------------------- /sunrgbd/drive-download-20200818T081036Z-001.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/sunrgbd/drive-download-20200818T081036Z-001.zip -------------------------------------------------------------------------------- /sunrgbd/drive-download-20200818T081133Z-001.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/sunrgbd/drive-download-20200818T081133Z-001.zip -------------------------------------------------------------------------------- /sunrgbd/model_util_sunrgbd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
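# (editor's note) This module defines SunrgbdDatasetConfig: 10 object classes,
# 12 heading bins and 10 size clusters for SUN RGB-D, plus helpers that convert
# between continuous box parameters and their discretized (class, residual)
# form. A minimal usage sketch of the heading round trip defined further down
# (hypothetical example values, not part of the original file):
#
#   cfg = SunrgbdDatasetConfig()
#   cls_id, residual = cfg.angle2class(0.7)    # bin index + offset from bin center
#   assert abs(cfg.class2angle(cls_id, residual) - 0.7) < 1e-6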
5 | 6 | import numpy as np 7 | import sys 8 | import os 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | ROOT_DIR = os.path.dirname(BASE_DIR) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | 14 | class SunrgbdDatasetConfig(object): 15 | def __init__(self): 16 | self.num_class = 10 17 | self.num_heading_bin = 12 18 | self.num_size_cluster = 10 19 | 20 | self.type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | self.class2type = {self.type2class[t]:t for t in self.type2class} 22 | self.type2onehotclass={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 23 | self.type_mean_size = {'bathtub': np.array([0.765840,1.398258,0.472728]), 24 | 'bed': np.array([2.114256,1.620300,0.927272]), 25 | 'bookshelf': np.array([0.404671,1.071108,1.688889]), 26 | 'chair': np.array([0.591958,0.552978,0.827272]), 27 | 'desk': np.array([0.695190,1.346299,0.736364]), 28 | 'dresser': np.array([0.528526,1.002642,1.172878]), 29 | 'night_stand': np.array([0.500618,0.632163,0.683424]), 30 | 'sofa': np.array([0.923508,1.867419,0.845495]), 31 | 'table': np.array([0.791118,1.279516,0.718182]), 32 | 'toilet': np.array([0.699104,0.454178,0.756250])} 33 | 34 | self.mean_size_arr = np.zeros((self.num_size_cluster, 3)) 35 | for i in range(self.num_size_cluster): 36 | self.mean_size_arr[i,:] = self.type_mean_size[self.class2type[i]] 37 | 38 | def size2class(self, size, type_name): 39 | ''' Convert 3D box size (l,w,h) to size class and size residual ''' 40 | size_class = self.type2class[type_name] 41 | size_residual = size - self.type_mean_size[type_name] 42 | return size_class, size_residual 43 | 44 | def class2size(self, pred_cls, residual): 45 | ''' Inverse function to size2class ''' 46 | mean_size = self.type_mean_size[self.class2type[pred_cls]] 47 | return mean_size + residual 48 | 49 | def angle2class(self, angle): 50 | ''' Convert continuous angle to discrete classDATASET_CONFIG 51 | [optinal] also small regression number from 52 | class center angle to current angle. 53 | 54 | angle is from 0-2pi (or -pi~pi), class center at 0, 1*(2pi/N), 2*(2pi/N) ... 
(N-1)*(2pi/N) 55 | return is class of int32 of 0,1,...,N-1 and a number such that 56 | class*(2pi/N) + number = angle 57 | ''' 58 | num_class = self.num_heading_bin 59 | angle = angle%(2*np.pi) 60 | assert(angle>=0 and angle<=2*np.pi) 61 | angle_per_class = 2*np.pi/float(num_class) 62 | shifted_angle = (angle+angle_per_class/2)%(2*np.pi) 63 | class_id = int(shifted_angle/angle_per_class) 64 | residual_angle = shifted_angle - (class_id*angle_per_class+angle_per_class/2) 65 | return class_id, residual_angle 66 | 67 | def class2angle(self, pred_cls, residual, to_label_format=True): 68 | ''' Inverse function to angle2class ''' 69 | num_class = self.num_heading_bin 70 | angle_per_class = 2*np.pi/float(num_class) 71 | angle_center = pred_cls * angle_per_class 72 | angle = angle_center + residual 73 | if to_label_format and angle>np.pi: 74 | angle = angle - 2*np.pi 75 | return angle 76 | 77 | def param2obb(self, center, heading_class, heading_residual, size_class, size_residual): 78 | heading_angle = self.class2angle(heading_class, heading_residual) 79 | box_size = self.class2size(int(size_class), size_residual) 80 | obb = np.zeros((7,)) 81 | obb[0:3] = center 82 | obb[3:6] = box_size 83 | obb[6] = heading_angle*-1 84 | return obb 85 | 86 | 87 | -------------------------------------------------------------------------------- /sunrgbd/sunrgbd_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Provides Python helper function to read My SUNRGBD dataset. 7 | 8 | Author: Charles R. Qi 9 | Date: October, 2017 10 | 11 | Updated by Charles R. Qi 12 | Date: December, 2018 13 | Note: removed basis loading. 
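Editor's example (a minimal sketch using the helpers defined below; the input
values are made up): the camera/depth axis flips are inverses of each other.

    >>> import numpy as np
    >>> pc = np.array([[1.0, 2.0, 3.0]])
    >>> flip_axis_to_depth(flip_axis_to_camera(pc)).tolist()
    [[1.0, 2.0, 3.0]]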
14 | ''' 15 | import numpy as np 16 | import cv2 17 | import os 18 | import scipy.io as sio # to load .mat files for depth points 19 | 20 | type2class={'bed':0, 'table':1, 'sofa':2, 'chair':3, 'toilet':4, 'desk':5, 'dresser':6, 'night_stand':7, 'bookshelf':8, 'bathtub':9} 21 | class2type = {type2class[t]:t for t in type2class} 22 | 23 | 24 | def flip_axis_to_camera(pc): 25 | ''' Flip X-right,Y-forward,Z-up to X-right,Y-down,Z-forward 26 | Input and output are both (N,3) array 27 | ''' 28 | pc2 = np.copy(pc) 29 | pc2[:,[0,1,2]] = pc2[:,[0,2,1]] # cam X,Y,Z = depth X,-Z,Y 30 | pc2[:,1] *= -1 31 | return pc2 32 | 33 | def flip_axis_to_depth(pc): 34 | pc2 = np.copy(pc) 35 | pc2[:,[0,1,2]] = pc2[:,[0,2,1]] # depth X,Y,Z = cam X,Z,-Y 36 | pc2[:,2] *= -1 37 | return pc2 38 | 39 | 40 | class SUNObject3d(object): 41 | def __init__(self, line): 42 | data = line.split(' ') 43 | data[1:] = [float(x) for x in data[1:]] 44 | self.classname = data[0] 45 | self.xmin = data[1] 46 | self.ymin = data[2] 47 | self.xmax = data[1]+data[3] 48 | self.ymax = data[2]+data[4] 49 | self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax]) 50 | self.centroid = np.array([data[5],data[6],data[7]]) 51 | self.unused_dimension = np.array([data[8],data[9],data[10]]) 52 | self.w = data[8] 53 | self.l = data[9] 54 | self.h = data[10] 55 | self.orientation = np.zeros((3,)) 56 | self.orientation[0] = data[11] 57 | self.orientation[1] = data[12] 58 | self.heading_angle = -1 * np.arctan2(self.orientation[1], self.orientation[0]) 59 | 60 | class SUNRGBD_Calibration(object): 61 | ''' Calibration matrices and utils 62 | We define five coordinate system in SUN RGBD dataset 63 | 64 | camera coodinate: 65 | Z is forward, Y is downward, X is rightward 66 | 67 | depth coordinate: 68 | Just change axis order and flip up-down axis from camera coord 69 | 70 | upright depth coordinate: tilted depth coordinate by Rtilt such that Z is gravity direction, 71 | Z is up-axis, Y is forward, X is right-ward 72 | 73 | upright camera coordinate: 74 | Just change axis order and flip up-down axis from upright depth coordinate 75 | 76 | image coordinate: 77 | ----> x-axis (u) 78 | | 79 | v 80 | y-axis (v) 81 | 82 | depth points are stored in upright depth coordinate. 83 | labels for 3d box (basis, centroid, size) are in upright depth coordinate. 
84 | 2d boxes are in image coordinate 85 | 86 | We generate frustum point cloud and 3d box in upright camera coordinate 87 | ''' 88 | 89 | def __init__(self, calib_filepath): 90 | lines = [line.rstrip() for line in open(calib_filepath)] 91 | Rtilt = np.array([float(x) for x in lines[0].split(' ')]) 92 | self.Rtilt = np.reshape(Rtilt, (3,3), order='F') 93 | K = np.array([float(x) for x in lines[1].split(' ')]) 94 | self.K = np.reshape(K, (3,3), order='F') 95 | self.f_u = self.K[0,0] 96 | self.f_v = self.K[1,1] 97 | self.c_u = self.K[0,2] 98 | self.c_v = self.K[1,2] 99 | 100 | def project_upright_depth_to_camera(self, pc): 101 | ''' project point cloud from depth coord to camera coordinate 102 | Input: (N,3) Output: (N,3) 103 | ''' 104 | # Project upright depth to depth coordinate 105 | pc2 = np.dot(np.transpose(self.Rtilt), np.transpose(pc[:,0:3])) # (3,n) 106 | return flip_axis_to_camera(np.transpose(pc2)) 107 | 108 | def project_upright_depth_to_image(self, pc): 109 | ''' Input: (N,3) Output: (N,2) UV and (N,) depth ''' 110 | pc2 = self.project_upright_depth_to_camera(pc) 111 | uv = np.dot(pc2, np.transpose(self.K)) # (n,3) 112 | uv[:,0] /= uv[:,2] 113 | uv[:,1] /= uv[:,2] 114 | return uv[:,0:2], pc2[:,2] 115 | 116 | def project_upright_depth_to_upright_camera(self, pc): 117 | return flip_axis_to_camera(pc) 118 | 119 | def project_upright_camera_to_upright_depth(self, pc): 120 | return flip_axis_to_depth(pc) 121 | 122 | def project_image_to_camera(self, uv_depth): 123 | n = uv_depth.shape[0] 124 | x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u 125 | y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v 126 | pts_3d_camera = np.zeros((n,3)) 127 | pts_3d_camera[:,0] = x 128 | pts_3d_camera[:,1] = y 129 | pts_3d_camera[:,2] = uv_depth[:,2] 130 | return pts_3d_camera 131 | 132 | def project_image_to_upright_camerea(self, uv_depth): 133 | pts_3d_camera = self.project_image_to_camera(uv_depth) 134 | pts_3d_depth = flip_axis_to_depth(pts_3d_camera) 135 | pts_3d_upright_depth = np.transpose(np.dot(self.Rtilt, np.transpose(pts_3d_depth))) 136 | return self.project_upright_depth_to_upright_camera(pts_3d_upright_depth) 137 | 138 | 139 | 140 | def rotx(t): 141 | """Rotation about the x-axis.""" 142 | c = np.cos(t) 143 | s = np.sin(t) 144 | return np.array([[1, 0, 0], 145 | [0, c, -s], 146 | [0, s, c]]) 147 | 148 | 149 | def roty(t): 150 | """Rotation about the y-axis.""" 151 | c = np.cos(t) 152 | s = np.sin(t) 153 | return np.array([[c, 0, s], 154 | [0, 1, 0], 155 | [-s, 0, c]]) 156 | 157 | 158 | def rotz(t): 159 | """Rotation about the z-axis.""" 160 | c = np.cos(t) 161 | s = np.sin(t) 162 | return np.array([[c, -s, 0], 163 | [s, c, 0], 164 | [0, 0, 1]]) 165 | 166 | 167 | def transform_from_rot_trans(R, t): 168 | """Transforation matrix from rotation matrix and translation vector.""" 169 | R = R.reshape(3, 3) 170 | t = t.reshape(3, 1) 171 | return np.vstack((np.hstack([R, t]), [0, 0, 0, 1])) 172 | 173 | 174 | def inverse_rigid_trans(Tr): 175 | """Inverse a rigid body transform matrix (3x4 as [R|t]) 176 | [R'|-R't; 0|1] 177 | """ 178 | inv_Tr = np.zeros_like(Tr) # 3x4 179 | inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3]) 180 | inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3]) 181 | return inv_Tr 182 | 183 | def read_sunrgbd_label(label_filename): 184 | lines = [line.rstrip() for line in open(label_filename)] 185 | objects = [SUNObject3d(line) for line in lines] 186 | return objects 187 | 188 | def load_image(img_filename): 189 | return cv2.imread(img_filename) 190 | 191 | def 
load_depth_points(depth_filename): 192 | depth = np.loadtxt(depth_filename) 193 | return depth 194 | 195 | def load_depth_points_mat(depth_filename): 196 | depth = sio.loadmat(depth_filename)['instance'] 197 | return depth 198 | 199 | def random_shift_box2d(box2d, shift_ratio=0.1): 200 | ''' Randomly shift box center, randomly scale width and height 201 | ''' 202 | r = shift_ratio 203 | xmin,ymin,xmax,ymax = box2d 204 | h = ymax-ymin 205 | w = xmax-xmin 206 | cx = (xmin+xmax)/2.0 207 | cy = (ymin+ymax)/2.0 208 | cx2 = cx + w*r*(np.random.random()*2-1) 209 | cy2 = cy + h*r*(np.random.random()*2-1) 210 | h2 = h*(1+np.random.random()*2*r-r) # 0.9 to 1.1 211 | w2 = w*(1+np.random.random()*2*r-r) # 0.9 to 1.1 212 | return np.array([cx2-w2/2.0, cy2-h2/2.0, cx2+w2/2.0, cy2+h2/2.0]) 213 | 214 | def in_hull(p, hull): 215 | from scipy.spatial import Delaunay 216 | if not isinstance(hull,Delaunay): 217 | hull = Delaunay(hull) 218 | return hull.find_simplex(p)>=0 219 | 220 | def extract_pc_in_box3d(pc, box3d): 221 | ''' pc: (N,3), box3d: (8,3) ''' 222 | box3d_roi_inds = in_hull(pc[:,0:3], box3d) 223 | return pc[box3d_roi_inds,:], box3d_roi_inds 224 | 225 | 226 | def my_compute_box_3d(center, size, heading_angle): 227 | R = rotz(-1*heading_angle) 228 | l,w,h = size 229 | x_corners = [-l,l,l,-l,-l,l,l,-l] 230 | y_corners = [w,w,-w,-w,w,w,-w,-w] 231 | z_corners = [h,h,h,h,-h,-h,-h,-h] 232 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 233 | corners_3d[0,:] += center[0] 234 | corners_3d[1,:] += center[1] 235 | corners_3d[2,:] += center[2] 236 | return np.transpose(corners_3d) 237 | 238 | 239 | def compute_box_3d(obj, calib): 240 | ''' Takes an object and a projection matrix (P) and projects the 3d 241 | bounding box into the image plane. 242 | Returns: 243 | corners_2d: (8,2) array in image coord. 244 | corners_3d: (8,3) array in in upright depth coord. 245 | ''' 246 | center = obj.centroid 247 | 248 | # compute rotational matrix around yaw axis 249 | R = rotz(-1*obj.heading_angle) 250 | #b,a,c = dimension 251 | #print R, a,b,c 252 | 253 | # 3d bounding box dimensions 254 | l = obj.l # along heading arrow 255 | w = obj.w # perpendicular to heading arrow 256 | h = obj.h 257 | 258 | # rotate and translate 3d bounding box 259 | x_corners = [-l,l,l,-l,-l,l,l,-l] 260 | y_corners = [w,w,-w,-w,w,w,-w,-w] 261 | z_corners = [h,h,h,h,-h,-h,-h,-h] 262 | corners_3d = np.dot(R, np.vstack([x_corners, y_corners, z_corners])) 263 | corners_3d[0,:] += center[0] 264 | corners_3d[1,:] += center[1] 265 | corners_3d[2,:] += center[2] 266 | 267 | # project the 3d bounding box into the image plane 268 | corners_2d,_ = calib.project_upright_depth_to_image(np.transpose(corners_3d)) 269 | #print 'corners_2d: ', corners_2d 270 | return corners_2d, np.transpose(corners_3d) 271 | 272 | def compute_orientation_3d(obj, calib): 273 | ''' Takes an object and a projection matrix (P) and projects the 3d 274 | object orientation vector into the image plane. 275 | Returns: 276 | orientation_2d: (2,2) array in image coord. 277 | orientation_3d: (2,3) array in depth coord. 
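        (Editor's note: the two rows of orientation_3d are the box centroid and the
        centroid offset by the in-plane (x, y) orientation vector, so the projected
        2D segment points along the object's heading.)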
278 | ''' 279 | 280 | # orientation in object coordinate system 281 | ori = obj.orientation 282 | orientation_3d = np.array([[0, ori[0]],[0, ori[1]],[0,0]]) 283 | center = obj.centroid 284 | orientation_3d[0,:] = orientation_3d[0,:] + center[0] 285 | orientation_3d[1,:] = orientation_3d[1,:] + center[1] 286 | orientation_3d[2,:] = orientation_3d[2,:] + center[2] 287 | 288 | # project orientation into the image plane 289 | orientation_2d,_ = calib.project_upright_depth_to_image(np.transpose(orientation_3d)) 290 | return orientation_2d, np.transpose(orientation_3d) 291 | 292 | def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2): 293 | ''' Draw 3d bounding box in image 294 | qs: (8,2) array of vertices for the 3d box in following order: 295 | 1 -------- 0 296 | /| /| 297 | 2 -------- 3 . 298 | | | | | 299 | . 5 -------- 4 300 | |/ |/ 301 | 6 -------- 7 302 | ''' 303 | qs = qs.astype(np.int32) 304 | for k in range(0,4): 305 | #http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html 306 | i,j=k,(k+1)%4 307 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) # use LINE_AA for opencv3 308 | 309 | i,j=k+4,(k+1)%4 + 4 310 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) 311 | 312 | i,j=k,k+4 313 | cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA) 314 | return image 315 | 316 | 317 | import pickle 318 | import gzip 319 | 320 | def save_zipped_pickle(obj, filename, protocol=-1): 321 | with gzip.open(filename, 'wb') as f: 322 | pickle.dump(obj, f, protocol) 323 | 324 | def load_zipped_pickle(filename): 325 | with gzip.open(filename, 'rb') as f: 326 | loaded_object = pickle.load(f) 327 | return loaded_object 328 | -------------------------------------------------------------------------------- /test_run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import argparse 5 | import importlib 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 14 | FLAGS = parser.parse_args() 15 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 16 | ROOT_DIR = BASE_DIR 17 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 18 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 19 | 20 | from pc_util import random_sampling, read_ply 21 | from ap_helper import parse_predictions 22 | from sunrgbd_detection_dataset import DC # dataset config 23 | from results_save import save_results 24 | 25 | 26 | def preprocess_point_cloud(point_cloud): 27 | ''' Prepare the numpy point cloud (N,3) for forward pass ''' 28 | point_cloud = point_cloud[:,0:3] # do not use color for now 29 | floor_height = np.percentile(point_cloud[:,2],0.99) 30 | height = point_cloud[:,2] - floor_height 31 | point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7) 32 | point_cloud = random_sampling(point_cloud, FLAGS.num_point) 33 | pc = np.expand_dims(point_cloud.astype(np.float32), 0) # (1,40000,4) 34 | return pc 35 | 36 | 37 | if __name__=='__main__': 38 | 39 | # Set file paths and dataset config 40 | demo_dir = os.path.join(BASE_DIR, 'demo_files') 41 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 42 | checkpoint_path = os.path.join(ROOT_DIR, 'checkpoint_sunrgbd.tar') 43 | eval_config_dict = 
{'remove_empty_box': True, 'use_3d_nms': True, 'nms_iou': 0.25, 44 | 'use_old_type_nms': False, 'cls_nms': False, 'per_class_proposal': False, 45 | 'conf_thresh': 0.5, 'dataset_config': DC} 46 | path_to_point_clouds='/content/drive/My Drive/3d_detector/point_clouds' 47 | # Init the model and optimzier 48 | MODEL = importlib.import_module('detectnet') # import network module 49 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 50 | net = MODEL.DetectNet(num_proposal=256, input_feature_dim=1, vote_factor=1, 51 | sampling='seed_fps', num_class=DC.num_class, 52 | num_heading_bin=DC.num_heading_bin, 53 | num_size_cluster=DC.num_size_cluster, 54 | mean_size_arr=DC.mean_size_arr).to(device) 55 | 56 | # Load checkpoint 57 | optimizer = optim.Adam(net.parameters(), lr=0.001) 58 | checkpoint = torch.load(checkpoint_path) 59 | net.load_state_dict(checkpoint['model_state_dict']) 60 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 61 | epoch = checkpoint['epoch'] 62 | print("Loaded checkpoint %s "%(checkpoint_path)) 63 | 64 | 65 | for pc_path in os.listdir(path_to_point_clouds): 66 | # Load and preprocess input point cloud 67 | net.eval() # set model to eval mode (for bn and dp) 68 | point_cloud = read_ply(path_to_point_clouds+'/'+pc_path) 69 | pc = preprocess_point_cloud(point_cloud) 70 | print('Loaded point cloud data: %s'%(pc_path)) 71 | 72 | # Model inference 73 | inputs = {'point_clouds': torch.from_numpy(pc).to(device)} 74 | tic = time.time() 75 | with torch.no_grad(): 76 | end_points = net(inputs) 77 | toc = time.time() 78 | print('Inference time: %f'%(toc-tic)) 79 | end_points['point_clouds'] = inputs['point_clouds'] 80 | pred_map_cls = parse_predictions(end_points, eval_config_dict) 81 | # print(pred_map_cls) 82 | print('Finished detection. 
%d object detected.'%(len(pred_map_cls[0]))) 83 | dataset='sunrgbd' 84 | dump_dir = os.path.join(demo_dir, '%s_results'%(dataset)) 85 | if not os.path.exists(dump_dir): os.mkdir(dump_dir) 86 | save_results(end_points, dump_dir, DC, True) 87 | print('Results are saved in %s'%(dump_dir)) 88 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | 2 | """ Training routine for 3D object detection with SUN RGB-D.""" 3 | 4 | # Sample usage: 5 | # python train.py --log_dir log_sunrgbd 6 | 7 | 8 | import os 9 | import sys 10 | import numpy as np 11 | from datetime import datetime 12 | import argparse 13 | import importlib 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.optim as optim 18 | from torch.optim import lr_scheduler 19 | from torch.utils.data import DataLoader 20 | 21 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 22 | ROOT_DIR = BASE_DIR 23 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 24 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 25 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 26 | from pytorch_utils import BNMomentumScheduler 27 | from tf_visualizer import log_writing 28 | from ap_helper import AP_Measurement, parse_predictions, parse_groundtruths 29 | from sunrgbd_detection_dataset import SunrgbdDetectionVotesDataset, MAX_NUM_OBJ 30 | from model_util_sunrgbd import SunrgbdDatasetConfig 31 | parser = argparse.ArgumentParser() 32 | parser.add_argument('--checkpoint_path', default=None, help='Model checkpoint path [default: None]') 33 | parser.add_argument('--log_dir', default='log', help='Dump dir to save model checkpoint [default: log]') 34 | parser.add_argument('--dump_dir', default=None, help='Dump dir to save sample outputs [default: None]') 35 | parser.add_argument('--num_point', type=int, default=20000, help='Point Number [default: 20000]') 36 | parser.add_argument('--num_target', type=int, default=256, help='Proposal number [default: 256]') 37 | parser.add_argument('--vote_factor', type=int, default=1, help='Vote factor [default: 1]') 38 | parser.add_argument('--cluster_sampling', default='vote_fps', help='Sampling strategy for vote clusters: vote_fps, seed_fps, random [default: vote_fps]') 39 | parser.add_argument('--ap_iou_thresh', type=float, default=0.25, help='AP IoU threshold [default: 0.25]') 40 | parser.add_argument('--max_epoch', type=int, default=190, help='Epoch to run [default: 180]') 41 | parser.add_argument('--batch_size', type=int, default=8, help='Batch Size during training [default: 8]') 42 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]') 43 | parser.add_argument('--weight_decay', type=float, default=0, help='Optimization L2 weight decay [default: 0]') 44 | parser.add_argument('--bn_decay_step', type=int, default=20, help='Period of BN decay (in epochs) [default: 20]') 45 | parser.add_argument('--bn_decay_rate', type=float, default=0.5, help='Decay rate for BN decay [default: 0.5]') 46 | parser.add_argument('--lr_decay_steps', default='80,120,160', help='When to decay the learning rate (in epochs) [default: 80,120,160]') 47 | parser.add_argument('--lr_decay_rates', default='0.1,0.1,0.1', help='Decay rates for lr decay [default: 0.1,0.1,0.1]') 48 | parser.add_argument('--no_height', action='store_true', help='Do NOT use height signal in input.') 49 | parser.add_argument('--use_color', action='store_true', help='Use RGB color in 
input.') 50 | parser.add_argument('--use_sunrgbd_v2', action='store_true', help='Use V2 box labels for SUN RGB-D dataset') 51 | parser.add_argument('--dump_results', action='store_true', help='Dump results.') 52 | FLAGS = parser.parse_args() 53 | 54 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 55 | BATCH_SIZE = FLAGS.batch_size 56 | NUM_POINT = FLAGS.num_point 57 | MAX_EPOCH = FLAGS.max_epoch 58 | BASE_LEARNING_RATE = FLAGS.learning_rate 59 | BN_DECAY_STEP = FLAGS.bn_decay_step 60 | BN_DECAY_RATE = FLAGS.bn_decay_rate 61 | LR_DECAY_STEPS = [int(x) for x in FLAGS.lr_decay_steps.split(',')] 62 | LR_DECAY_RATES = [float(x) for x in FLAGS.lr_decay_rates.split(',')] 63 | assert(len(LR_DECAY_STEPS)==len(LR_DECAY_RATES)) 64 | LOG_DIR = FLAGS.log_dir 65 | DEFAULT_DUMP_DIR = os.path.join(BASE_DIR, os.path.basename(LOG_DIR)) 66 | DUMP_DIR = FLAGS.dump_dir if FLAGS.dump_dir is not None else DEFAULT_DUMP_DIR 67 | DEFAULT_CHECKPOINT_PATH = os.path.join(LOG_DIR, 'checkpoint.tar') 68 | CHECKPOINT_PATH = FLAGS.checkpoint_path if FLAGS.checkpoint_path is not None \ 69 | else DEFAULT_CHECKPOINT_PATH 70 | FLAGS.DUMP_DIR = DUMP_DIR 71 | 72 | 73 | def get_current_lr(epoch): 74 | lr = BASE_LEARNING_RATE 75 | for i,lr_decay_epoch in enumerate(LR_DECAY_STEPS): 76 | if epoch >= lr_decay_epoch: 77 | lr *= LR_DECAY_RATES[i] 78 | return lr 79 | 80 | def adjust_learning_rate(optimizer, epoch): 81 | lr = get_current_lr(epoch) 82 | for param_group in optimizer.param_groups: 83 | param_group['lr'] = lr 84 | 85 | 86 | # Prepare LOG_DIR and DUMP_DIR 87 | if not os.path.exists(LOG_DIR): 88 | os.mkdir(LOG_DIR) 89 | 90 | LOG_FOUT = open(os.path.join(LOG_DIR, 'log_train.txt'), 'a') 91 | LOG_FOUT.write(str(FLAGS)+'\n') 92 | if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR) 93 | 94 | 95 | # Init datasets and dataloaders 96 | def my_worker_init_fn(worker_id): 97 | np.random.seed(np.random.get_state()[1][0] + worker_id) 98 | 99 | # Create Dataset and Dataloader 100 | sys.path.append(os.path.join(ROOT_DIR, 'sunrgbd')) 101 | 102 | DATASET_CONFIG = SunrgbdDatasetConfig() 103 | 104 | TRAIN_DATASET = SunrgbdDetectionVotesDataset('train', num_points=NUM_POINT, 105 | augment=True, 106 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 107 | use_v1=(not FLAGS.use_sunrgbd_v2)) 108 | TEST_DATASET = SunrgbdDetectionVotesDataset('val', num_points=NUM_POINT, 109 | augment=False, 110 | use_color=FLAGS.use_color, use_height=(not FLAGS.no_height), 111 | use_v1=(not FLAGS.use_sunrgbd_v2)) 112 | 113 | print('Training set',len(TRAIN_DATASET)) 114 | print('Test Set', len(TEST_DATASET)) 115 | 116 | TRAIN_DATALOADER = DataLoader(TRAIN_DATASET, batch_size=BATCH_SIZE, 117 | shuffle=True, num_workers=4, worker_init_fn=my_worker_init_fn) 118 | TEST_DATALOADER = DataLoader(TEST_DATASET, batch_size=BATCH_SIZE, 119 | shuffle=True, num_workers=4, worker_init_fn=my_worker_init_fn) 120 | 121 | print('Training Dataloader',len(TRAIN_DATALOADER)) 122 | print('Testing Dataloader', len(TEST_DATALOADER)) 123 | 124 | 125 | # Init the model and optimzier 126 | MODEL = importlib.import_module('detectnet') # import network module 127 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 128 | num_input_channel = int(FLAGS.use_color)*3 + int(not FLAGS.no_height)*1 129 | 130 | 131 | Detector = MODEL.DetectNet 132 | 133 | net = Detector(num_class=DATASET_CONFIG.num_class, 134 | num_heading_bin=DATASET_CONFIG.num_heading_bin, 135 | num_size_cluster=DATASET_CONFIG.num_size_cluster, 136 | 
mean_size_arr=DATASET_CONFIG.mean_size_arr, 137 | num_proposal=FLAGS.num_target, 138 | input_feature_dim=num_input_channel, 139 | vote_factor=FLAGS.vote_factor, 140 | sampling=FLAGS.cluster_sampling) 141 | 142 | 143 | net.to(device) 144 | criterion = MODEL.get_loss 145 | 146 | # Load the Adam optimizer 147 | optimizer = optim.Adam(net.parameters(), lr=BASE_LEARNING_RATE, weight_decay=FLAGS.weight_decay) 148 | 149 | # Load checkpoint if there is any 150 | it = -1 # for the initialize value of `LambdaLR` and `BNMomentumScheduler` 151 | start_epoch = 0 152 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 153 | checkpoint = torch.load(CHECKPOINT_PATH) 154 | net.load_state_dict(checkpoint['model_state_dict']) 155 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 156 | start_epoch = checkpoint['epoch'] 157 | print("-> loaded checkpoint %s (epoch: %d)"%(CHECKPOINT_PATH, start_epoch)) 158 | 159 | # Decay Batchnorm momentum from 0.5 to 0.999 160 | # note: pytorch's BN momentum (default 0.1)= 1 - tensorflow's BN momentum 161 | BN_MOMENTUM_INIT = 0.5 162 | BN_MOMENTUM_MAX = 0.001 163 | bn_lbmd = lambda it: max(BN_MOMENTUM_INIT * BN_DECAY_RATE**(int(it / BN_DECAY_STEP)), BN_MOMENTUM_MAX) 164 | bnm_scheduler = BNMomentumScheduler(net, bn_lambda=bn_lbmd, last_epoch=start_epoch-1) 165 | 166 | 167 | 168 | # TFBoard Visualizers 169 | TRAIN_VISUALIZER = log_writing(FLAGS, 'train') 170 | TEST_VISUALIZER = log_writing(FLAGS, 'test') 171 | 172 | 173 | # Used for AP calculation 174 | CONFIG_DICT = {'remove_empty_box':False, 'use_3d_nms':True, 175 | 'nms_iou':0.25, 'use_old_type_nms':False, 'cls_nms':True, 176 | 'per_class_proposal': True, 'conf_thresh':0.05, 177 | 'dataset_config':DATASET_CONFIG} 178 | 179 | # ------------------------------------------------------------------------- GLOBAL CONFIG END 180 | 181 | def train_step(): 182 | stat_dict = {} # collect statistics 183 | adjust_learning_rate(optimizer, EPOCH_CNT) 184 | bnm_scheduler.step() # decay BN momentum 185 | net.train() # set model to training mode 186 | for batch_idx, batch_data_label in enumerate(TRAIN_DATALOADER): 187 | for key in batch_data_label: 188 | batch_data_label[key] = batch_data_label[key].to(device) 189 | 190 | # Forward pass 191 | optimizer.zero_grad() 192 | inputs = {'point_clouds': batch_data_label['point_clouds']} 193 | end_points = net(inputs) 194 | 195 | # Compute loss and gradients, update parameters. 
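        # (editor's note) The ground-truth tensors from the dataloader are merged into
        # end_points so the loss helper can read predictions and labels from a single
        # dict; the assert below guards against a label key overwriting a network
        # output of the same name.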
196 | for key in batch_data_label: 197 | assert(key not in end_points) 198 | end_points[key] = batch_data_label[key] 199 | loss, end_points = criterion(end_points, DATASET_CONFIG) 200 | loss.backward() 201 | optimizer.step() 202 | 203 | # Accumulate statistics and print out 204 | for key in end_points: 205 | if 'loss' in key or 'acc' in key or 'ratio' in key: 206 | if key not in stat_dict: stat_dict[key] = 0 207 | stat_dict[key] += end_points[key].item() 208 | 209 | batch_interval = 10 210 | if (batch_idx+1) % batch_interval == 0: 211 | print(' ---- batch: %03d ----' % (batch_idx+1)) 212 | TRAIN_VISUALIZER.log_scalars({key:stat_dict[key]/batch_interval for key in stat_dict}, 213 | (EPOCH_CNT*len(TRAIN_DATALOADER)+batch_idx)*BATCH_SIZE) 214 | for key in sorted(stat_dict.keys()): 215 | print('mean %s: %f'%(key, stat_dict[key]/batch_interval)) 216 | stat_dict[key] = 0 217 | 218 | 219 | if __name__=='__main__': 220 | global EPOCH_CNT 221 | min_loss = 1e10 222 | loss = 0 223 | print((MAX_EPOCH)) 224 | for epoch in range(start_epoch, MAX_EPOCH): 225 | EPOCH_CNT = epoch 226 | print('Epoch: %03d' % (epoch)) 227 | np.random.seed() 228 | train_step() 229 | if EPOCH_CNT == 0 or EPOCH_CNT % 10 == 9: # Eval every 10 epochs 230 | stat_dict = {} # collect statistics 231 | ap_Measurement = AP_Measurement(ap_iou_thresh=FLAGS.ap_iou_thresh, 232 | class2type_map=DATASET_CONFIG.class2type) 233 | net.eval() # set model to eval mode (for bn and dp) 234 | for batch_idx, batch_data_label in enumerate(TEST_DATALOADER): 235 | if batch_idx % 10 == 0: 236 | print('Eval batch: %d' % (batch_idx)) 237 | for key in batch_data_label: 238 | batch_data_label[key] = batch_data_label[key].to(device) 239 | 240 | # Forward pass 241 | inputs = {'point_clouds': batch_data_label['point_clouds']} 242 | with torch.no_grad(): 243 | end_points = net(inputs) 244 | 245 | # Compute loss 246 | for key in batch_data_label: 247 | assert (key not in end_points) 248 | end_points[key] = batch_data_label[key] 249 | loss, end_points = criterion(end_points, DATASET_CONFIG) 250 | 251 | # Accumulate statistics and print out 252 | for key in end_points: 253 | if 'loss' in key or 'acc' in key or 'ratio' in key: 254 | if key not in stat_dict: stat_dict[key] = 0 255 | stat_dict[key] += end_points[key].item() 256 | 257 | batch_pred_map_cls = parse_predictions(end_points, CONFIG_DICT) 258 | batch_gt_map_cls = parse_groundtruths(end_points, CONFIG_DICT) 259 | ap_Measurement.step(batch_pred_map_cls, batch_gt_map_cls) 260 | 261 | # Dump evaluation results for visualization 262 | if FLAGS.dump_results and batch_idx == 0 and EPOCH_CNT % 10 == 0: 263 | MODEL.dump_results(end_points, DUMP_DIR, DATASET_CONFIG) 264 | 265 | # Log statistics 266 | TEST_VISUALIZER.log_scalars({key: stat_dict[key] / float(batch_idx + 1) for key in stat_dict}, 267 | (EPOCH_CNT + 1) * len(TRAIN_DATALOADER) * BATCH_SIZE) 268 | for key in sorted(stat_dict.keys()): 269 | print('eval mean %s: %f' % (key, stat_dict[key] / (float(batch_idx + 1)))) 270 | 271 | # Evaluate average precision 272 | metrics_dict = ap_Measurement.compute_metrics() 273 | for key in metrics_dict: 274 | print('eval %s: %f' % (key, metrics_dict[key])) 275 | 276 | mean_loss = stat_dict['loss'] / float(batch_idx + 1) 277 | # Save checkpoint 278 | save_dict = {'epoch': epoch + 1, # after training one epoch, the start_epoch should be epoch+1 279 | 'optimizer_state_dict': optimizer.state_dict(), 280 | 'loss': loss, 281 | } 282 | try: # with nn.DataParallel() the net is added as a submodule of DataParallel 283 | 
save_dict['model_state_dict'] = net.module.state_dict() 284 | except: 285 | save_dict['model_state_dict'] = net.state_dict() 286 | torch.save(save_dict, os.path.join(LOG_DIR, 'checkpoint.tar')) 287 | -------------------------------------------------------------------------------- /utils/__pycache__/box_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/box_util.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/box_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/box_util.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/eval_det.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/eval_det.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/eval_det.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/eval_det.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/metric_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/metric_util.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/metric_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/metric_util.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/nms.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/nms.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/nn_distance.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/nn_distance.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/nn_distance.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/nn_distance.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pc_util.cpython-35.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/pc_util.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pc_util.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/pc_util.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_logger.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/tf_logger.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/tf_logger.cpython-36.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_visualizer.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/tf_visualizer.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_visualizer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tayoshittu/3D-Object-Detection/9db4e6ab005bff76ca4a2eeb071dd40bd47bd664/utils/__pycache__/tf_visualizer.cpython-36.pyc -------------------------------------------------------------------------------- /utils/box_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Helper functions for calculating 2D and 3D bounding box IoU. 7 | 8 | Collected and written by Charles R. Qi 9 | Last modified: Jul 2019 10 | """ 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | from scipy.spatial import ConvexHull 15 | 16 | def polygon_clip(subjectPolygon, clipPolygon): 17 | """ Clip a polygon with another polygon. 18 | 19 | Ref: https://rosettacode.org/wiki/Sutherland-Hodgman_polygon_clipping#Python 20 | 21 | Args: 22 | subjectPolygon: a list of (x,y) 2d points, any polygon. 23 | clipPolygon: a list of (x,y) 2d points, has to be *convex* 24 | Note: 25 | **points have to be counter-clockwise ordered** 26 | 27 | Return: 28 | a list of (x,y) vertex point for the intersection polygon. 
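    Example (editor's sketch; polygon_clip and poly_area are the helpers defined
    in this file, the square coordinates are made up):
        >>> sq1 = [(0, 0), (2, 0), (2, 2), (0, 2)]   # CCW square
        >>> sq2 = [(1, 1), (3, 1), (3, 3), (1, 3)]   # CCW square shifted by (1, 1)
        >>> inter = polygon_clip(sq1, sq2)           # vertices of the overlap square (1,1)-(2,2)
        >>> xs = np.array([p[0] for p in inter]); ys = np.array([p[1] for p in inter])
        >>> float(poly_area(xs, ys))
        1.0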
29 | """ 30 | def inside(p): 31 | return(cp2[0]-cp1[0])*(p[1]-cp1[1]) > (cp2[1]-cp1[1])*(p[0]-cp1[0]) 32 | 33 | def computeIntersection(): 34 | dc = [ cp1[0] - cp2[0], cp1[1] - cp2[1] ] 35 | dp = [ s[0] - e[0], s[1] - e[1] ] 36 | n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0] 37 | n2 = s[0] * e[1] - s[1] * e[0] 38 | n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0]) 39 | return [(n1*dp[0] - n2*dc[0]) * n3, (n1*dp[1] - n2*dc[1]) * n3] 40 | 41 | outputList = subjectPolygon 42 | cp1 = clipPolygon[-1] 43 | 44 | for clipVertex in clipPolygon: 45 | cp2 = clipVertex 46 | inputList = outputList 47 | outputList = [] 48 | s = inputList[-1] 49 | 50 | for subjectVertex in inputList: 51 | e = subjectVertex 52 | if inside(e): 53 | if not inside(s): 54 | outputList.append(computeIntersection()) 55 | outputList.append(e) 56 | elif inside(s): 57 | outputList.append(computeIntersection()) 58 | s = e 59 | cp1 = cp2 60 | if len(outputList) == 0: 61 | return None 62 | return(outputList) 63 | 64 | def poly_area(x,y): 65 | """ Ref: http://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates """ 66 | return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1))) 67 | 68 | def convex_hull_intersection(p1, p2): 69 | """ Compute area of two convex hull's intersection area. 70 | p1,p2 are a list of (x,y) tuples of hull vertices. 71 | return a list of (x,y) for the intersection and its volume 72 | """ 73 | inter_p = polygon_clip(p1,p2) 74 | if inter_p is not None: 75 | hull_inter = ConvexHull(inter_p) 76 | return inter_p, hull_inter.volume 77 | else: 78 | return None, 0.0 79 | 80 | def box3d_vol(corners): 81 | ''' corners: (8,3) no assumption on axis direction ''' 82 | a = np.sqrt(np.sum((corners[0,:] - corners[1,:])**2)) 83 | b = np.sqrt(np.sum((corners[1,:] - corners[2,:])**2)) 84 | c = np.sqrt(np.sum((corners[0,:] - corners[4,:])**2)) 85 | return a*b*c 86 | 87 | def is_clockwise(p): 88 | x = p[:,0] 89 | y = p[:,1] 90 | return np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)) > 0 91 | 92 | def box3d_iou(corners1, corners2): 93 | ''' Compute 3D bounding box IoU. 94 | 95 | Input: 96 | corners1: numpy array (8,3), assume up direction is negative Y 97 | corners2: numpy array (8,3), assume up direction is negative Y 98 | Output: 99 | iou: 3D bounding box IoU 100 | iou_2d: bird's eye view 2D bounding box IoU 101 | 102 | todo (rqi): add more description on corner points' orders. 103 | ''' 104 | # corner points are in counter clockwise order 105 | rect1 = [(corners1[i,0], corners1[i,2]) for i in range(3,-1,-1)] 106 | rect2 = [(corners2[i,0], corners2[i,2]) for i in range(3,-1,-1)] 107 | area1 = poly_area(np.array(rect1)[:,0], np.array(rect1)[:,1]) 108 | area2 = poly_area(np.array(rect2)[:,0], np.array(rect2)[:,1]) 109 | inter, inter_area = convex_hull_intersection(rect1, rect2) 110 | iou_2d = inter_area/(area1+area2-inter_area) 111 | ymax = min(corners1[0,1], corners2[0,1]) 112 | ymin = max(corners1[4,1], corners2[4,1]) 113 | inter_vol = inter_area * max(0.0, ymax-ymin) 114 | vol1 = box3d_vol(corners1) 115 | vol2 = box3d_vol(corners2) 116 | iou = inter_vol / (vol1 + vol2 - inter_vol) 117 | return iou, iou_2d 118 | 119 | 120 | def get_iou(bb1, bb2): 121 | """ 122 | Calculate the Intersection over Union (IoU) of two 2D bounding boxes. 
123 | 124 | Parameters 125 | ---------- 126 | bb1 : dict 127 | Keys: {'x1', 'x2', 'y1', 'y2'} 128 | The (x1, y1) position is at the top left corner, 129 | the (x2, y2) position is at the bottom right corner 130 | bb2 : dict 131 | Keys: {'x1', 'x2', 'y1', 'y2'} 132 | The (x, y) position is at the top left corner, 133 | the (x2, y2) position is at the bottom right corner 134 | 135 | Returns 136 | ------- 137 | float 138 | in [0, 1] 139 | """ 140 | assert bb1['x1'] < bb1['x2'] 141 | assert bb1['y1'] < bb1['y2'] 142 | assert bb2['x1'] < bb2['x2'] 143 | assert bb2['y1'] < bb2['y2'] 144 | 145 | # determine the coordinates of the intersection rectangle 146 | x_left = max(bb1['x1'], bb2['x1']) 147 | y_top = max(bb1['y1'], bb2['y1']) 148 | x_right = min(bb1['x2'], bb2['x2']) 149 | y_bottom = min(bb1['y2'], bb2['y2']) 150 | 151 | if x_right < x_left or y_bottom < y_top: 152 | return 0.0 153 | 154 | # The intersection of two axis-aligned bounding boxes is always an 155 | # axis-aligned bounding box 156 | intersection_area = (x_right - x_left) * (y_bottom - y_top) 157 | 158 | # compute the area of both AABBs 159 | bb1_area = (bb1['x2'] - bb1['x1']) * (bb1['y2'] - bb1['y1']) 160 | bb2_area = (bb2['x2'] - bb2['x1']) * (bb2['y2'] - bb2['y1']) 161 | 162 | # compute the intersection over union by taking the intersection 163 | # area and dividing it by the sum of prediction + ground-truth 164 | # areas - the interesection area 165 | iou = intersection_area / float(bb1_area + bb2_area - intersection_area) 166 | assert iou >= 0.0 167 | assert iou <= 1.0 168 | return iou 169 | 170 | def box2d_iou(box1, box2): 171 | ''' Compute 2D bounding box IoU. 172 | 173 | Input: 174 | box1: tuple of (xmin,ymin,xmax,ymax) 175 | box2: tuple of (xmin,ymin,xmax,ymax) 176 | Output: 177 | iou: 2D IoU scalar 178 | ''' 179 | return get_iou({'x1':box1[0], 'y1':box1[1], 'x2':box1[2], 'y2':box1[3]}, \ 180 | {'x1':box2[0], 'y1':box2[1], 'x2':box2[2], 'y2':box2[3]}) 181 | 182 | # ----------------------------------------------------------- 183 | # Convert from box parameters to 184 | # ----------------------------------------------------------- 185 | def roty(t): 186 | """Rotation about the y-axis.""" 187 | c = np.cos(t) 188 | s = np.sin(t) 189 | return np.array([[c, 0, s], 190 | [0, 1, 0], 191 | [-s, 0, c]]) 192 | 193 | def roty_batch(t): 194 | """Rotation about the y-axis. 
195 | t: (x1,x2,...xn) 196 | return: (x1,x2,...,xn,3,3) 197 | """ 198 | input_shape = t.shape 199 | output = np.zeros(tuple(list(input_shape)+[3,3])) 200 | c = np.cos(t) 201 | s = np.sin(t) 202 | output[...,0,0] = c 203 | output[...,0,2] = s 204 | output[...,1,1] = 1 205 | output[...,2,0] = -s 206 | output[...,2,2] = c 207 | return output 208 | 209 | 210 | def get_3d_box(box_size, heading_angle, center): 211 | ''' box_size is array(l,w,h), heading_angle is radius clockwise from pos x axis, center is xyz of box center 212 | output (8,3) array for 3D box cornders 213 | Similar to utils/compute_orientation_3d 214 | ''' 215 | R = roty(heading_angle) 216 | l,w,h = box_size 217 | x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2]; 218 | y_corners = [h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2]; 219 | z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2]; 220 | corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners])) 221 | corners_3d[0,:] = corners_3d[0,:] + center[0]; 222 | corners_3d[1,:] = corners_3d[1,:] + center[1]; 223 | corners_3d[2,:] = corners_3d[2,:] + center[2]; 224 | corners_3d = np.transpose(corners_3d) 225 | return corners_3d 226 | 227 | def get_3d_box_batch(box_size, heading_angle, center): 228 | ''' box_size: [x1,x2,...,xn,3] 229 | heading_angle: [x1,x2,...,xn] 230 | center: [x1,x2,...,xn,3] 231 | Return: 232 | [x1,x3,...,xn,8,3] 233 | ''' 234 | input_shape = heading_angle.shape 235 | R = roty_batch(heading_angle) 236 | l = np.expand_dims(box_size[...,0], -1) # [x1,...,xn,1] 237 | w = np.expand_dims(box_size[...,1], -1) 238 | h = np.expand_dims(box_size[...,2], -1) 239 | corners_3d = np.zeros(tuple(list(input_shape)+[8,3])) 240 | corners_3d[...,:,0] = np.concatenate((l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2), -1) 241 | corners_3d[...,:,1] = np.concatenate((h/2,h/2,h/2,h/2,-h/2,-h/2,-h/2,-h/2), -1) 242 | corners_3d[...,:,2] = np.concatenate((w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2), -1) 243 | tlist = [i for i in range(len(input_shape))] 244 | tlist += [len(input_shape)+1, len(input_shape)] 245 | corners_3d = np.matmul(corners_3d, np.transpose(R, tuple(tlist))) 246 | corners_3d += np.expand_dims(center, -2) 247 | return corners_3d 248 | 249 | if __name__=='__main__': 250 | 251 | # Function for polygon ploting 252 | import matplotlib 253 | from matplotlib.patches import Polygon 254 | from matplotlib.collections import PatchCollection 255 | import matplotlib.pyplot as plt 256 | def plot_polys(plist,scale=500.0): 257 | fig, ax = plt.subplots() 258 | patches = [] 259 | for p in plist: 260 | poly = Polygon(np.array(p)/scale, True) 261 | patches.append(poly) 262 | 263 | pc = PatchCollection(patches, cmap=matplotlib.cm.jet, alpha=0.5) 264 | colors = 100*np.random.rand(len(patches)) 265 | pc.set_array(np.array(colors)) 266 | ax.add_collection(pc) 267 | plt.show() 268 | 269 | # Demo on ConvexHull 270 | points = np.random.rand(30, 2) # 30 random points in 2-D 271 | hull = ConvexHull(points) 272 | # **In 2D "volume" is is area, "area" is perimeter 273 | print(('Hull area: ', hull.volume)) 274 | for simplex in hull.simplices: 275 | print(simplex) 276 | 277 | # Demo on convex hull overlaps 278 | sub_poly = [(0,0),(300,0),(300,300),(0,300)] 279 | clip_poly = [(150,150),(300,300),(150,450),(0,300)] 280 | inter_poly = polygon_clip(sub_poly, clip_poly) 281 | print(poly_area(np.array(inter_poly)[:,0], np.array(inter_poly)[:,1])) 282 | 283 | # Test convex hull interaction function 284 | rect1 = [(50,0),(50,300),(300,300),(300,0)] 285 | rect2 = [(150,150),(300,300),(150,450),(0,300)] 286 | plot_polys([rect1, 
rect2]) 287 | inter, area = convex_hull_intersection(rect1, rect2) 288 | print((inter, area)) 289 | if inter is not None: 290 | print(poly_area(np.array(inter)[:,0], np.array(inter)[:,1])) 291 | 292 | print('------------------') 293 | rect1 = [(0.30026005199835404, 8.9408694211408424), \ 294 | (-1.1571105364358421, 9.4686676477075533), \ 295 | (0.1777082043006144, 13.154404877812102), \ 296 | (1.6350787927348105, 12.626606651245391)] 297 | rect1 = [rect1[0], rect1[3], rect1[2], rect1[1]] 298 | rect2 = [(0.23908745901608636, 8.8551095691132886), \ 299 | (-1.2771419487733995, 9.4269062966181956), \ 300 | (0.13138836963152717, 13.161896351296868), \ 301 | (1.647617777421013, 12.590099623791961)] 302 | rect2 = [rect2[0], rect2[3], rect2[2], rect2[1]] 303 | plot_polys([rect1, rect2]) 304 | inter, area = convex_hull_intersection(rect1, rect2) 305 | print((inter, area)) 306 | -------------------------------------------------------------------------------- /utils/eval_det.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Generic Code for Object Detection Evaluation 7 | 8 | Input: 9 | For each class: 10 | For each image: 11 | Predictions: box, score 12 | Groundtruths: box 13 | 14 | Output: 15 | For each class: 16 | precision-recal and average precision 17 | 18 | Author: Charles R. Qi 19 | 20 | Ref: https://raw.githubusercontent.com/rbgirshick/py-faster-rcnn/master/lib/datasets/voc_eval.py 21 | """ 22 | import numpy as np 23 | 24 | def voc_ap(rec, prec, use_07_metric=False): 25 | """ ap = voc_ap(rec, prec, [use_07_metric]) 26 | Compute VOC AP given precision and recall. 27 | If use_07_metric is true, uses the 28 | VOC 07 11 point method (default:False). 29 | """ 30 | if use_07_metric: 31 | # 11 point metric 32 | ap = 0. 33 | for t in np.arange(0., 1.1, 0.1): 34 | if np.sum(rec >= t) == 0: 35 | p = 0 36 | else: 37 | p = np.max(prec[rec >= t]) 38 | ap = ap + p / 11. 39 | else: 40 | # correct AP calculation 41 | # first append sentinel values at the end 42 | mrec = np.concatenate(([0.], rec, [1.])) 43 | mpre = np.concatenate(([0.], prec, [0.])) 44 | 45 | # compute the precision envelope 46 | for i in range(mpre.size - 1, 0, -1): 47 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 48 | 49 | # to calculate area under PR curve, look for points 50 | # where X axis (recall) changes value 51 | i = np.where(mrec[1:] != mrec[:-1])[0] 52 | 53 | # and sum (\Delta recall) * prec 54 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 55 | return ap 56 | 57 | import os 58 | import sys 59 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 60 | from metric_util import calc_iou # axis-aligned 3D box IoU 61 | def get_iou(bb1, bb2): 62 | """ Compute IoU of two bounding boxes. 63 | ** Define your bod IoU function HERE ** 64 | """ 65 | #pass 66 | iou3d = calc_iou(bb1, bb2) 67 | return iou3d 68 | 69 | from box_util import box3d_iou 70 | def get_iou_obb(bb1,bb2): 71 | iou3d, iou2d = box3d_iou(bb1,bb2) 72 | return iou3d 73 | 74 | def get_iou_main(get_iou_func, args): 75 | return get_iou_func(*args) 76 | 77 | def eval_det_cls(pred, gt, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 78 | """ Generic functions to compute precision/recall for object detection 79 | for a single class. 
80 | Input: 81 | pred: map of {img_id: [(bbox, score)]} where bbox is numpy array 82 | gt: map of {img_id: [bbox]} 83 | ovthresh: scalar, iou threshold 84 | use_07_metric: bool, if True use VOC07 11 point method 85 | Output: 86 | rec: numpy array of length nd 87 | prec: numpy array of length nd 88 | ap: scalar, average precision 89 | """ 90 | 91 | # construct gt objects 92 | class_recs = {} # {img_id: {'bbox': bbox list, 'det': matched list}} 93 | npos = 0 94 | for img_id in gt.keys(): 95 | bbox = np.array(gt[img_id]) 96 | det = [False] * len(bbox) 97 | npos += len(bbox) 98 | class_recs[img_id] = {'bbox': bbox, 'det': det} 99 | # pad empty list to all other imgids 100 | for img_id in pred.keys(): 101 | if img_id not in gt: 102 | class_recs[img_id] = {'bbox': np.array([]), 'det': []} 103 | 104 | # construct dets 105 | image_ids = [] 106 | confidence = [] 107 | BB = [] 108 | for img_id in pred.keys(): 109 | for box,score in pred[img_id]: 110 | image_ids.append(img_id) 111 | confidence.append(score) 112 | BB.append(box) 113 | confidence = np.array(confidence) 114 | BB = np.array(BB) # (nd,4 or 8,3 or 6) 115 | 116 | # sort by confidence 117 | sorted_ind = np.argsort(-confidence) 118 | sorted_scores = np.sort(-confidence) 119 | BB = BB[sorted_ind, ...] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | 122 | # go down dets and mark TPs and FPs 123 | nd = len(image_ids) 124 | tp = np.zeros(nd) 125 | fp = np.zeros(nd) 126 | for d in range(nd): 127 | #if d%100==0: print(d) 128 | R = class_recs[image_ids[d]] 129 | bb = BB[d,...].astype(float) 130 | ovmax = -np.inf 131 | BBGT = R['bbox'].astype(float) 132 | 133 | if BBGT.size > 0: 134 | # compute overlaps 135 | for j in range(BBGT.shape[0]): 136 | iou = get_iou_main(get_iou_func, (bb, BBGT[j,...])) 137 | if iou > ovmax: 138 | ovmax = iou 139 | jmax = j 140 | 141 | #print d, ovmax 142 | if ovmax > ovthresh: 143 | if not R['det'][jmax]: 144 | tp[d] = 1. 145 | R['det'][jmax] = 1 146 | else: 147 | fp[d] = 1. 148 | else: 149 | fp[d] = 1. 150 | 151 | # compute precision recall 152 | fp = np.cumsum(fp) 153 | tp = np.cumsum(tp) 154 | rec = tp / float(npos) 155 | #print('NPOS: ', npos) 156 | # avoid divide by zero in case the first detection matches a difficult 157 | # ground truth 158 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 159 | ap = voc_ap(rec, prec, use_07_metric) 160 | 161 | return rec, prec, ap 162 | 163 | def eval_det_cls_wrapper(arguments): 164 | pred, gt, ovthresh, use_07_metric, get_iou_func = arguments 165 | rec, prec, ap = eval_det_cls(pred, gt, ovthresh, use_07_metric, get_iou_func) 166 | return (rec, prec, ap) 167 | 168 | def eval_det(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 169 | """ Generic functions to compute precision/recall for object detection 170 | for multiple classes. 
171 | Input: 172 | pred_all: map of {img_id: [(classname, bbox, score)]} 173 | gt_all: map of {img_id: [(classname, bbox)]} 174 | ovthresh: scalar, iou threshold 175 | use_07_metric: bool, if true use VOC07 11 point method 176 | Output: 177 | rec: {classname: rec} 178 | prec: {classname: prec_all} 179 | ap: {classname: scalar} 180 | """ 181 | pred = {} # map {classname: pred} 182 | gt = {} # map {classname: gt} 183 | for img_id in pred_all.keys(): 184 | for classname, bbox, score in pred_all[img_id]: 185 | if classname not in pred: pred[classname] = {} 186 | if img_id not in pred[classname]: 187 | pred[classname][img_id] = [] 188 | if classname not in gt: gt[classname] = {} 189 | if img_id not in gt[classname]: 190 | gt[classname][img_id] = [] 191 | pred[classname][img_id].append((bbox,score)) 192 | for img_id in gt_all.keys(): 193 | for classname, bbox in gt_all[img_id]: 194 | if classname not in gt: gt[classname] = {} 195 | if img_id not in gt[classname]: 196 | gt[classname][img_id] = [] 197 | gt[classname][img_id].append(bbox) 198 | 199 | rec = {} 200 | prec = {} 201 | ap = {} 202 | for classname in gt.keys(): 203 | print('Computing AP for class: ', classname) 204 | rec[classname], prec[classname], ap[classname] = eval_det_cls(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) 205 | print(classname, ap[classname]) 206 | 207 | return rec, prec, ap 208 | 209 | from multiprocessing import Pool 210 | def eval_det_multiprocessing(pred_all, gt_all, ovthresh=0.25, use_07_metric=False, get_iou_func=get_iou): 211 | """ Generic functions to compute precision/recall for object detection 212 | for multiple classes. 213 | Input: 214 | pred_all: map of {img_id: [(classname, bbox, score)]} 215 | gt_all: map of {img_id: [(classname, bbox)]} 216 | ovthresh: scalar, iou threshold 217 | use_07_metric: bool, if true use VOC07 11 point method 218 | Output: 219 | rec: {classname: rec} 220 | prec: {classname: prec_all} 221 | ap: {classname: scalar} 222 | """ 223 | pred = {} # map {classname: pred} 224 | gt = {} # map {classname: gt} 225 | for img_id in pred_all.keys(): 226 | for classname, bbox, score in pred_all[img_id]: 227 | if classname not in pred: pred[classname] = {} 228 | if img_id not in pred[classname]: 229 | pred[classname][img_id] = [] 230 | if classname not in gt: gt[classname] = {} 231 | if img_id not in gt[classname]: 232 | gt[classname][img_id] = [] 233 | pred[classname][img_id].append((bbox,score)) 234 | for img_id in gt_all.keys(): 235 | for classname, bbox in gt_all[img_id]: 236 | if classname not in gt: gt[classname] = {} 237 | if img_id not in gt[classname]: 238 | gt[classname][img_id] = [] 239 | gt[classname][img_id].append(bbox) 240 | 241 | rec = {} 242 | prec = {} 243 | ap = {} 244 | p = Pool(processes=10) 245 | ret_values = p.map(eval_det_cls_wrapper, [(pred[classname], gt[classname], ovthresh, use_07_metric, get_iou_func) for classname in gt.keys() if classname in pred]) 246 | p.close() 247 | for i, classname in enumerate(gt.keys()): 248 | if classname in pred: 249 | rec[classname], prec[classname], ap[classname] = ret_values[i] 250 | else: 251 | rec[classname] = 0 252 | prec[classname] = 0 253 | ap[classname] = 0 254 | print(classname, ap[classname]) 255 | 256 | return rec, prec, ap 257 | -------------------------------------------------------------------------------- /utils/metric_util.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Utility functions for metric evaluation. 7 | 8 | Author: Or Litany and Charles R. Qi 9 | """ 10 | 11 | import os 12 | import sys 13 | import torch 14 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 15 | sys.path.append(BASE_DIR) 16 | 17 | import numpy as np 18 | 19 | # Mesh IO 20 | import trimesh 21 | 22 | 23 | # ---------------------------------------- 24 | # Precision and Recall 25 | # ---------------------------------------- 26 | 27 | def multi_scene_precision_recall(labels, pred, iou_thresh, conf_thresh, label_mask, pred_mask=None): 28 | ''' 29 | Args: 30 | labels: (B, N, 6) 31 | pred: (B, M, 6) 32 | iou_thresh: scalar 33 | conf_thresh: scalar 34 | label_mask: (B, N,) with values in 0 or 1 to indicate which GT boxes to consider. 35 | pred_mask: (B, M,) with values in 0 or 1 to indicate which PRED boxes to consider. 36 | Returns: 37 | TP,FP,FN,Precision,Recall 38 | ''' 39 | # Make sure the masks are not Torch tensor, otherwise the mask==1 returns uint8 array instead 40 | # of True/False array as in numpy 41 | assert(not torch.is_tensor(label_mask)) 42 | assert(not torch.is_tensor(pred_mask)) 43 | TP, FP, FN = 0, 0, 0 44 | if label_mask is None: label_mask = np.ones((labels.shape[0], labels.shape[1])) 45 | if pred_mask is None: pred_mask = np.ones((pred.shape[0], pred.shape[1])) 46 | for batch_idx in range(labels.shape[0]): 47 | TP_i, FP_i, FN_i = single_scene_precision_recall(labels[batch_idx, label_mask[batch_idx,:]==1, :], 48 | pred[batch_idx, pred_mask[batch_idx,:]==1, :], 49 | iou_thresh, conf_thresh) 50 | TP += TP_i 51 | FP += FP_i 52 | FN += FN_i 53 | 54 | return TP, FP, FN, precision_recall(TP, FP, FN) 55 | 56 | 57 | def single_scene_precision_recall(labels, pred, iou_thresh, conf_thresh): 58 | """Compute P and R for predicted bounding boxes. Ignores classes! 59 | Args: 60 | labels: (N x bbox) ground-truth bounding boxes (6 dims) 61 | pred: (M x (bbox + conf)) predicted bboxes with confidence and maybe classification 62 | Returns: 63 | TP, FP, FN 64 | """ 65 | 66 | 67 | # for each pred box with high conf (C), compute IoU with all gt boxes. 68 | # TP = number of times IoU > th ; FP = C - TP 69 | # FN - number of scene objects without good match 70 | 71 | gt_bboxes = labels[:, :6] 72 | 73 | num_scene_bboxes = gt_bboxes.shape[0] 74 | conf = pred[:, 6] 75 | 76 | conf_pred_bbox = pred[np.where(conf > conf_thresh)[0], :6] 77 | num_conf_pred_bboxes = conf_pred_bbox.shape[0] 78 | 79 | # init an array to keep iou between generated and scene bboxes 80 | iou_arr = np.zeros([num_conf_pred_bboxes, num_scene_bboxes]) 81 | for g_idx in range(num_conf_pred_bboxes): 82 | for s_idx in range(num_scene_bboxes): 83 | iou_arr[g_idx, s_idx] = calc_iou(conf_pred_bbox[g_idx ,:], gt_bboxes[s_idx, :]) 84 | 85 | 86 | good_match_arr = (iou_arr >= iou_thresh) 87 | 88 | TP = good_match_arr.any(axis=1).sum() 89 | FP = num_conf_pred_bboxes - TP 90 | FN = num_scene_bboxes - good_match_arr.any(axis=0).sum() 91 | 92 | return TP, FP, FN 93 | 94 | 95 | def precision_recall(TP, FP, FN): 96 | Prec = 1.0 * TP / (TP + FP) if TP+FP>0 else 0 97 | Rec = 1.0 * TP / (TP + FN) 98 | return Prec, Rec 99 | 100 | 101 | def calc_iou(box_a, box_b): 102 | """Computes IoU of two axis aligned bboxes. 
103 | Args: 104 | box_a, box_b: 6D of center and lengths 105 | Returns: 106 | iou 107 | """ 108 | 109 | max_a = box_a[0:3] + box_a[3:6]/2 110 | max_b = box_b[0:3] + box_b[3:6]/2 111 | min_max = np.array([max_a, max_b]).min(0) 112 | 113 | min_a = box_a[0:3] - box_a[3:6]/2 114 | min_b = box_b[0:3] - box_b[3:6]/2 115 | max_min = np.array([min_a, min_b]).max(0) 116 | if not ((min_max > max_min).all()): 117 | return 0.0 118 | 119 | intersection = (min_max - max_min).prod() 120 | vol_a = box_a[3:6].prod() 121 | vol_b = box_b[3:6].prod() 122 | union = vol_a + vol_b - intersection 123 | return 1.0*intersection / union 124 | 125 | 126 | if __name__ == '__main__': 127 | print('running some tests') 128 | 129 | ############ 130 | ## Test IoU 131 | ############ 132 | box_a = np.array([0,0,0,1,1,1]) 133 | box_b = np.array([0,0,0,2,2,2]) 134 | expected_iou = 1.0/8 135 | pred_iou = calc_iou(box_a, box_b) 136 | assert expected_iou == pred_iou, 'function returned wrong IoU' 137 | 138 | box_a = np.array([0,0,0,1,1,1]) 139 | box_b = np.array([10,10,10,2,2,2]) 140 | expected_iou = 0.0 141 | pred_iou = calc_iou(box_a, box_b) 142 | assert expected_iou == pred_iou, 'function returned wrong IoU' 143 | 144 | print('IoU test -- PASSED') 145 | 146 | ######################### 147 | ## Test Precition Recall 148 | ######################### 149 | gt_boxes = np.array([[0,0,0,1,1,1],[3, 0, 1, 1, 10, 1]]) 150 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0],[3, 0, 1, 1, 10, 1, 0.9]]) 151 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 152 | assert TP == 2 and FP == 0 and FN == 0 153 | assert precision_recall(TP, FP, FN) == (1, 1) 154 | 155 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0]]) 156 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 157 | assert TP == 1 and FP == 0 and FN == 1 158 | assert precision_recall(TP, FP, FN) == (1, 0.5) 159 | 160 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 1.0]]) 161 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 162 | assert TP == 1 and FP == 1 and FN == 1 163 | assert precision_recall(TP, FP, FN) == (0.5, 0.5) 164 | 165 | # wrong box has low confidence 166 | detected_boxes = np.array([[0,0,0,1,1,1, 1.0], [-1,-1,0,0.1,0.1,1, 0.1]]) 167 | TP, FP, FN = single_scene_precision_recall(gt_boxes, detected_boxes, 0.5, 0.5) 168 | assert TP == 1 and FP == 0 and FN == 1 169 | assert precision_recall(TP, FP, FN) == (1, 0.5) 170 | 171 | print('Precition Recall test -- PASSED') 172 | 173 | -------------------------------------------------------------------------------- /utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
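# A minimal usage sketch of the IoU-based NMS below (nms_2d_faster), assuming boxes
# are given as rows of (x1, y1, x2, y2, score); the helper name _demo_nms_2d is
# illustrative only and is not referenced anywhere else.
def _demo_nms_2d():
    # Three boxes: the second heavily overlaps the first, so it should be suppressed;
    # the third is disjoint from both and should survive.
    boxes = np.array([
        [0.0, 0.0, 10.0, 10.0, 0.9],
        [1.0, 1.0, 10.0, 10.0, 0.8],
        [20.0, 20.0, 30.0, 30.0, 0.7],
    ])
    keep = nms_2d_faster(boxes, overlap_threshold=0.5)
    print('kept box indices:', keep)  # expected to keep boxes 0 and 2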
5 | 6 | import numpy as np 7 | from pc_util import bbox_corner_dist_measure 8 | 9 | # boxes are axis aigned 2D boxes of shape (n,5) in FLOAT numbers with (x1,y1,x2,y2,score) 10 | ''' Ref: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/ 11 | Ref: https://github.com/vickyboy47/nms-python/blob/master/nms.py 12 | ''' 13 | def nms_2d(boxes, overlap_threshold): 14 | x1 = boxes[:,0] 15 | y1 = boxes[:,1] 16 | x2 = boxes[:,2] 17 | y2 = boxes[:,3] 18 | score = boxes[:,4] 19 | area = (x2-x1)*(y2-y1) 20 | 21 | I = np.argsort(score) 22 | pick = [] 23 | while (I.size!=0): 24 | last = I.size 25 | i = I[-1] 26 | pick.append(i) 27 | suppress = [last-1] 28 | for pos in range(last-1): 29 | j = I[pos] 30 | xx1 = max(x1[i],x1[j]) 31 | yy1 = max(y1[i],y1[j]) 32 | xx2 = min(x2[i],x2[j]) 33 | yy2 = min(y2[i],y2[j]) 34 | w = xx2-xx1 35 | h = yy2-yy1 36 | if (w>0 and h>0): 37 | o = w*h/area[j] 38 | print('Overlap is', o) 39 | if (o>overlap_threshold): 40 | suppress.append(pos) 41 | I = np.delete(I,suppress) 42 | return pick 43 | 44 | def nms_2d_faster(boxes, overlap_threshold, old_type=False): 45 | x1 = boxes[:,0] 46 | y1 = boxes[:,1] 47 | x2 = boxes[:,2] 48 | y2 = boxes[:,3] 49 | score = boxes[:,4] 50 | area = (x2-x1)*(y2-y1) 51 | 52 | I = np.argsort(score) 53 | pick = [] 54 | while (I.size!=0): 55 | last = I.size 56 | i = I[-1] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[I[:last-1]]) 60 | yy1 = np.maximum(y1[i], y1[I[:last-1]]) 61 | xx2 = np.minimum(x2[i], x2[I[:last-1]]) 62 | yy2 = np.minimum(y2[i], y2[I[:last-1]]) 63 | 64 | w = np.maximum(0, xx2-xx1) 65 | h = np.maximum(0, yy2-yy1) 66 | 67 | if old_type: 68 | o = (w*h)/area[I[:last-1]] 69 | else: 70 | inter = w*h 71 | o = inter / (area[i] + area[I[:last-1]] - inter) 72 | 73 | I = np.delete(I, np.concatenate(([last-1], np.where(o>overlap_threshold)[0]))) 74 | 75 | return pick 76 | 77 | def nms_crnr_dist(boxes, conf, overlap_threshold): 78 | 79 | I = np.argsort(conf) 80 | pick = [] 81 | while (I.size!=0): 82 | last = I.size 83 | i = I[-1] 84 | pick.append(i) 85 | 86 | scores = [] 87 | for ind in I[:-1]: 88 | scores.append(bbox_corner_dist_measure(boxes[i,:], boxes[ind, :])) 89 | 90 | I = np.delete(I, np.concatenate(([last-1], np.where(np.array(scores)>overlap_threshold)[0]))) 91 | 92 | return pick 93 | 94 | -------------------------------------------------------------------------------- /utils/nn_distance.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | """ Chamfer distance in Pytorch. 7 | Author: Charles R. 
Qi 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | def huber_loss(error, delta=1.0): 16 | """ 17 | Args: 18 | error: Torch tensor (d1,d2,...,dk) 19 | Returns: 20 | loss: Torch tensor (d1,d2,...,dk) 21 | 22 | x = error = pred - gt or dist(pred,gt) 23 | 0.5 * |x|^2 if |x|<=d 24 | 0.5 * d^2 + d * (|x|-d) if |x|>d 25 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 26 | """ 27 | abs_error = torch.abs(error) 28 | #quadratic = torch.min(abs_error, torch.FloatTensor([delta])) 29 | quadratic = torch.clamp(abs_error, max=delta) 30 | linear = (abs_error - quadratic) 31 | loss = 0.5 * quadratic**2 + delta * linear 32 | return loss 33 | 34 | def nn_distance(pc1, pc2, l1smooth=False, delta=1.0, l1=False): 35 | """ 36 | Input: 37 | pc1: (B,N,C) torch tensor 38 | pc2: (B,M,C) torch tensor 39 | l1smooth: bool, whether to use l1smooth loss 40 | delta: scalar, the delta used in l1smooth loss 41 | Output: 42 | dist1: (B,N) torch float32 tensor 43 | idx1: (B,N) torch int64 tensor 44 | dist2: (B,M) torch float32 tensor 45 | idx2: (B,M) torch int64 tensor 46 | """ 47 | N = pc1.shape[1] 48 | M = pc2.shape[1] 49 | pc1_expand_tile = pc1.unsqueeze(2).repeat(1,1,M,1) 50 | pc2_expand_tile = pc2.unsqueeze(1).repeat(1,N,1,1) 51 | pc_diff = pc1_expand_tile - pc2_expand_tile 52 | 53 | if l1smooth: 54 | pc_dist = torch.sum(huber_loss(pc_diff, delta), dim=-1) # (B,N,M) 55 | elif l1: 56 | pc_dist = torch.sum(torch.abs(pc_diff), dim=-1) # (B,N,M) 57 | else: 58 | pc_dist = torch.sum(pc_diff**2, dim=-1) # (B,N,M) 59 | dist1, idx1 = torch.min(pc_dist, dim=2) # (B,N) 60 | dist2, idx2 = torch.min(pc_dist, dim=1) # (B,M) 61 | return dist1, idx1, dist2, idx2 62 | 63 | def demo_nn_distance(): 64 | np.random.seed(0) 65 | pc1arr = np.random.random((1,5,3)) 66 | pc2arr = np.random.random((1,6,3)) 67 | pc1 = torch.from_numpy(pc1arr.astype(np.float32)) 68 | pc2 = torch.from_numpy(pc2arr.astype(np.float32)) 69 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2) 70 | print(dist1) 71 | print(idx1) 72 | dist = np.zeros((5,6)) 73 | for i in range(5): 74 | for j in range(6): 75 | dist[i,j] = np.sum((pc1arr[0,i,:] - pc2arr[0,j,:]) ** 2) 76 | print(dist) 77 | print('-'*30) 78 | print('L1smooth dists:') 79 | dist1, idx1, dist2, idx2 = nn_distance(pc1, pc2, True) 80 | print(dist1) 81 | print(idx1) 82 | dist = np.zeros((5,6)) 83 | for i in range(5): 84 | for j in range(6): 85 | error = np.abs(pc1arr[0,i,:] - pc2arr[0,j,:]) 86 | quad = np.minimum(error, 1.0) 87 | linear = error - quad 88 | loss = 0.5*quad**2 + 1.0*linear 89 | dist[i,j] = np.sum(loss) 90 | print(dist) 91 | 92 | 93 | if __name__ == '__main__': 94 | demo_nn_distance() 95 | -------------------------------------------------------------------------------- /utils/tf_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
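# A minimal usage sketch of the Logger class below, assuming a TensorFlow 1.x
# environment (tf.summary.FileWriter); _demo_logger and the 'log/train_demo'
# directory are illustrative only.
def _demo_logger():
    logger = Logger('log/train_demo')
    # One scalar value per training step produces a loss curve in TensorBoard.
    for step, loss in enumerate([1.2, 0.9, 0.7]):
        logger.scalar_summary('loss', loss, step)
    # Histogram of (random stand-in) weights at step 0.
    logger.histo_summary('fc1/weights', np.random.randn(1000), 0)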
5 | 6 | import tensorflow as tf 7 | import numpy as np 8 | import scipy.misc 9 | try: 10 | from StringIO import StringIO # Python 2.7 11 | except ImportError: 12 | from io import BytesIO # Python 3.x 13 | 14 | 15 | class Logger(object): 16 | 17 | def __init__(self, log_dir): 18 | """Create a summary writer logging to log_dir.""" 19 | self.writer = tf.summary.FileWriter(log_dir) 20 | 21 | def scalar_summary(self, tag, value, step): 22 | """Log a scalar variable.""" 23 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 24 | self.writer.add_summary(summary, step) 25 | 26 | def image_summary(self, tag, images, step): 27 | """Log a list of images.""" 28 | 29 | img_summaries = [] 30 | for i, img in enumerate(images): 31 | # Write the image to a string 32 | try: 33 | s = StringIO() 34 | except: 35 | s = BytesIO() 36 | scipy.misc.toimage(img).save(s, format="png") 37 | 38 | # Create an Image object 39 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 40 | height=img.shape[0], 41 | width=img.shape[1]) 42 | # Create a Summary value 43 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 44 | 45 | # Create and write Summary 46 | summary = tf.Summary(value=img_summaries) 47 | self.writer.add_summary(summary, step) 48 | 49 | def histo_summary(self, tag, values, step, bins=1000): 50 | """Log a histogram of the tensor of values.""" 51 | 52 | # Create a histogram using numpy 53 | counts, bin_edges = np.histogram(values, bins=bins) 54 | 55 | # Fill the fields of the histogram proto 56 | hist = tf.HistogramProto() 57 | hist.min = float(np.min(values)) 58 | hist.max = float(np.max(values)) 59 | hist.num = int(np.prod(values.shape)) 60 | hist.sum = float(np.sum(values)) 61 | hist.sum_squares = float(np.sum(values**2)) 62 | 63 | # Drop the start of the first bin 64 | bin_edges = bin_edges[1:] 65 | 66 | # Add bin edges and counts 67 | for edge in bin_edges: 68 | hist.bucket_limit.append(edge) 69 | for c in counts: 70 | hist.bucket.append(c) 71 | 72 | # Create and write Summary 73 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 74 | self.writer.add_summary(summary, step) 75 | self.writer.flush() 76 | -------------------------------------------------------------------------------- /utils/tf_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 
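# A minimal usage sketch of the log_writing class below: `opt` only needs a
# `log_dir` attribute, so a hypothetical argparse.Namespace stands in for the
# training script's parsed options.
#
#     from argparse import Namespace
#     opt = Namespace(log_dir='log_sunrgbd')
#     visualizer = log_writing(opt, name='train')
#     visualizer.log_scalars({'loss': 0.42, 'lr': 1e-3}, step=100)
#     visualizer.print_current_scalars(1, 100, {'loss': 0.42})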
5 | 6 | '''Code adapted from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix''' 7 | import os 8 | import time 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | import sys 11 | sys.path.append(BASE_DIR) 12 | import tf_logger 13 | 14 | 15 | class log_writing(): 16 | def __init__(self, opt, name='train'): 17 | # self.opt = opt 18 | #self.logger = tf_logger.Logger(os.path.join(opt.logging_dir, opt.name)) 19 | #self.log_name = os.path.join(opt.checkpoint_dir, opt.name, 'loss_log.txt') 20 | self.logger = tf_logger.Logger(os.path.join(opt.log_dir, name)) 21 | self.log_name = os.path.join(opt.log_dir, 'tf_visualizer_log.txt') 22 | with open(self.log_name, "a") as log_file: 23 | now = time.strftime("%c") 24 | log_file.write('================ Training Loss (%s) ================\n' % now) 25 | 26 | # |visuals|: dictionary of images to save 27 | def log_images(self, visuals, step): 28 | for label, image_numpy in visuals.items(): 29 | self.logger.image_summary( 30 | label, [image_numpy], step) 31 | 32 | # scalars: dictionary of scalar labels and values 33 | def log_scalars(self, scalars, step): 34 | for label, val in scalars.items(): 35 | self.logger.scalar_summary(label, val, step) 36 | 37 | # scatter plots 38 | def plot_current_points(self, points, disp_offset=10): 39 | pass 40 | 41 | # scalars: same format as |scalars| of plot_current_scalars 42 | def print_current_scalars(self, epoch, i, scalars): 43 | message = '(epoch: %d, iters: %d) ' % (epoch, i) 44 | for k, v in scalars.items(): 45 | message += '%s: %.3f ' % (k, v) 46 | 47 | print(message) 48 | with open(self.log_name, "a") as log_file: 49 | log_file.write('%s\n' % message) 50 | -------------------------------------------------------------------------------- /visualize_ply.py: -------------------------------------------------------------------------------- 1 | import pyvista as pv 2 | from pyvista import examples 3 | filename='demo_files/sunrgbd_results/000000_pred_confident_nms_bbox.ply' 4 | mesh = pv.read(filename) 5 | cpos = mesh.plot() 6 | 7 | 8 | # Copyright (c) Facebook, Inc. and its affiliates. 9 | # 10 | # This source code is licensed under the MIT license found in the 11 | # LICENSE file in the root directory of this source tree. 
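# A small sketch that overlays the confident post-NMS boxes on the demo scene
# instead of plotting the box mesh alone, assuming pyvista's Plotter API and the
# demo point cloud at demo_files/sunrgbd_results/000000_pc.ply.
def _show_scene_with_boxes():
    scene = pv.read('demo_files/sunrgbd_results/000000_pc.ply')
    boxes = pv.read('demo_files/sunrgbd_results/000000_pred_confident_nms_bbox.ply')
    plotter = pv.Plotter()
    # Scene points as small spheres, predicted boxes as a red wireframe on top.
    plotter.add_mesh(scene, point_size=2, render_points_as_spheres=True)
    plotter.add_mesh(boxes, color='red', style='wireframe', line_width=3)
    plotter.show()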
12 | 13 | """ Batch mode in loading Scannet scenes with vertices and ground truth labels 14 | for semantic and instance segmentations 15 | Usage example: python ./batch_load_scannet_data.py 16 | """ 17 | # import os 18 | # import sys 19 | # import datetime 20 | # import numpy as np 21 | # import pdb 22 | # import matplotlib.pyplot as pyplot 23 | # import open3d as o3d 24 | # from scipy.spatial.distance import directed_hausdorff 25 | # import json 26 | # import pickle 27 | # import random 28 | # import scipy.io as sio 29 | # from pc_util import params2bbox, write_ply_rgb 30 | # 31 | # THRESH = 0 32 | # THRESH2 = -0.1 33 | # DATA_DIR = os.path.join('/home/bo/data/sunrgbd/sunrgbd_pc_bbox_votes_50k_v1_val') # path of sunrgbd dataset 34 | # VAL_SCAN_NAMES = sorted(list(set([os.path.basename(x)[0:6] for x in os.listdir(DATA_DIR)]))) 35 | # PRED_PATH= '/home/bo/projects/cvpr2020/detection/new/new/sunrgbd/code_sunrgbd/indoor_scene_understanding/dump_sunrgbd/result' # path of predictions 36 | # 37 | # DONOTCARE_CLASS_IDS = np.array([]) 38 | # MAX_NUM_POINT = 40000 39 | # mode = sys.argv[1] 40 | # 41 | # color_mapping = {1:[30,144,255], 2:[255,69,0], 3:[255,215,0], 4:[50,205,50], 5:[255,127,80], 42 | # 6:[255,20,147], 7:[100,149,237], 8:[255,127,80],9:[210,105,30], 10:[221,160,221],11:[95,158, 160]} 43 | # 44 | # def create_lineset_old(bbox, colors=[1, 0, 0]): 45 | # ''' create bounding box 46 | # ''' 47 | # xmin = bbox[0] - bbox[3] / 2 48 | # xmax = bbox[0] + bbox[3] / 2 49 | # ymin = bbox[1] - bbox[4] / 2 50 | # ymax = bbox[1] + bbox[4] / 2 51 | # zmin = bbox[2] - bbox[5] / 2 52 | # zmax = bbox[2] + bbox[5] / 2 53 | # points = [[xmin, ymin, zmin], [xmin, ymin, zmax], [xmin, ymax, zmin], [xmin, ymax, zmax], 54 | # [xmax, ymin, zmin], [xmax, ymin, zmax], [xmax, ymax, zmin], [xmax, ymax, zmax]] 55 | # lines = [[0, 1], [0, 2], [2, 3], [1, 3], [0, 4], [1, 5], [3, 7], [2, 6], 56 | # [4, 5], [5, 7], [6, 7], [4, 6]] 57 | # line_set = o3d.geometry.LineSet() 58 | # line_set.points = o3d.utility.Vector3dVector(points) 59 | # line_set.lines = o3d.utility.Vector2iVector(lines) 60 | # line_set.colors = o3d.utility.Vector3dVector(np.tile(colors, [12, 1])) 61 | # return line_set 62 | # 63 | # 64 | # def create_lineset(bbox, colors=[1, 0, 0]): 65 | # ''' create bounding box 66 | # ''' 67 | # points = params2bbox(bbox) 68 | # lines = [[0, 1], [0, 2], [2, 3], [1, 3], [0, 4], [1, 5], [3, 7], [2, 6], 69 | # [4, 5], [5, 7], [6, 7], [4, 6]] 70 | # line_set = o3d.geometry.LineSet() 71 | # line_set.points = o3d.utility.Vector3dVector(points) 72 | # line_set.lines = o3d.utility.Vector2iVector(lines) 73 | # line_set.colors = o3d.utility.Vector3dVector(np.tile(colors, [12, 1])) 74 | # return line_set 75 | # 76 | # 77 | # def load_view_point(pcd, filename, window_name): 78 | # if mode=='pred': 79 | # left = 50 80 | # top=50 81 | # elif mode=='gt': 82 | # left = 1000 83 | # top=730 84 | # else: 85 | # print("model must be gt or pred") 86 | # return 87 | # 88 | # vis = o3d.visualization.Visualizer() 89 | # vis.create_window(window_name, width=880, height=680, left=left, top=top) 90 | # for part in pcd: 91 | # vis.add_geometry(part) 92 | # ctr = vis.get_view_control() 93 | # current_param = ctr.convert_to_pinhole_camera_parameters() 94 | # trajectory = o3d.io.read_pinhole_camera_trajectory(filename) 95 | # f = 983.80485869912241 96 | # cx = current_param.intrinsic.width / 2 - 0.5 97 | # cy = current_param.intrinsic.height / 2 - 0.5 98 | # trajectory.parameters[0].intrinsic.set_intrinsics(current_param.intrinsic.width, 
current_param.intrinsic.height, f, f, cx, cy) 99 | # 100 | # ctr.convert_from_pinhole_camera_parameters(trajectory.parameters[0]) 101 | # vis.run() 102 | # vis.destroy_window() 103 | # 104 | # def select_bbox(bboxes): 105 | # choose_ids = [] 106 | # for i in range(bboxes.shape[0]): 107 | # if bboxes[i,-1] in OBJ_CLASS_IDS: 108 | # choose_ids.append(i) 109 | # bboxes = bboxes[choose_ids] 110 | # return bboxes 111 | # 112 | # def export_one_scan(scan_name): 113 | # pt = np.load(os.path.join(DATA_DIR, scan_name+'_pc.npz'))['pc'] 114 | # np.savetxt(mode+'tmp.xyz', pt) 115 | # os.system("mv {}tmp.xyz {}tmp.xyzrgb".format(mode, mode)) 116 | # point_cloud = o3d.io.read_point_cloud(mode+'tmp.xyzrgb') 117 | # 118 | # pred_proposals = np.load(os.path.join(PRED_PATH, 'center'+scan_name+'_nms.npy')) 119 | # gt_bbox = sio.loadmat(os.path.join(PRED_PATH, 'center'+scan_name+'_gt.mat'))['gt'] 120 | # bb =[] 121 | # if mode=='gt': 122 | # boundingboxes = gt_bbox 123 | # elif mode =='pred': 124 | # boundingboxes = pred_proposals 125 | # else: 126 | # print("model must be gt or pred") 127 | # return 128 | # for i in range(boundingboxes.shape[0]): 129 | # c = np.array(color_mapping[int(boundingboxes[i,-1])])/255.0 130 | # for _ in range(2): 131 | # bb.append(create_lineset(boundingboxes[i]+0.005*(np.random.rand()-0.5)*2, colors=c)) 132 | # load_view_point([point_cloud] + bb, './viewpoint.json', window_name=scan_name+'_'+mode) 133 | # 134 | # 135 | # def batch_export(): 136 | # for i, scan_name in enumerate(VAL_SCAN_NAMES): 137 | # if not scan_name.endswith('10'): 138 | # continue 139 | # print('-'*20+'begin') 140 | # print(scan_name) 141 | # export_one_scan(scan_name) 142 | # print('-'*20+'done') 143 | # 144 | # if __name__=='__main__': 145 | # batch_export() --------------------------------------------------------------------------------
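# A minimal end-to-end sketch of the oriented 3D IoU utilities in utils/box_util.py,
# assuming utils/ is on sys.path: build two boxes from (length, width, height),
# heading angle (radians) and center with get_3d_box, then compare them with box3d_iou.
from box_util import get_3d_box, box3d_iou

corners_a = get_3d_box((2.0, 1.0, 1.5), 0.0, (0.0, 0.0, 0.0))   # (8,3) corner array
corners_b = get_3d_box((2.0, 1.0, 1.5), 0.1, (0.2, 0.0, 0.1))   # slightly shifted and rotated
iou_3d, iou_bev = box3d_iou(corners_a, corners_b)
print("3D IoU: %.3f, bird's-eye-view IoU: %.3f" % (iou_3d, iou_bev))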