├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md ├── UR_Robot.py ├── color_image.png ├── command_test.sh ├── command_train.sh ├── dataset ├── generate_graspness.py ├── graspnet_dataset.py ├── simplify_dataset.py └── vis_graspness.py ├── depth_image.png ├── doc ├── example_data │ ├── color.png │ ├── demo_result.png │ ├── depth.png │ ├── meta.mat │ └── workspace_mask.png └── teaser.png ├── grasp_real.py ├── infer_vis_grasp.py ├── knn ├── .DS_Store ├── knn_modules.py ├── setup.py └── src │ ├── cpu │ ├── knn_cpu.cpp │ └── vision.h │ ├── cuda │ ├── knn.cu │ └── vision.h │ ├── knn.h │ └── vision.cpp ├── models ├── backbone_resunet14.py ├── graspnet.py ├── loss.py ├── modules.py └── resnet.py ├── pointnet2 ├── .DS_Store ├── _ext_src │ ├── .DS_Store │ ├── include │ │ ├── ball_query.h │ │ ├── cuda_utils.h │ │ ├── cylinder_query.h │ │ ├── group_points.h │ │ ├── interpolate.h │ │ ├── sampling.h │ │ └── utils.h │ └── src │ │ ├── ball_query.cpp │ │ ├── ball_query_gpu.cu │ │ ├── bindings.cpp │ │ ├── cylinder_query.cpp │ │ ├── cylinder_query_gpu.cu │ │ ├── group_points.cpp │ │ ├── group_points_gpu.cu │ │ ├── interpolate.cpp │ │ ├── interpolate_gpu.cu │ │ ├── sampling.cpp │ │ └── sampling_gpu.cu ├── pointnet2_modules.py ├── pointnet2_utils.py ├── pytorch_utils.py └── setup.py ├── real ├── .DS_Store ├── __init__.py ├── calibrate.py ├── cam_pose │ ├── camera_depth_scale.txt │ └── camera_pose.txt ├── camera_depth_scale.txt ├── camera_pose.txt ├── capture.py ├── color_image.png ├── realsenseD415.py ├── realsenseD435.py ├── robotiq_gripper.py ├── test.py ├── testForRealSense.py ├── testGripper.py ├── testReceiveCommand.py └── touch.py ├── requirements (copy).txt ├── requirements.txt ├── test.py ├── train.py └── utils ├── collision_detector.py ├── data_utils.py ├── label_generation.py └── loss_utils.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/** 2 | *.ipynb 3 | **/.ipynb_checkpoints/** 4 | *.npy 5 | *.npz 6 | **/.vscode/** 7 | **/grasp_label*/** 8 | **/log*/** 9 | **/dump*/** 10 | **/build/** 11 | *.o 12 | *.so 13 | *.egg 14 | **/*.egg-info/** 15 | logs 16 | dataset/tolerance 17 | **/.idea/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GRASPNET-BASELINE 2 | SOFTWARE LICENSE AGREEMENT 3 | ACADEMIC OR NON-PROFIT ORGANIZATION NONCOMMERCIAL RESEARCH USE ONLY 4 | 5 | BY USING OR DOWNLOADING THE SOFTWARE, YOU ARE AGREEING TO THE TERMS OF THIS LICENSE AGREEMENT. IF YOU DO NOT AGREE WITH THESE TERMS, YOU MAY NOT USE OR DOWNLOAD THE SOFTWARE. 6 | 7 | This is a license agreement ("Agreement") between your academic institution or non-profit organization or self (called "Licensee" or "You" in this Agreement) and Shanghai Jiao Tong University (called "Licensor" in this Agreement). All rights not specifically granted to you in this Agreement are reserved for Licensor. 
8 | 9 | RESERVATION OF OWNERSHIP AND GRANT OF LICENSE: 10 | Licensor retains exclusive ownership of any copy of the Software (as defined below) licensed under this Agreement and hereby grants to Licensee a personal, non-exclusive, 11 | non-transferable license to use the Software for noncommercial research purposes, without the right to sublicense, pursuant to the terms and conditions of this Agreement. As used in this Agreement, the term "Software" means (i) the actual copy of all or any portion of code for program routines made accessible to Licensee by Licensor pursuant to this Agreement, inclusive of backups, updates, and/or merged copies permitted hereunder or subsequently supplied by Licensor, including all or any file structures, programming instructions, user interfaces and screen formats and sequences as well as any and all documentation and instructions related to it, and (ii) all or any derivatives and/or modifications created or made by You to any of the items specified in (i). 12 | 13 | CONFIDENTIALITY: Licensee acknowledges that the Software is proprietary to Licensor, and as such, Licensee agrees to receive all such materials in confidence and use the Software only in accordance with the terms of this Agreement. Licensee agrees to use reasonable effort to protect the Software from unauthorized use, reproduction, distribution, or publication. 14 | 15 | PERMITTED USES: The Software may be used for your own noncommercial internal research purposes. You understand and agree that Licensor is not obligated to implement any suggestions and/or feedback you might provide regarding the Software, but to the extent Licensor does so, you are not entitled to any compensation related thereto. 16 | 17 | DERIVATIVES: You may create derivatives of or make modifications to the Software, however, You agree that all and any such derivatives and modifications will be owned by Licensor and become a part of the Software licensed to You under this Agreement. You may only use such derivatives and modifications for your own noncommercial internal research purposes, and you may not otherwise use, distribute or copy such derivatives and modifications in violation of this Agreement. 18 | 19 | BACKUPS: If Licensee is an organization, it may make that number of copies of the Software necessary for internal noncommercial use at a single site within its organization provided that all information appearing in or on the original labels, including the copyright and trademark notices are copied onto the labels of the copies. 20 | 21 | USES NOT PERMITTED: You may not distribute, copy or use the Software except as explicitly permitted herein. Licensee has not been granted any trademark license as part of this Agreement and may not use the name or mark “AlphaPose", "Shanghai Jiao Tong" or any renditions thereof without the prior written permission of Licensor. 22 | 23 | You may not sell, rent, lease, sublicense, lend, time-share or transfer, in whole or in part, or provide third parties access to prior or present versions (or any parts thereof) of the Software. 24 | 25 | ASSIGNMENT: You may not assign this Agreement or your rights hereunder without the prior written consent of Licensor. Any attempted assignment without such consent shall be null and void. 26 | 27 | TERM: The term of the license granted by this Agreement is from Licensee's acceptance of this Agreement by downloading the Software or by using the Software until terminated as provided below. 
28 | 29 | The Agreement automatically terminates without notice if you fail to comply with any provision of this Agreement. Licensee may terminate this Agreement by ceasing using the Software. Upon any termination of this Agreement, Licensee will delete any and all copies of the Software. You agree that all provisions which operate to protect the proprietary rights of Licensor shall remain in force should breach occur and that the obligation of confidentiality described in this Agreement is binding in perpetuity and, as such, survives the term of the Agreement. 30 | 31 | FEE: Provided Licensee abides completely by the terms and conditions of this Agreement, there is no fee due to Licensor for Licensee's use of the Software in accordance with this Agreement. 32 | 33 | DISCLAIMER OF WARRANTIES: THE SOFTWARE IS PROVIDED "AS-IS" WITHOUT WARRANTY OF ANY KIND INCLUDING ANY WARRANTIES OF PERFORMANCE OR MERCHANTABILITY OR FITNESS FOR A PARTICULAR USE OR PURPOSE OR OF NON-INFRINGEMENT. LICENSEE BEARS ALL RISK RELATING TO QUALITY AND PERFORMANCE OF THE SOFTWARE AND RELATED MATERIALS. 34 | 35 | SUPPORT AND MAINTENANCE: No Software support or training by the Licensor is provided as part of this Agreement. 36 | 37 | EXCLUSIVE REMEDY AND LIMITATION OF LIABILITY: To the maximum extent permitted under applicable law, Licensor shall not be liable for direct, indirect, special, incidental, or consequential damages or lost profits related to Licensee's use of and/or inability to use the Software, even if Licensor is advised of the possibility of such damage. 38 | 39 | EXPORT REGULATION: Licensee agrees to comply with any and all applicable 40 | U.S. export control laws, regulations, and/or other laws related to embargoes and sanction programs administered by the Office of Foreign Assets Control. 41 | 42 | SEVERABILITY: If any provision(s) of this Agreement shall be held to be invalid, illegal, or unenforceable by a court or other tribunal of competent jurisdiction, the validity, legality and enforceability of the remaining provisions shall not in any way be affected or impaired thereby. 43 | 44 | NO IMPLIED WAIVERS: No failure or delay by Licensor in enforcing any right or remedy under this Agreement shall be construed as a waiver of any future or other exercise of such right or remedy by Licensor. 45 | 46 | ENTIRE AGREEMENT AND AMENDMENTS: This Agreement constitutes the sole and entire agreement between Licensee and Licensor as to the matter set forth herein and supersedes any previous agreements, understandings, and arrangements between the parties relating hereto. 47 | 48 | 49 | 50 | ************************************************************************ 51 | 52 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 53 | 54 | This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise. 55 | 56 | 1. PyTorch (https://github.com/pytorch/pytorch) 57 | 58 | THIRD-PARTY SOFTWARE NOTICES AND INFORMATION 59 | 60 | This project incorporates material from the project(s) listed below (collectively, "Third Party Code"). This Third Party Code is licensed to you under their original license terms set forth below. We reserves all other rights not expressly granted, whether by implication, estoppel or otherwise. 
61 | 62 | From PyTorch: 63 | 64 | Copyright (c) 2016- Facebook, Inc (Adam Paszke) 65 | Copyright (c) 2014- Facebook, Inc (Soumith Chintala) 66 | Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) 67 | Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) 68 | Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) 69 | Copyright (c) 2011-2013 NYU (Clement Farabet) 70 | Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) 71 | Copyright (c) 2006 Idiap Research Institute (Samy Bengio) 72 | Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) 73 | 74 | From Caffe2: 75 | 76 | Copyright (c) 2016-present, Facebook Inc. All rights reserved. 77 | 78 | All contributions by Facebook: 79 | Copyright (c) 2016 Facebook Inc. 80 | 81 | All contributions by Google: 82 | Copyright (c) 2015 Google Inc. 83 | All rights reserved. 84 | 85 | All contributions by Yangqing Jia: 86 | Copyright (c) 2015 Yangqing Jia 87 | All rights reserved. 88 | 89 | All contributions by Kakao Brain: 90 | Copyright 2019-2020 Kakao Brain 91 | 92 | All contributions from Caffe: 93 | Copyright(c) 2013, 2014, 2015, the respective contributors 94 | All rights reserved. 95 | 96 | All other contributions: 97 | Copyright(c) 2015, 2016 the respective contributors 98 | All rights reserved. 99 | 100 | Caffe2 uses a copyright model similar to Caffe: each contributor holds 101 | copyright over their contributions to Caffe2. The project versioning records 102 | all such contribution and copyright details. If a contributor wants to further 103 | mark their specific copyright on a particular contribution, they should 104 | indicate their copyright solely in the commit message of the change when it is 105 | committed. 106 | 107 | All rights reserved. 108 | 109 | Redistribution and use in source and binary forms, with or without 110 | modification, are permitted provided that the following conditions are met: 111 | 112 | 1. Redistributions of source code must retain the above copyright 113 | notice, this list of conditions and the following disclaimer. 114 | 115 | 2. Redistributions in binary form must reproduce the above copyright 116 | notice, this list of conditions and the following disclaimer in the 117 | documentation and/or other materials provided with the distribution. 118 | 119 | 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America 120 | and IDIAP Research Institute nor the names of its contributors may be 121 | used to endorse or promote products derived from this software without 122 | specific prior written permission. 123 | 124 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 125 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 126 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 127 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 128 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 129 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 130 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 131 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 132 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 133 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 134 | POSSIBILITY OF SUCH DAMAGE. 135 | 136 | 2. 
VoteNet (https://github.com/facebookresearch/votenet)
137 | 
138 | MIT License
139 | 
140 | Copyright (c) Facebook, Inc. and its affiliates.
141 | 
142 | Permission is hereby granted, free of charge, to any person obtaining a copy
143 | of this software and associated documentation files (the "Software"), to deal
144 | in the Software without restriction, including without limitation the rights
145 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
146 | copies of the Software, and to permit persons to whom the Software is
147 | furnished to do so, subject to the following conditions:
148 | 
149 | The above copyright notice and this permission notice shall be included in all
150 | copies or substantial portions of the Software.
151 | 
152 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
153 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
154 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
155 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
156 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
157 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
158 | SOFTWARE.
159 | 
160 | ************END OF THIRD-PARTY SOFTWARE NOTICES AND INFORMATION**********
161 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | The 6-DoF robotic-arm grasping algorithm used here is graspness. This version builds on the original author's code and adds my own configuration and robot-control pieces.
2 | 
3 | Main additions:
4 | - grasp_real.py: main grasping entry point
5 | - real folder: configuration files, camera/robot utilities, etc.
6 | - UR_Robot.py: robot-arm motion library
7 | 
8 | The original project's README.md follows:
9 | ---
10 | 
11 | # GraspNet graspness
12 | My implementation of paper "Graspness Discovery in Clutters for Fast and Accurate Grasp Detection" (ICCV 2021).
13 | 
14 | [[paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Wang_Graspness_Discovery_in_Clutters_for_Fast_and_Accurate_Grasp_Detection_ICCV_2021_paper.pdf)]
15 | [[dataset](https://graspnet.net/)]
16 | [[API](https://github.com/graspnet/graspnetAPI)]
17 | 
18 | 
19 | ## Requirements
20 | - Python 3
21 | - PyTorch 1.8
22 | - Open3d 0.8
23 | - TensorBoard 2.3
24 | - NumPy
25 | - SciPy
26 | - Pillow
27 | - tqdm
28 | - MinkowskiEngine
29 | 
30 | ## Installation
31 | Get the code.
32 | ```bash
33 | git clone https://github.com/rhett-chen/graspness_implementation.git
34 | cd graspnet-graspness
35 | ```
36 | Install packages via Pip.
37 | ```bash
38 | pip install -r requirements.txt
39 | ```
40 | Compile and install pointnet2 operators (code adapted from [votenet](https://github.com/facebookresearch/votenet)).
41 | ```bash
42 | cd pointnet2
43 | python setup.py install
44 | ```
45 | Compile and install knn operator (code adapted from [pytorch_knn_cuda](https://github.com/chrischoy/pytorch_knn_cuda)).
46 | ```bash
47 | cd knn
48 | python setup.py install
49 | ```
50 | Install graspnetAPI for evaluation.
51 | ```bash
52 | git clone https://github.com/graspnet/graspnetAPI.git
53 | cd graspnetAPI
54 | pip install .
55 | ```
56 | For MinkowskiEngine, please refer to https://github.com/NVIDIA/MinkowskiEngine
57 | ## Point level Graspness Generation
58 | Point-level graspness labels are not included in the original dataset and must be generated separately. Make sure you have downloaded the original dataset from [GraspNet](https://graspnet.net/). The generation code is in [dataset/generate_graspness.py](dataset/generate_graspness.py).
59 | ```bash
60 | cd dataset
61 | python generate_graspness.py --dataset_root /data3/graspnet --camera_type kinect
62 | ```
63 | 
64 | ## Simplify dataset
65 | The grasp_label files in the original dataset contain redundant data; simplifying them significantly reduces the memory cost. The code is in [dataset/simplify_dataset.py](dataset/simplify_dataset.py).
66 | ```bash
67 | cd dataset
68 | python simplify_dataset.py --dataset_root /data3/graspnet
69 | ```
70 | 
71 | ## Training and Testing
72 | Training examples are shown in [command_train.sh](command_train.sh). `--dataset_root`, `--camera` and `--log_dir` should be specified according to your settings. You can use TensorBoard to visualize the training process.
73 | 
74 | Testing examples are shown in [command_test.sh](command_test.sh), which covers both inference and result evaluation. `--dataset_root`, `--camera`, `--checkpoint_path` and `--dump_dir` should be specified according to your settings. Set `--collision_thresh` to -1 for fast inference.
75 | 
76 | If you need the trained weights, you can contact me directly.
77 | 
78 | ## Results
79 | The "In repo" rows report the performance of my trained model, evaluated without collision detection.
80 | 
81 | Evaluation results on Kinect camera:
82 | | | | Seen | | | Similar | | | Novel | |
83 | |:--------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|:------:|:----------------:|:----------------:|
84 | | | __AP__ | AP0.8 | AP0.4 | __AP__ | AP0.8 | AP0.4 | __AP__ | AP0.8 | AP0.4 |
85 | | In paper | 61.19 | 71.46 | 56.04 | 47.39 | 56.78 | 40.43 | 19.01 | 23.73 | 10.60 |
86 | | In repo | 61.83 | 73.28 | 54.14 | 51.13 | 62.53 | 41.57 | 19.94 | 24.90 | 11.02 |
87 | 
88 | 
89 | ## Troubleshooting
90 | If you meet the `torch.floor` error in MinkowskiEngine, you can solve it by editing the MinkowskiEngine source code:
91 | in `MinkowskiEngine/utils/quantization.py` (around line 262), change `discrete_coordinates = _auto_floor(coordinates)` to `discrete_coordinates = coordinates`. A sketch of this patch is shown at the end of this README.
92 | ## Acknowledgement
93 | My code is mainly based on Graspnet-baseline: https://github.com/graspnet/graspnet-baseline.
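
For convenience, here is a minimal sketch of the Troubleshooting patch described above. The exact surrounding code and line number depend on your MinkowskiEngine version, so treat this as an illustration of the one-line change rather than a verbatim excerpt:

```python
# MinkowskiEngine/utils/quantization.py (around line 262; version-dependent)

# Original line that can trigger the torch.floor error:
# discrete_coordinates = _auto_floor(coordinates)

# Replacement: pass the coordinates through without flooring.
discrete_coordinates = coordinates
```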
94 | -------------------------------------------------------------------------------- /color_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/color_image.png -------------------------------------------------------------------------------- /command_test.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=4 python test.py --camera kinect --dump_dir logs/log_kn/dump_epoch10 --checkpoint_path logs/log_kn/minkresunet_epoch10.tar --batch_size 1 --dataset_root /data3/graspnet --infer --eval --collision_thresh -1 -------------------------------------------------------------------------------- /command_train.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=4 python train.py --camera kinect --log_dir logs/log_kn --batch_size 4 --learning_rate 0.001 --model_name minkuresunet --dataset_root /data3/graspnet -------------------------------------------------------------------------------- /dataset/generate_graspness.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from PIL import Image 4 | import scipy.io as scio 5 | import sys 6 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 7 | sys.path.append(ROOT_DIR) 8 | from utils.data_utils import get_workspace_mask, CameraInfo, create_point_cloud_from_depth_image 9 | from knn.knn_modules import knn 10 | import torch 11 | from graspnetAPI.utils.xmlhandler import xmlReader 12 | from graspnetAPI.utils.utils import get_obj_pose_list, transform_points 13 | import argparse 14 | 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--dataset_root', default='/media/randy/299D817A2D97AD94/xxw/graspnet_dataset') 18 | parser.add_argument('--camera_type', default='realsense', help='Camera split [realsense/kinect]') 19 | 20 | 21 | if __name__ == '__main__': 22 | cfgs = parser.parse_args() 23 | dataset_root = cfgs.dataset_root # set dataset root 24 | camera_type = cfgs.camera_type # kinect / realsense 25 | save_path_root = os.path.join(dataset_root, 'graspness') 26 | 27 | num_views, num_angles, num_depths = 300, 12, 4 28 | fric_coef_thresh = 0.8 29 | point_grasp_num = num_views * num_angles * num_depths 30 | for scene_id in range(3,100): 31 | save_path = os.path.join(save_path_root, 'scene_' + str(scene_id).zfill(4), camera_type) 32 | if not os.path.exists(save_path): 33 | os.makedirs(save_path) 34 | labels = np.load( 35 | os.path.join(dataset_root, 'collision_label', 'scene_' + str(scene_id).zfill(4), 'collision_labels.npz')) 36 | collision_dump = [] 37 | for j in range(len(labels)): 38 | collision_dump.append(labels['arr_{}'.format(j)]) 39 | 40 | for ann_id in range(256): 41 | # get scene point cloud 42 | print('generating scene: {} ann: {}'.format(scene_id, ann_id)) 43 | depth = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 44 | camera_type, 'depth', str(ann_id).zfill(4) + '.png'))) 45 | seg = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 46 | camera_type, 'label', str(ann_id).zfill(4) + '.png'))) 47 | meta = scio.loadmat(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 48 | camera_type, 'meta', str(ann_id).zfill(4) + '.mat')) 49 | intrinsic = meta['intrinsic_matrix'] 50 | 
factor_depth = meta['factor_depth'] 51 | camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], 52 | factor_depth) 53 | cloud = create_point_cloud_from_depth_image(depth, camera, organized=True) 54 | 55 | # remove outlier and get objectness label 56 | depth_mask = (depth > 0) 57 | camera_poses = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 58 | camera_type, 'camera_poses.npy')) 59 | camera_pose = camera_poses[ann_id] 60 | align_mat = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 61 | camera_type, 'cam0_wrt_table.npy')) 62 | trans = np.dot(align_mat, camera_pose) 63 | workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02) 64 | mask = (depth_mask & workspace_mask) 65 | cloud_masked = cloud[mask] 66 | objectness_label = seg[mask] 67 | 68 | # get scene object and grasp info 69 | scene_reader = xmlReader(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4), 70 | camera_type, 'annotations', '%04d.xml' % ann_id)) 71 | pose_vectors = scene_reader.getposevectorlist() 72 | obj_list, pose_list = get_obj_pose_list(camera_pose, pose_vectors) 73 | grasp_labels = {} 74 | for i in obj_list: 75 | file = np.load(os.path.join(dataset_root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))),allow_pickle=True) 76 | grasp_labels[i] = (file['points'].astype(np.float32), file['offsets'].astype(np.float32), 77 | file['scores'].astype(np.float32)) 78 | 79 | grasp_points = [] 80 | grasp_points_graspness = [] 81 | for i, (obj_idx, trans_) in enumerate(zip(obj_list, pose_list)): 82 | sampled_points, offsets, fric_coefs = grasp_labels[obj_idx] 83 | collision = collision_dump[i] # Npoints * num_views * num_angles * num_depths 84 | num_points = sampled_points.shape[0] 85 | 86 | valid_grasp_mask = ((fric_coefs <= fric_coef_thresh) & (fric_coefs > 0) & ~collision) 87 | valid_grasp_mask = valid_grasp_mask.reshape(num_points, -1) 88 | graspness = np.sum(valid_grasp_mask, axis=1) / point_grasp_num 89 | target_points = transform_points(sampled_points, trans_) 90 | target_points = transform_points(target_points, np.linalg.inv(camera_pose)) # fix bug 91 | grasp_points.append(target_points) 92 | grasp_points_graspness.append(graspness.reshape(num_points, 1)) 93 | grasp_points = np.vstack(grasp_points) 94 | grasp_points_graspness = np.vstack(grasp_points_graspness) 95 | 96 | grasp_points = torch.from_numpy(grasp_points).cuda() 97 | grasp_points_graspness = torch.from_numpy(grasp_points_graspness).cuda() 98 | grasp_points = grasp_points.transpose(0, 1).contiguous().unsqueeze(0) 99 | 100 | masked_points_num = cloud_masked.shape[0] 101 | cloud_masked_graspness = np.zeros((masked_points_num, 1)) 102 | part_num = int(masked_points_num / 10000) 103 | for i in range(1, part_num + 2): # lack of cuda memory 104 | if i == part_num + 1: 105 | cloud_masked_partial = cloud_masked[10000 * part_num:] 106 | if len(cloud_masked_partial) == 0: 107 | break 108 | else: 109 | cloud_masked_partial = cloud_masked[10000 * (i - 1):(i * 10000)] 110 | cloud_masked_partial = torch.from_numpy(cloud_masked_partial).cuda() 111 | cloud_masked_partial = cloud_masked_partial.transpose(0, 1).contiguous().unsqueeze(0) 112 | nn_inds = knn(grasp_points, cloud_masked_partial, k=1).squeeze() - 1 113 | cloud_masked_graspness[10000 * (i - 1):(i * 10000)] = torch.index_select( 114 | grasp_points_graspness, 0, nn_inds).cpu().numpy() 115 | 116 | max_graspness = np.max(cloud_masked_graspness) 117 | 
min_graspness = np.min(cloud_masked_graspness) 118 | cloud_masked_graspness = (cloud_masked_graspness - min_graspness) / (max_graspness - min_graspness) 119 | 120 | np.save(os.path.join(save_path, str(ann_id).zfill(4) + '.npy'), cloud_masked_graspness) 121 | -------------------------------------------------------------------------------- /dataset/simplify_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import argparse 4 | 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--dataset_root', default='/media/randy/299D817A2D97AD94/xxw/graspnet_dataset') 8 | 9 | 10 | def simplify_grasp_labels(root, save_path): 11 | """ 12 | original dataset grasp_label files have redundant data, We can significantly save the memory cost 13 | """ 14 | obj_names = list(range(88)) 15 | if not os.path.exists(save_path): 16 | os.makedirs(save_path) 17 | for i in obj_names: 18 | print('\nsimplifying object {}:'.format(i)) 19 | label = np.load(os.path.join(root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3)))) 20 | point_num = len(label['points']) 21 | print('original shape: ', label['points'].shape, label['offsets'].shape, label['scores'].shape) 22 | if point_num > 4820: 23 | idxs = np.random.choice(point_num, 4820, False) 24 | points = label['points'][idxs] 25 | offsets = label['offsets'][idxs] 26 | scores = label['scores'][idxs] 27 | print('Warning!!! down sample object {}'.format(i)) 28 | else: 29 | points = label['points'] 30 | scores = label['scores'] 31 | offsets = label['offsets'] 32 | width = offsets[:, :, :, :, 2] 33 | print('after simplify, offset shape: ', points.shape, scores.shape, width.shape) 34 | np.savez(os.path.join(save_path, '{}_labels.npz'.format(str(i).zfill(3))), 35 | points=points, scores=scores, width=width) 36 | 37 | 38 | if __name__ == '__main__': 39 | cfgs = parser.parse_args() 40 | root = cfgs.dataset_root # set root and save path 41 | save_path = os.path.join(root, 'grasp_label_simplified') 42 | simplify_grasp_labels(root, save_path) 43 | 44 | -------------------------------------------------------------------------------- /dataset/vis_graspness.py: -------------------------------------------------------------------------------- 1 | import open3d as o3d 2 | import scipy.io as scio 3 | from PIL import Image 4 | import os 5 | import numpy as np 6 | import sys 7 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 8 | sys.path.append(ROOT_DIR) 9 | from utils.data_utils import get_workspace_mask, CameraInfo, create_point_cloud_from_depth_image 10 | 11 | data_path = '/media/bot/980A6F5E0A6F38801/datasets/graspnet/' 12 | scene_id = 'scene_0060' 13 | ann_id = '0000' 14 | camera_type = 'realsense' 15 | color = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'rgb', ann_id + '.png')), dtype=np.float32) / 255.0 16 | depth = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'depth', ann_id + '.png'))) 17 | seg = np.array(Image.open(os.path.join(data_path, 'scenes', scene_id, camera_type, 'label', ann_id + '.png'))) 18 | meta = scio.loadmat(os.path.join(data_path, 'scenes', scene_id, camera_type, 'meta', ann_id + '.mat')) 19 | intrinsic = meta['intrinsic_matrix'] 20 | factor_depth = meta['factor_depth'] 21 | camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], factor_depth) 22 | point_cloud = create_point_cloud_from_depth_image(depth, camera, organized=True) 23 | 
depth_mask = (depth > 0) 24 | camera_poses = np.load(os.path.join(data_path, 'scenes', scene_id, camera_type, 'camera_poses.npy')) 25 | align_mat = np.load(os.path.join(data_path, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy')) 26 | trans = np.dot(align_mat, camera_poses[int(ann_id)]) 27 | workspace_mask = get_workspace_mask(point_cloud, seg, trans=trans, organized=True, outlier=0.02) 28 | mask = (depth_mask & workspace_mask) 29 | point_cloud = point_cloud[mask] 30 | color = color[mask] 31 | seg = seg[mask] 32 | 33 | graspness_full = np.load(os.path.join(data_path, 'graspness', scene_id, camera_type, ann_id + '.npy')).squeeze() 34 | graspness_full[seg == 0] = 0. 35 | print('graspness full scene: ', graspness_full.shape, (graspness_full > 0.1).sum()) 36 | color[graspness_full > 0.1] = [0., 1., 0.] 37 | 38 | 39 | cloud = o3d.geometry.PointCloud() 40 | cloud.points = o3d.utility.Vector3dVector(point_cloud.astype(np.float32)) 41 | cloud.colors = o3d.utility.Vector3dVector(color.astype(np.float32)) 42 | o3d.visualization.draw_geometries([cloud]) 43 | -------------------------------------------------------------------------------- /depth_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/depth_image.png -------------------------------------------------------------------------------- /doc/example_data/color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/example_data/color.png -------------------------------------------------------------------------------- /doc/example_data/demo_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/example_data/demo_result.png -------------------------------------------------------------------------------- /doc/example_data/depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/example_data/depth.png -------------------------------------------------------------------------------- /doc/example_data/meta.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/example_data/meta.mat -------------------------------------------------------------------------------- /doc/example_data/workspace_mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/example_data/workspace_mask.png -------------------------------------------------------------------------------- /doc/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/doc/teaser.png -------------------------------------------------------------------------------- /grasp_real.py: -------------------------------------------------------------------------------- 
1 | import os 2 | import sys 3 | import numpy as np 4 | import argparse 5 | from PIL import Image 6 | import time 7 | import scipy.io as scio 8 | import torch 9 | import open3d as o3d 10 | from graspnetAPI.graspnet_eval import GraspGroup 11 | 12 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(ROOT_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 15 | 16 | from models.graspnet import GraspNet, pred_decode 17 | from dataset.graspnet_dataset import minkowski_collate_fn 18 | from collision_detector import ModelFreeCollisionDetector 19 | from data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask 20 | from UR_Robot import UR_Robot 21 | 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--checkpoint_path', default='np15000_graspness1e-1_bs4_lr1e-3_viewres_dataaug_fps_14D_epoch10.tar') 24 | #default='realsense_1120_epoch10.tar') 25 | 26 | parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim') 27 | parser.add_argument('--camera', default='realsense', help='Camera split [realsense/kinect]') 28 | parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]') 29 | parser.add_argument('--batch_size', type=int, default=1, help='Batch Size during inference [default: 1]') 30 | parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size for sparse convolution') 31 | parser.add_argument('--collision_thresh', type=float, default=0.01, 32 | help='Collision Threshold in collision detection [default: 0.01]') 33 | parser.add_argument('--voxel_size_cd', type=float, default=0.01, help='Voxel Size for collision detection') 34 | cfgs = parser.parse_args() 35 | 36 | def data_process(ur_robot): 37 | # load real data 38 | intrinsic = ur_robot.cam_intrinsics 39 | factor_depth = 1.0 / ur_robot.cam_depth_scale 40 | 41 | rgb, depth = ur_robot.get_camera_data() 42 | color = rgb / 255 43 | color = color[:, :, ::-1] 44 | depth = depth.astype(np.float32) 45 | # load data 46 | #root='/media/randy/299D817A2D97AD94/xxw/graspness' 47 | #color = np.array(Image.open(os.path.join(root, 'scenes', 'scene_0000', 'realsense', 'rgb', '0000' + '.png'))) 48 | #depth = np.array(Image.open(os.path.join(root, 'scenes', 'scene_0000', 'realsense', 'depth', '0000'+ '.png'))) 49 | #color = np.array(Image.open(os.path.join(root,'color_image.png')), dtype=np.float32) 50 | #depth = np.array(Image.open(os.path.join(root,'depth_image.png')), dtype=np.float32) 51 | workspace_mask = np.array(Image.open(os.path.join('doc/example_data', 'workspace_mask.png'))) #[720,1280][241false,978false] 52 | camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], 53 | factor_depth) 54 | # compute workspace limits 55 | x_left_up,y_left_up = 0+25,242+25 56 | x_right_bottom,y_right_bottom = 719-25,977-25 57 | point_z = depth[x_left_up,y_left_up] / camera.scale 58 | point_x = (y_left_up - camera.cx) * point_z / camera.fx 59 | point_y = (x_left_up - camera.cy) * point_z / camera.fy 60 | point_left_up = (point_x,point_y,point_z) 61 | point_z = depth[x_right_bottom,y_right_bottom] / camera.scale 62 | point_x = (y_right_bottom - camera.cx) * point_z / camera.fx 63 | point_y = (x_right_bottom - camera.cy) * point_z / camera.fy 64 | point_right_bottom = (point_x, point_y, point_z) 65 | 66 | # generate cloud 67 | cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)#720,1080,3 68 | 69 | # get valid points 70 | depth_mask = (depth > 0) 71 
| # camera_poses = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'camera_poses.npy')) 72 | # align_mat = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy')) 73 | # trans = np.dot(align_mat, camera_poses[int(index)]) 74 | # workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02) 75 | mask = (workspace_mask & depth_mask) 76 | 77 | cloud_masked = cloud[mask]#51225,3 78 | color_masked = color[mask] 79 | # sample points random 80 | if len(cloud_masked) >= cfgs.num_point: 81 | idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False) 82 | else: 83 | idxs1 = np.arange(len(cloud_masked)) 84 | idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point - len(cloud_masked), replace=True) 85 | idxs = np.concatenate([idxs1, idxs2], axis=0) 86 | cloud_sampled = cloud_masked[idxs]#15000,3 87 | color_sampled = color_masked[idxs] 88 | 89 | cloud = o3d.geometry.PointCloud() 90 | cloud.points = o3d.utility.Vector3dVector(cloud_masked.astype(np.float32)) 91 | cloud.colors = o3d.utility.Vector3dVector(color_masked.astype(np.float32)) #51w points 92 | 93 | ret_dict = {'point_clouds': cloud_sampled.astype(np.float32), 94 | 'coors': cloud_sampled.astype(np.float32) / cfgs.voxel_size, 95 | 'feats': np.ones_like(cloud_sampled).astype(np.float32), 96 | } 97 | return ret_dict,cloud,point_left_up,point_right_bottom 98 | 99 | 100 | def grasp(data_input,cloud_,point_left_up,point_right_bottom): 101 | batch_data = minkowski_collate_fn([data_input]) 102 | net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False) 103 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 104 | net.to(device) 105 | # Load checkpoint 106 | checkpoint = torch.load(cfgs.checkpoint_path) 107 | net.load_state_dict(checkpoint['model_state_dict']) 108 | start_epoch = checkpoint['epoch'] 109 | print("-> loaded checkpoint %s (epoch: %d)" % (cfgs.checkpoint_path, start_epoch)) 110 | 111 | net.eval() 112 | 113 | for key in batch_data: 114 | if 'list' in key: 115 | for i in range(len(batch_data[key])): 116 | for j in range(len(batch_data[key][i])): 117 | batch_data[key][i][j] = batch_data[key][i][j].to(device) 118 | else: 119 | batch_data[key] = batch_data[key].to(device) 120 | # Forward pass 121 | with torch.no_grad(): 122 | end_points = net(batch_data) 123 | grasp_preds = pred_decode(end_points)#1024,17 124 | preds = grasp_preds[0].detach().cpu().numpy() 125 | gg = GraspGroup(preds) 126 | 127 | # collision detection 128 | if cfgs.collision_thresh > 0: 129 | cloud = data_input['point_clouds'] 130 | mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd) 131 | collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh) 132 | gg = gg[~collision_mask] 133 | if len(gg)==0: 134 | print("detect nothing or have no grasp pose.") 135 | return False 136 | gg.nms() 137 | gg.sort_by_score() 138 | if len(gg)>50: 139 | gg = gg[:50] 140 | 141 | # grasp pose is in workspace? 
142 | for i in range(len(gg)-1,-1,-1): 143 | if gg[i].translation[0]< point_left_up[0]+0.02 or gg[i].translation[0] >point_right_bottom[0]-0.02\ 144 | or gg[i].translation[1] < point_left_up[1]+0.02 or gg[i].translation[1] > point_right_bottom[1]-0.02: 145 | gg.remove(i) 146 | 147 | if len(gg)==0: 148 | print("detect nothing or have no grasp pose ") 149 | return False 150 | gg.sort_by_score() 151 | #grippers = gg.to_open3d_geometry_list() 152 | #grippers[0].paint_uniform_color([0, 1, 0]) # the best score grasp pose's color is green 153 | #o3d.visualization.draw_geometries([cloud_, *grippers]) 154 | 155 | 156 | # grasp pose transform 157 | cam2robot = ur_robot.cam_pose 158 | R_grasp2camera, t_grasp2camera = gg[0].rotation_matrix, gg[0].translation 159 | rot_y_0point5pi = np.array([[0, 0, 1], [0, 1, 0], [-1, 0, 0]]) 160 | t_grasp2camera.shape = (1, 3) 161 | grasp2camera = np.concatenate( 162 | (np.concatenate((R_grasp2camera, t_grasp2camera.T), axis=1), np.array([[0, 0, 0, 1]])), axis=0) 163 | # print(f"primitive grasp2camera:{ur_robot.R2rpy(R_grasp2camera)*57.3}") 164 | grasp2robot = np.dot(cam2robot, grasp2camera) 165 | 166 | t_grasp2robot = grasp2robot[0:3, 3] 167 | R_grasp2robot = grasp2robot[0:3, 0:3] 168 | R_grasp2robot = np.dot(R_grasp2robot, rot_y_0point5pi) 169 | rpy_grasp2robot = ur_robot.R2rpy(R_grasp2robot) 170 | print(f"t_grasp2robot:{t_grasp2robot}") 171 | print(f"rpy_grasp2robot:{rpy_grasp2robot * 57.3}") 172 | width = gg[0].width * 10 + 0.05 173 | if width > 0.85: 174 | width = 0.85 175 | elif width < 0.4: 176 | width = 0.4 177 | # real grasp 178 | t_grasp2robot = t_grasp2robot.tolist() 179 | rpy_grasp2robot = rpy_grasp2robot.tolist() 180 | success = True 181 | success = ur_robot.grasp_ros(t_grasp2robot, rpy_grasp2robot, open_size=width) # 6-dof 182 | return success 183 | 184 | 185 | 186 | if __name__ == '__main__': 187 | 188 | ur_robot = UR_Robot(tcp_host_ip="192.168.50.100", tcp_port=30003, workspace_limits=None, is_use_robotiq85=True, 189 | is_use_camera=True) 190 | 191 | grasp_result = [] 192 | iter = 0 193 | while True: 194 | #for i in range(50): 195 | data_dict, cloud, point_left_up, point_right_bottom = data_process(ur_robot) 196 | grasp_success = grasp(data_dict, cloud, point_left_up, point_right_bottom) 197 | if grasp_success: 198 | grasp_result.append(True) 199 | else: 200 | grasp_result.append(False) 201 | # end 202 | if (iter >= 2) and (not grasp_result[iter]) and (not grasp_result[iter - 1]) and (not grasp_result[iter - 2]): 203 | print('grasp_result_array:', grasp_result) 204 | print("finish...") 205 | break 206 | iter += 1 207 | -------------------------------------------------------------------------------- /infer_vis_grasp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import argparse 5 | from PIL import Image 6 | import time 7 | import scipy.io as scio 8 | import torch 9 | import open3d as o3d 10 | from graspnetAPI.graspnet_eval import GraspGroup 11 | 12 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(ROOT_DIR) 14 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 15 | from models.graspnet import GraspNet, pred_decode 16 | from dataset.graspnet_dataset import minkowski_collate_fn 17 | from collision_detector import ModelFreeCollisionDetector 18 | from data_utils import CameraInfo, create_point_cloud_from_depth_image, get_workspace_mask 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--dataset_root', default='/media/randy/299D817A2D97AD94/xxw/graspnet_dataset') 22 |
parser.add_argument('--checkpoint_path', default='np15000_graspness1e-1_bs4_lr1e-3_viewres_dataaug_fps_14D_epoch10.tar') 23 | parser.add_argument('--dump_dir', help='Dump dir to save outputs', default='/logs') 24 | parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim') 25 | parser.add_argument('--camera', default='realsense', help='Camera split [realsense/kinect]') 26 | parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]') 27 | parser.add_argument('--batch_size', type=int, default=1, help='Batch Size during inference [default: 1]') 28 | parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size for sparse convolution') 29 | parser.add_argument('--collision_thresh', type=float, default=-1, 30 | help='Collision Threshold in collision detection [default: 0.01]') 31 | parser.add_argument('--voxel_size_cd', type=float, default=0.01, help='Voxel Size for collision detection') 32 | parser.add_argument('--infer', action='store_true', default=True) 33 | parser.add_argument('--vis', action='store_true', default=False) 34 | parser.add_argument('--scene', type=str, default='0187') 35 | parser.add_argument('--index', type=str, default='0000') 36 | cfgs = parser.parse_args() 37 | 38 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 39 | # if not os.path.exists(cfgs.dump_dir): 40 | # os.mkdir(cfgs.dump_dir) 41 | 42 | 43 | def data_process(): 44 | root = cfgs.dataset_root 45 | camera_type = cfgs.camera 46 | 47 | color = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'rgb', index + '.png'))) 48 | depth = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'depth', index + '.png'))) 49 | seg = np.array(Image.open(os.path.join(root, 'scenes', scene_id, camera_type, 'label', index + '.png'))) 50 | meta = scio.loadmat(os.path.join(root, 'scenes', scene_id, camera_type, 'meta', index + '.mat')) 51 | try: 52 | intrinsic = meta['intrinsic_matrix'] 53 | factor_depth = meta['factor_depth'] 54 | except Exception as e: 55 | print(repr(e)) 56 | camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2], 57 | factor_depth) 58 | # generate cloud 59 | cloud = create_point_cloud_from_depth_image(depth, camera, organized=True) 60 | 61 | # get valid points 62 | depth_mask = (depth > 0) 63 | camera_poses = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'camera_poses.npy')) 64 | align_mat = np.load(os.path.join(root, 'scenes', scene_id, camera_type, 'cam0_wrt_table.npy')) 65 | trans = np.dot(align_mat, camera_poses[int(index)]) 66 | workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02) 67 | mask = (depth_mask & workspace_mask) 68 | 69 | cloud_masked = cloud[mask] 70 | color_masked = color[mask] 71 | 72 | # sample points random 73 | if len(cloud_masked) >= cfgs.num_point: 74 | idxs = np.random.choice(len(cloud_masked), cfgs.num_point, replace=False) 75 | else: 76 | idxs1 = np.arange(len(cloud_masked)) 77 | idxs2 = np.random.choice(len(cloud_masked), cfgs.num_point - len(cloud_masked), replace=True) 78 | idxs = np.concatenate([idxs1, idxs2], axis=0) 79 | cloud_sampled = cloud_masked[idxs] 80 | 81 | ret_dict = {'point_clouds': cloud_sampled.astype(np.float32), 82 | 'coors': cloud_sampled.astype(np.float32) / cfgs.voxel_size, 83 | 'feats': np.ones_like(cloud_sampled).astype(np.float32), 84 | } 85 | return ret_dict 86 | 87 | 88 | # Init datasets and 
dataloaders 89 | def my_worker_init_fn(worker_id): 90 | np.random.seed(np.random.get_state()[1][0] + worker_id) 91 | pass 92 | 93 | 94 | def inference(data_input): 95 | batch_data = minkowski_collate_fn([data_input]) 96 | net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False) 97 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 98 | net.to(device) 99 | # Load checkpoint 100 | checkpoint = torch.load(cfgs.checkpoint_path) 101 | net.load_state_dict(checkpoint['model_state_dict']) 102 | start_epoch = checkpoint['epoch'] 103 | print("-> loaded checkpoint %s (epoch: %d)" % (cfgs.checkpoint_path, start_epoch)) 104 | 105 | net.eval() 106 | tic = time.time() 107 | 108 | for key in batch_data: 109 | if 'list' in key: 110 | for i in range(len(batch_data[key])): 111 | for j in range(len(batch_data[key][i])): 112 | batch_data[key][i][j] = batch_data[key][i][j].to(device) 113 | else: 114 | batch_data[key] = batch_data[key].to(device) 115 | # Forward pass 116 | with torch.no_grad(): 117 | end_points = net(batch_data) 118 | grasp_preds = pred_decode(end_points) 119 | 120 | preds = grasp_preds[0].detach().cpu().numpy() 121 | gg = GraspGroup(preds) 122 | # collision detection 123 | if cfgs.collision_thresh > 0: 124 | cloud = data_input['point_clouds'] 125 | mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd) 126 | collision_mask = mfcdetector.detect(gg, approach_dist=0.05, collision_thresh=cfgs.collision_thresh) 127 | gg = gg[~collision_mask] 128 | 129 | gg.nms() 130 | gg.sort_by_score() 131 | gg = gg[:20] 132 | grippers = gg.to_open3d_geometry_list() 133 | grippers[0].paint_uniform_color([0, 1, 0]) # the best score grasp pose's color is green 134 | pc = data_dict['point_clouds'] 135 | cloud = o3d.geometry.PointCloud() 136 | cloud.points = o3d.utility.Vector3dVector(pc.astype(np.float32)) 137 | o3d.visualization.draw_geometries([cloud, *grippers]) 138 | 139 | # save grasps 140 | # save_dir = os.path.join(cfgs.dump_dir, scene_id, cfgs.camera) 141 | # save_path = os.path.join(save_dir, cfgs.index + '.npy') 142 | # if not os.path.exists(save_dir): 143 | # os.makedirs(save_dir) 144 | # gg.save_npy(save_path) 145 | # 146 | # toc = time.time() 147 | # print('inference time: %fs' % (toc - tic)) 148 | 149 | 150 | if __name__ == '__main__': 151 | scene_id = 'scene_' + cfgs.scene 152 | index = cfgs.index 153 | data_dict = data_process() 154 | 155 | if cfgs.infer: 156 | inference(data_dict) 157 | # if cfgs.vis: 158 | # pc = data_dict['point_clouds'] 159 | # gg = np.load(os.path.join(cfgs.dump_dir, scene_id, cfgs.camera, cfgs.index + '.npy')) 160 | # gg = GraspGroup(gg) 161 | # gg = gg.nms() 162 | # gg = gg.sort_by_score() 163 | # if gg.__len__() > 30: 164 | # gg = gg[:30] 165 | # grippers = gg.to_open3d_geometry_list() 166 | # cloud = o3d.geometry.PointCloud() 167 | # cloud.points = o3d.utility.Vector3dVector(pc.astype(np.float32)) 168 | # o3d.visualization.draw_geometries([cloud, *grippers]) 169 | -------------------------------------------------------------------------------- /knn/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/knn/.DS_Store -------------------------------------------------------------------------------- /knn/knn_modules.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import gc 3 | import operator as op 4 | import 
functools 5 | import torch 6 | from torch.autograd import Variable, Function 7 | from knn_pytorch import knn_pytorch 8 | # import knn_pytorch 9 | def knn(ref, query, k=1): 10 | """ Compute k nearest neighbors for each query point. 11 | """ 12 | device = ref.device 13 | ref = ref.float().to(device) 14 | query = query.float().to(device) 15 | inds = torch.empty(query.shape[0], k, query.shape[2]).long().to(device) 16 | knn_pytorch.knn(ref, query, inds) 17 | return inds 18 | -------------------------------------------------------------------------------- /knn/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | 6 | import torch 7 | from setuptools import find_packages 8 | from setuptools import setup 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | from torch.utils.cpp_extension import CppExtension 11 | from torch.utils.cpp_extension import CUDAExtension 12 | 13 | requirements = ["torch", "torchvision"] 14 | 15 | 16 | def get_extensions(): 17 | this_dir = os.path.dirname(os.path.abspath(__file__)) 18 | extensions_dir = os.path.join(this_dir, "src") 19 | 20 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 21 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 22 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 23 | 24 | sources = main_file + source_cpu 25 | extension = CppExtension 26 | 27 | extra_compile_args = {"cxx": []} 28 | define_macros = [] 29 | 30 | if torch.cuda.is_available() and CUDA_HOME is not None: 31 | extension = CUDAExtension 32 | sources += source_cuda 33 | define_macros += [("WITH_CUDA", None)] 34 | extra_compile_args["nvcc"] = [ 35 | "-DCUDA_HAS_FP16=1", 36 | "-D__CUDA_NO_HALF_OPERATORS__", 37 | "-D__CUDA_NO_HALF_CONVERSIONS__", 38 | "-D__CUDA_NO_HALF2_OPERATORS__", 39 | ] 40 | 41 | sources = [os.path.join(extensions_dir, s) for s in sources] 42 | 43 | include_dirs = [extensions_dir] 44 | 45 | ext_modules = [ 46 | extension( 47 | "knn_pytorch.knn_pytorch", 48 | sources, 49 | include_dirs=include_dirs, 50 | define_macros=define_macros, 51 | extra_compile_args=extra_compile_args, 52 | ) 53 | ] 54 | 55 | return ext_modules 56 | 57 | 58 | setup( 59 | name="knn_pytorch", 60 | version="0.1", 61 | author="foolyc", 62 | url="https://github.com/foolyc/torchKNN", 63 | description="KNN implement in Pytorch 1.0 including both cpu version and gpu version", 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 66 | ) 67 | -------------------------------------------------------------------------------- /knn/src/cpu/knn_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "cpu/vision.h" 2 | 3 | 4 | void knn_cpu(float* ref_dev, int ref_width, float* query_dev, int query_width, 5 | int height, int k, float* dist_dev, long* ind_dev, long* ind_buf) 6 | { 7 | // Compute all the distances 8 | for(int query_idx = 0;query_idx dist_dev[query_idx * ref_width + j + 1]) 31 | { 32 | temp_value = dist_dev[query_idx * ref_width + j]; 33 | dist_dev[query_idx * ref_width + j] = dist_dev[query_idx * ref_width + j + 1]; 34 | dist_dev[query_idx * ref_width + j + 1] = temp_value; 35 | temp_idx = ind_buf[j]; 36 | ind_buf[j] = ind_buf[j + 1]; 37 | ind_buf[j + 1] = temp_idx; 38 | } 39 | 40 | } 41 | 42 | for(int i = 0;i < k;i++) 43 | ind_dev[query_idx + i * query_width] = ind_buf[i]; 44 | #if DEBUG 45 | for(int i = 0;i < ref_width;i++) 46 | printf("%d, 
", ind_buf[i]); 47 | printf("\n"); 48 | #endif 49 | 50 | } 51 | 52 | 53 | 54 | 55 | 56 | } -------------------------------------------------------------------------------- /knn/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | void knn_cpu(float* ref_dev, int ref_width, 5 | float* query_dev, int query_width, 6 | int height, int k, float* dist_dev, long* ind_dev, long* ind_buf); -------------------------------------------------------------------------------- /knn/src/cuda/knn.cu: -------------------------------------------------------------------------------- 1 | /** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA 2 | * The modifications are 3 | * removed texture memory usage 4 | * removed split query KNN computation 5 | * added feature extraction with bilinear interpolation 6 | * 7 | * Last modified by Christopher B. Choy 12/23/2016 8 | */ 9 | 10 | // Includes 11 | #include 12 | #include "cuda.h" 13 | 14 | #define IDX2D(i, j, dj) (dj * i + j) 15 | #define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk)) 16 | 17 | #define BLOCK 512 18 | #define MAX_STREAMS 512 19 | 20 | // Constants used by the program 21 | #define BLOCK_DIM 16 22 | #define DEBUG 0 23 | 24 | 25 | /** 26 | * Computes the distance between two matrix A (reference points) and 27 | * B (query points) containing respectively wA and wB points. 28 | * 29 | * @param A pointer on the matrix A 30 | * @param wA width of the matrix A = number of points in A 31 | * @param B pointer on the matrix B 32 | * @param wB width of the matrix B = number of points in B 33 | * @param dim dimension of points = height of matrices A and B 34 | * @param AB pointer on the matrix containing the wA*wB distances computed 35 | */ 36 | __global__ void cuComputeDistanceGlobal( float* A, int wA, 37 | float* B, int wB, int dim, float* AB){ 38 | 39 | // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B 40 | __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; 41 | __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; 42 | 43 | 44 | // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) 45 | __shared__ int begin_A; 46 | __shared__ int begin_B; 47 | __shared__ int step_A; 48 | __shared__ int step_B; 49 | __shared__ int end_A; 50 | 51 | // Thread index 52 | int tx = threadIdx.x; 53 | int ty = threadIdx.y; 54 | 55 | // Other variables 56 | float tmp; 57 | float ssd = 0; 58 | 59 | // Loop parameters 60 | begin_A = BLOCK_DIM * blockIdx.y; 61 | begin_B = BLOCK_DIM * blockIdx.x; 62 | step_A = BLOCK_DIM * wA; 63 | step_B = BLOCK_DIM * wB; 64 | end_A = begin_A + (dim-1) * wA; 65 | 66 | // Conditions 67 | int cond0 = (begin_A + tx < wA); // used to write in shared memory 68 | int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix 69 | int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix 70 | 71 | // Loop over all the sub-matrices of A and B required to compute the block sub-matrix 72 | for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { 73 | // Load the matrices from device memory to shared memory; each thread loads one element of each matrix 74 | if (a/wA + ty < dim){ 75 | shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0; 76 | shared_B[ty][tx] = (cond1)? 
B[b + wB * ty + tx] : 0; 77 | } 78 | else{ 79 | shared_A[ty][tx] = 0; 80 | shared_B[ty][tx] = 0; 81 | } 82 | 83 | // Synchronize to make sure the matrices are loaded 84 | __syncthreads(); 85 | 86 | // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix 87 | if (cond2 && cond1){ 88 | for (int k = 0; k < BLOCK_DIM; ++k){ 89 | tmp = shared_A[k][ty] - shared_B[k][tx]; 90 | ssd += tmp*tmp; 91 | } 92 | } 93 | 94 | // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration 95 | __syncthreads(); 96 | } 97 | 98 | // Write the block sub-matrix to device memory; each thread writes one element 99 | if (cond2 && cond1) 100 | AB[(begin_A + ty) * wB + begin_B + tx] = ssd; 101 | } 102 | 103 | 104 | /** 105 | * Gathers k-th smallest distances for each column of the distance matrix in the top. 106 | * 107 | * @param dist distance matrix 108 | * @param ind index matrix 109 | * @param width width of the distance matrix and of the index matrix 110 | * @param height height of the distance matrix and of the index matrix 111 | * @param k number of neighbors to consider 112 | */ 113 | __global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){ 114 | 115 | // Variables 116 | int l, i, j; 117 | float *p_dist; 118 | long *p_ind; 119 | float curr_dist, max_dist; 120 | long curr_row, max_row; 121 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 122 | 123 | if (xIndexcurr_dist){ 138 | i=a; 139 | break; 140 | } 141 | } 142 | for (j=l; j>i; j--){ 143 | p_dist[j*width] = p_dist[(j-1)*width]; 144 | p_ind[j*width] = p_ind[(j-1)*width]; 145 | } 146 | p_dist[i*width] = curr_dist; 147 | p_ind[i*width] = l+1; 148 | } else { 149 | p_ind[l*width] = l+1; 150 | } 151 | max_dist = p_dist[curr_row]; 152 | } 153 | 154 | // Part 2 : insert element in the k-th first lines 155 | max_row = (k-1)*width; 156 | for (l=k; lcurr_dist){ 162 | i=a; 163 | break; 164 | } 165 | } 166 | for (j=k-1; j>i; j--){ 167 | p_dist[j*width] = p_dist[(j-1)*width]; 168 | p_ind[j*width] = p_ind[(j-1)*width]; 169 | } 170 | p_dist[i*width] = curr_dist; 171 | p_ind[i*width] = l+1; 172 | max_dist = p_dist[max_row]; 173 | } 174 | } 175 | } 176 | } 177 | 178 | 179 | /** 180 | * Computes the square root of the first line (width-th first element) 181 | * of the distance matrix. 
182 | * 183 | * @param dist distance matrix 184 | * @param width width of the distance matrix 185 | * @param k number of neighbors to consider 186 | */ 187 | __global__ void cuParallelSqrt(float *dist, int width, int k){ 188 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 189 | unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; 190 | if (xIndex>>(ref_dev, ref_nb, query_dev, query_nb, dim, dist_dev); 237 | 238 | // Kernel 2: Sort each column 239 | cuInsertionSort<<>>(dist_dev, ind_dev, query_nb, ref_nb, k); 240 | 241 | // Kernel 3: Compute square root of k first elements 242 | // cuParallelSqrt<<>>(dist_dev, query_nb, k); 243 | 244 | #if DEBUG 245 | unsigned int size_of_float = sizeof(float); 246 | unsigned long size_of_long = sizeof(long); 247 | 248 | float* dist_host = new float[query_nb * k]; 249 | long* idx_host = new long[query_nb * k]; 250 | 251 | // Memory copy of output from device to host 252 | cudaMemcpy(&dist_host[0], dist_dev, 253 | query_nb * k *size_of_float, cudaMemcpyDeviceToHost); 254 | 255 | cudaMemcpy(&idx_host[0], ind_dev, 256 | query_nb * k * size_of_long, cudaMemcpyDeviceToHost); 257 | 258 | int i = 0; 259 | for(i = 0; i < 100; i++){ 260 | printf("IDX[%d]: %d\n", i, (int)idx_host[i]); 261 | } 262 | #endif 263 | } 264 | 265 | 266 | 267 | 268 | 269 | 270 | -------------------------------------------------------------------------------- /knn/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | void knn_device(float* ref_dev, int ref_width, 6 | float* query_dev, int query_width, 7 | int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream); -------------------------------------------------------------------------------- /knn/src/knn.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #include 7 | extern THCState *state; 8 | #endif 9 | 10 | 11 | 12 | int knn(at::Tensor& ref, at::Tensor& query, at::Tensor& idx) 13 | { 14 | 15 | // TODO check dimensions 16 | long batch, ref_nb, query_nb, dim, k; 17 | batch = ref.size(0); 18 | dim = ref.size(1); 19 | k = idx.size(1); 20 | ref_nb = ref.size(2); 21 | query_nb = query.size(2); 22 | 23 | float *ref_dev = ref.data(); 24 | float *query_dev = query.data(); 25 | long *idx_dev = idx.data(); 26 | 27 | 28 | 29 | 30 | if (ref.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | // TODO raise error if not compiled with CUDA 33 | float *dist_dev = (float*)THCudaMalloc(state, ref_nb * query_nb * sizeof(float)); 34 | 35 | for (int b = 0; b < batch; b++) 36 | { 37 | // knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k, 38 | // dist_dev, idx_dev + b * k * query_nb, THCState_getCurrentStream(state)); 39 | knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k, 40 | dist_dev, idx_dev + b * k * query_nb, c10::cuda::getCurrentCUDAStream()); 41 | } 42 | THCudaFree(state, dist_dev); 43 | cudaError_t err = cudaGetLastError(); 44 | if (err != cudaSuccess) 45 | { 46 | printf("error in knn: %s\n", cudaGetErrorString(err)); 47 | THError("aborting"); 48 | } 49 | return 1; 50 | #else 51 | AT_ERROR("Not compiled with GPU support"); 52 | #endif 53 | } 54 | 55 | 56 | float *dist_dev = (float*)malloc(ref_nb * query_nb * sizeof(float)); 57 | long *ind_buf = (long*)malloc(ref_nb * sizeof(long)); 58 | for (int b = 0; 
b < batch; b++) { 59 | knn_cpu(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k, 60 | dist_dev, idx_dev + b * k * query_nb, ind_buf); 61 | } 62 | 63 | free(dist_dev); 64 | free(ind_buf); 65 | 66 | return 1; 67 | 68 | } 69 | -------------------------------------------------------------------------------- /knn/src/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "knn.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("knn", &knn, "k-nearest neighbors"); 5 | } 6 | -------------------------------------------------------------------------------- /models/backbone_resunet14.py: -------------------------------------------------------------------------------- 1 | import MinkowskiEngine as ME 2 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 3 | from models.resnet import ResNetBase 4 | 5 | 6 | class MinkUNetBase(ResNetBase): 7 | BLOCK = None 8 | PLANES = None 9 | DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1) 10 | LAYERS = (2, 2, 2, 2, 2, 2, 2, 2) 11 | PLANES = (32, 64, 128, 256, 256, 128, 96, 96) 12 | INIT_DIM = 32 13 | OUT_TENSOR_STRIDE = 1 14 | 15 | # To use the model, must call initialize_coords before forward pass. 16 | # Once data is processed, call clear to reset the model before calling 17 | # initialize_coords 18 | def __init__(self, in_channels, out_channels, D=3): 19 | ResNetBase.__init__(self, in_channels, out_channels, D) 20 | 21 | def network_initialization(self, in_channels, out_channels, D): 22 | # Output of the first conv concated to conv6 23 | self.inplanes = self.INIT_DIM 24 | self.conv0p1s1 = ME.MinkowskiConvolution( 25 | in_channels, self.inplanes, kernel_size=5, dimension=D) 26 | 27 | self.bn0 = ME.MinkowskiBatchNorm(self.inplanes) 28 | 29 | self.conv1p1s2 = ME.MinkowskiConvolution( 30 | self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D) 31 | self.bn1 = ME.MinkowskiBatchNorm(self.inplanes) 32 | 33 | self.block1 = self._make_layer(self.BLOCK, self.PLANES[0], 34 | self.LAYERS[0]) 35 | 36 | self.conv2p2s2 = ME.MinkowskiConvolution( 37 | self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D) 38 | self.bn2 = ME.MinkowskiBatchNorm(self.inplanes) 39 | 40 | self.block2 = self._make_layer(self.BLOCK, self.PLANES[1], 41 | self.LAYERS[1]) 42 | 43 | self.conv3p4s2 = ME.MinkowskiConvolution( 44 | self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D) 45 | 46 | self.bn3 = ME.MinkowskiBatchNorm(self.inplanes) 47 | self.block3 = self._make_layer(self.BLOCK, self.PLANES[2], 48 | self.LAYERS[2]) 49 | 50 | self.conv4p8s2 = ME.MinkowskiConvolution( 51 | self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=D) 52 | self.bn4 = ME.MinkowskiBatchNorm(self.inplanes) 53 | self.block4 = self._make_layer(self.BLOCK, self.PLANES[3], 54 | self.LAYERS[3]) 55 | 56 | self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose( 57 | self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=D) 58 | self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4]) 59 | 60 | self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion 61 | self.block5 = self._make_layer(self.BLOCK, self.PLANES[4], 62 | self.LAYERS[4]) 63 | self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose( 64 | self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=D) 65 | self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5]) 66 | 67 | self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion 68 | self.block6 = self._make_layer(self.BLOCK, self.PLANES[5], 
69 | self.LAYERS[5]) 70 | self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose( 71 | self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=D) 72 | self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6]) 73 | 74 | self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion 75 | self.block7 = self._make_layer(self.BLOCK, self.PLANES[6], 76 | self.LAYERS[6]) 77 | self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose( 78 | self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=D) 79 | self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7]) 80 | 81 | self.inplanes = self.PLANES[7] + self.INIT_DIM 82 | self.block8 = self._make_layer(self.BLOCK, self.PLANES[7], 83 | self.LAYERS[7]) 84 | 85 | self.final = ME.MinkowskiConvolution( 86 | self.PLANES[7] * self.BLOCK.expansion, 87 | out_channels, 88 | kernel_size=1, 89 | bias=True, 90 | dimension=D) 91 | self.relu = ME.MinkowskiReLU(inplace=True) 92 | 93 | def forward(self, x): 94 | out = self.conv0p1s1(x) 95 | out = self.bn0(out) 96 | out_p1 = self.relu(out) 97 | 98 | out = self.conv1p1s2(out_p1) 99 | out = self.bn1(out) 100 | out = self.relu(out) 101 | out_b1p2 = self.block1(out) 102 | 103 | out = self.conv2p2s2(out_b1p2) 104 | out = self.bn2(out) 105 | out = self.relu(out) 106 | out_b2p4 = self.block2(out) 107 | 108 | out = self.conv3p4s2(out_b2p4) 109 | out = self.bn3(out) 110 | out = self.relu(out) 111 | out_b3p8 = self.block3(out) 112 | 113 | # tensor_stride=16 114 | out = self.conv4p8s2(out_b3p8) 115 | out = self.bn4(out) 116 | out = self.relu(out) 117 | out = self.block4(out) 118 | 119 | # tensor_stride=8 120 | out = self.convtr4p16s2(out) 121 | out = self.bntr4(out) 122 | out = self.relu(out) 123 | 124 | out = ME.cat(out, out_b3p8) 125 | out = self.block5(out) 126 | 127 | # tensor_stride=4 128 | out = self.convtr5p8s2(out) 129 | out = self.bntr5(out) 130 | out = self.relu(out) 131 | 132 | out = ME.cat(out, out_b2p4) 133 | out = self.block6(out) 134 | 135 | # tensor_stride=2 136 | out = self.convtr6p4s2(out) 137 | out = self.bntr6(out) 138 | out = self.relu(out) 139 | 140 | out = ME.cat(out, out_b1p2) 141 | out = self.block7(out) 142 | 143 | # tensor_stride=1 144 | out = self.convtr7p2s2(out) 145 | out = self.bntr7(out) 146 | out = self.relu(out) 147 | 148 | out = ME.cat(out, out_p1) 149 | out = self.block8(out) 150 | 151 | return self.final(out) 152 | 153 | 154 | class MinkUNet14(MinkUNetBase): 155 | BLOCK = BasicBlock 156 | LAYERS = (1, 1, 1, 1, 1, 1, 1, 1) 157 | 158 | 159 | class MinkUNet18(MinkUNetBase): 160 | BLOCK = BasicBlock 161 | LAYERS = (2, 2, 2, 2, 2, 2, 2, 2) 162 | 163 | 164 | class MinkUNet34(MinkUNetBase): 165 | BLOCK = BasicBlock 166 | LAYERS = (2, 3, 4, 6, 2, 2, 2, 2) 167 | 168 | 169 | class MinkUNet50(MinkUNetBase): 170 | BLOCK = Bottleneck 171 | LAYERS = (2, 3, 4, 6, 2, 2, 2, 2) 172 | 173 | 174 | class MinkUNet101(MinkUNetBase): 175 | BLOCK = Bottleneck 176 | LAYERS = (2, 3, 4, 23, 2, 2, 2, 2) 177 | 178 | 179 | class MinkUNet14A(MinkUNet14): 180 | PLANES = (32, 64, 128, 256, 128, 128, 96, 96) 181 | 182 | 183 | class MinkUNet14B(MinkUNet14): 184 | PLANES = (32, 64, 128, 256, 128, 128, 128, 128) 185 | 186 | 187 | class MinkUNet14C(MinkUNet14): 188 | PLANES = (32, 64, 128, 256, 192, 192, 128, 128) 189 | 190 | 191 | class MinkUNet14Dori(MinkUNet14): 192 | PLANES = (32, 64, 128, 256, 384, 384, 384, 384) 193 | 194 | 195 | class MinkUNet14E(MinkUNet14): 196 | PLANES = (32, 64, 128, 256, 384, 384, 384, 384) 197 | 198 | 199 | class MinkUNet14D(MinkUNet14): 200 | PLANES = (32, 64, 128, 256, 192, 192, 192, 192) 201 
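# --- usage sketch (added note; not part of the original file) ---
# models/graspnet.py uses MinkUNet14D as its sparse 3D backbone. A minimal
# forward pass looks roughly like the following, where `feats` and `coords`
# are placeholder names for already-quantized per-point features and batched
# voxel coordinates (GraspNet builds them from end_points['feats'] and
# end_points['coors']):
#
#     import MinkowskiEngine as ME
#     backbone = MinkUNet14D(in_channels=3, out_channels=512, D=3)  # 512 = seed_feat_dim default
#     sinput = ME.SparseTensor(feats, coordinates=coords)
#     seed_features = backbone(sinput).F  # (num_voxels, 512) per-point features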
| 202 | 203 | class MinkUNet18A(MinkUNet18): 204 | PLANES = (32, 64, 128, 256, 128, 128, 96, 96) 205 | 206 | 207 | class MinkUNet18B(MinkUNet18): 208 | PLANES = (32, 64, 128, 256, 128, 128, 128, 128) 209 | 210 | 211 | class MinkUNet18D(MinkUNet18): 212 | PLANES = (32, 64, 128, 256, 384, 384, 384, 384) 213 | 214 | 215 | class MinkUNet34A(MinkUNet34): 216 | PLANES = (32, 64, 128, 256, 256, 128, 64, 64) 217 | 218 | 219 | class MinkUNet34B(MinkUNet34): 220 | PLANES = (32, 64, 128, 256, 256, 128, 64, 32) 221 | 222 | 223 | class MinkUNet34C(MinkUNet34): 224 | PLANES = (32, 64, 128, 256, 256, 128, 96, 96) 225 | -------------------------------------------------------------------------------- /models/graspnet.py: -------------------------------------------------------------------------------- 1 | """ GraspNet baseline model definition. 2 | Author: chenxi-wang 3 | """ 4 | 5 | import os 6 | import sys 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | import MinkowskiEngine as ME 11 | 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | ROOT_DIR = os.path.dirname(BASE_DIR) 14 | sys.path.append(ROOT_DIR) 15 | 16 | from models.backbone_resunet14 import MinkUNet14D 17 | from models.modules import ApproachNet, GraspableNet, CloudCrop, SWADNet 18 | from loss_utils import GRASP_MAX_WIDTH, NUM_VIEW, NUM_ANGLE, NUM_DEPTH, GRASPNESS_THRESHOLD, M_POINT 19 | from label_generation import process_grasp_labels, match_grasp_view_and_label, batch_viewpoint_params_to_matrix 20 | from pointnet2.pointnet2_utils import furthest_point_sample, gather_operation 21 | 22 | 23 | class GraspNet(nn.Module): 24 | def __init__(self, cylinder_radius=0.05, seed_feat_dim=512, is_training=True): 25 | super().__init__() 26 | self.is_training = is_training 27 | self.seed_feature_dim = seed_feat_dim 28 | self.num_depth = NUM_DEPTH 29 | self.num_angle = NUM_ANGLE 30 | self.M_points = M_POINT 31 | self.num_view = NUM_VIEW 32 | 33 | self.backbone = MinkUNet14D(in_channels=3, out_channels=self.seed_feature_dim, D=3) 34 | self.graspable = GraspableNet(seed_feature_dim=self.seed_feature_dim) 35 | self.rotation = ApproachNet(self.num_view, seed_feature_dim=self.seed_feature_dim, is_training=self.is_training) 36 | self.crop = CloudCrop(nsample=16, cylinder_radius=cylinder_radius, seed_feature_dim=self.seed_feature_dim) 37 | self.swad = SWADNet(num_angle=self.num_angle, num_depth=self.num_depth) 38 | 39 | def forward(self, end_points): 40 | seed_xyz = end_points['point_clouds'] # use all sampled point cloud, B*Ns*3 41 | B, point_num, _ = seed_xyz.shape # batch _size 42 | # point-wise features 43 | coordinates_batch = end_points['coors'] 44 | features_batch = end_points['feats'] 45 | mink_input = ME.SparseTensor(features_batch, coordinates=coordinates_batch) 46 | seed_features = self.backbone(mink_input).F 47 | seed_features = seed_features[end_points['quantize2original']].view(B, point_num, -1).transpose(1, 2) 48 | 49 | end_points = self.graspable(seed_features, end_points) 50 | seed_features_flipped = seed_features.transpose(1, 2) # B*Ns*feat_dim 51 | objectness_score = end_points['objectness_score'] 52 | graspness_score = end_points['graspness_score'].squeeze(1) 53 | objectness_pred = torch.argmax(objectness_score, 1) 54 | objectness_mask = (objectness_pred == 1) 55 | graspness_mask = graspness_score > GRASPNESS_THRESHOLD 56 | graspable_mask = objectness_mask & graspness_mask 57 | 58 | seed_features_graspable = [] 59 | seed_xyz_graspable = [] 60 | graspable_num_batch = 0. 
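# --- explanatory comment (added; not in the original source) ---
# The loop below keeps only seed points that are both predicted as object
# (objectness_pred == 1) and whose graspness exceeds GRASPNESS_THRESHOLD,
# then runs furthest point sampling per scene to select M_points seeds and
# gathers their coordinates and backbone features for the view/crop/SWAD heads.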
61 | for i in range(B): 62 | cur_mask = graspable_mask[i] 63 | graspable_num_batch += cur_mask.sum() 64 | cur_feat = seed_features_flipped[i][cur_mask] # Ns*feat_dim 65 | cur_seed_xyz = seed_xyz[i][cur_mask] # Ns*3 66 | 67 | cur_seed_xyz = cur_seed_xyz.unsqueeze(0) # 1*Ns*3 68 | fps_idxs = furthest_point_sample(cur_seed_xyz, self.M_points) 69 | cur_seed_xyz_flipped = cur_seed_xyz.transpose(1, 2).contiguous() # 1*3*Ns 70 | cur_seed_xyz = gather_operation(cur_seed_xyz_flipped, fps_idxs).transpose(1, 2).squeeze(0).contiguous() # Ns*3 71 | cur_feat_flipped = cur_feat.unsqueeze(0).transpose(1, 2).contiguous() # 1*feat_dim*Ns 72 | cur_feat = gather_operation(cur_feat_flipped, fps_idxs).squeeze(0).contiguous() # feat_dim*Ns 73 | 74 | seed_features_graspable.append(cur_feat) 75 | seed_xyz_graspable.append(cur_seed_xyz) 76 | seed_xyz_graspable = torch.stack(seed_xyz_graspable, 0) # B*Ns*3 77 | seed_features_graspable = torch.stack(seed_features_graspable) # B*feat_dim*Ns 78 | end_points['xyz_graspable'] = seed_xyz_graspable 79 | end_points['graspable_count_stage1'] = graspable_num_batch / B 80 | 81 | end_points, res_feat = self.rotation(seed_features_graspable, end_points) 82 | seed_features_graspable = seed_features_graspable + res_feat 83 | 84 | if self.is_training: 85 | end_points = process_grasp_labels(end_points) 86 | grasp_top_views_rot, end_points = match_grasp_view_and_label(end_points) 87 | else: 88 | grasp_top_views_rot = end_points['grasp_top_view_rot'] 89 | 90 | group_features = self.crop(seed_xyz_graspable.contiguous(), seed_features_graspable.contiguous(), grasp_top_views_rot) 91 | end_points = self.swad(group_features, end_points) 92 | 93 | return end_points 94 | 95 | 96 | def pred_decode(end_points): 97 | batch_size = len(end_points['point_clouds']) 98 | grasp_preds = [] 99 | for i in range(batch_size): 100 | grasp_center = end_points['xyz_graspable'][i].float()# 1024,3 101 | 102 | grasp_score = end_points['grasp_score_pred'][i].float()# 1024,12,4 103 | grasp_score = grasp_score.view(M_POINT, NUM_ANGLE*NUM_DEPTH)# 1024,48 104 | grasp_score, grasp_score_inds = torch.max(grasp_score, -1)# 1024 # [M_POINT] 105 | grasp_score = grasp_score.view(-1, 1)#1024,1 106 | grasp_angle = (grasp_score_inds // NUM_DEPTH) * np.pi / 12# 1024 107 | grasp_depth = (grasp_score_inds % NUM_DEPTH + 1) * 0.01#1024 108 | grasp_depth = grasp_depth.view(-1, 1) #1024,1 109 | grasp_width = 1.2 * end_points['grasp_width_pred'][i] / 10.#1024,12,4 110 | grasp_width = grasp_width.view(M_POINT, NUM_ANGLE*NUM_DEPTH)#1024,48 111 | grasp_width = torch.gather(grasp_width, 1, grasp_score_inds.view(-1, 1))#1024,1 112 | grasp_width = torch.clamp(grasp_width, min=0., max=GRASP_MAX_WIDTH)#1024,1 113 | 114 | approaching = -end_points['grasp_top_view_xyz'][i].float()#1024,3 115 | grasp_rot = batch_viewpoint_params_to_matrix(approaching, grasp_angle)#1024,3,3 116 | grasp_rot = grasp_rot.view(M_POINT, 9)#1024,9 117 | 118 | # merge preds 119 | grasp_height = 0.02 * torch.ones_like(grasp_score) 120 | obj_ids = -1 * torch.ones_like(grasp_score) 121 | grasp_preds.append( #1024,17 122 | torch.cat([grasp_score, grasp_width, grasp_height, grasp_depth, grasp_rot, grasp_center, obj_ids], axis=-1)) 123 | return grasp_preds 124 | -------------------------------------------------------------------------------- /models/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | def get_loss(end_points): 6 | objectness_loss, end_points = 
compute_objectness_loss(end_points) 7 | graspness_loss, end_points = compute_graspness_loss(end_points) 8 | view_loss, end_points = compute_view_graspness_loss(end_points) 9 | score_loss, end_points = compute_score_loss(end_points) 10 | width_loss, end_points = compute_width_loss(end_points) 11 | loss = objectness_loss + 10 * graspness_loss + 100 * view_loss + 15 * score_loss + 10 * width_loss 12 | end_points['loss/overall_loss'] = loss 13 | return loss, end_points 14 | 15 | 16 | def compute_objectness_loss(end_points): 17 | criterion = nn.CrossEntropyLoss(reduction='mean') 18 | objectness_score = end_points['objectness_score'] 19 | objectness_label = end_points['objectness_label'] 20 | loss = criterion(objectness_score, objectness_label) 21 | end_points['loss/stage1_objectness_loss'] = loss 22 | 23 | objectness_pred = torch.argmax(objectness_score, 1) 24 | end_points['stage1_objectness_acc'] = (objectness_pred == objectness_label.long()).float().mean() 25 | end_points['stage1_objectness_prec'] = (objectness_pred == objectness_label.long())[ 26 | objectness_pred == 1].float().mean() 27 | end_points['stage1_objectness_recall'] = (objectness_pred == objectness_label.long())[ 28 | objectness_label == 1].float().mean() 29 | return loss, end_points 30 | 31 | 32 | def compute_graspness_loss(end_points): 33 | criterion = nn.SmoothL1Loss(reduction='none') 34 | graspness_score = end_points['graspness_score'].squeeze(1) 35 | graspness_label = end_points['graspness_label'].squeeze(-1) 36 | loss_mask = end_points['objectness_label'].bool() 37 | loss = criterion(graspness_score, graspness_label) 38 | loss = loss[loss_mask] 39 | loss = loss.mean() 40 | 41 | graspness_score_c = graspness_score.detach().clone()[loss_mask] 42 | graspness_label_c = graspness_label.detach().clone()[loss_mask] 43 | graspness_score_c = torch.clamp(graspness_score_c, 0., 0.99) 44 | graspness_label_c = torch.clamp(graspness_label_c, 0., 0.99) 45 | rank_error = (torch.abs(torch.trunc(graspness_score_c * 20) - torch.trunc(graspness_label_c * 20)) / 20.).mean() 46 | end_points['stage1_graspness_acc_rank_error'] = rank_error 47 | 48 | end_points['loss/stage1_graspness_loss'] = loss 49 | return loss, end_points 50 | 51 | 52 | def compute_view_graspness_loss(end_points): 53 | criterion = nn.SmoothL1Loss(reduction='mean') 54 | view_score = end_points['view_score'] 55 | view_label = end_points['batch_grasp_view_graspness'] 56 | loss = criterion(view_score, view_label) 57 | end_points['loss/stage2_view_loss'] = loss 58 | return loss, end_points 59 | 60 | 61 | def compute_score_loss(end_points): 62 | criterion = nn.SmoothL1Loss(reduction='mean') 63 | grasp_score_pred = end_points['grasp_score_pred'] 64 | grasp_score_label = end_points['batch_grasp_score'] 65 | loss = criterion(grasp_score_pred, grasp_score_label) 66 | 67 | end_points['loss/stage3_score_loss'] = loss 68 | return loss, end_points 69 | 70 | 71 | def compute_width_loss(end_points): 72 | criterion = nn.SmoothL1Loss(reduction='none') 73 | grasp_width_pred = end_points['grasp_width_pred'] 74 | grasp_width_label = end_points['batch_grasp_width'] * 10 75 | loss = criterion(grasp_width_pred, grasp_width_label) 76 | grasp_score_label = end_points['batch_grasp_score'] 77 | loss_mask = grasp_score_label > 0 78 | loss = loss[loss_mask].mean() 79 | end_points['loss/stage3_width_loss'] = loss 80 | return loss, end_points 81 | -------------------------------------------------------------------------------- /models/modules.py: 
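# --- overview comment (added; not part of the original file) ---
# modules.py defines the four heads wired together in models/graspnet.py:
#   GraspableNet - a 1x1 Conv1d that outputs a 2-class objectness score and a
#                  scalar graspness score per seed point.
#   ApproachNet  - scores NUM_VIEW template approach views per seed and picks
#                  one (sampled from normalized scores in training, argmax at test time).
#   CloudCrop    - cylinder query + grouping around each seed along the chosen
#                  approach direction, followed by a shared MLP and max-pooling.
#   SWADNet      - predicts grasp score and width for every (angle, depth) bin.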
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 8 | ROOT_DIR = os.path.dirname(BASE_DIR) 9 | sys.path.append(ROOT_DIR) 10 | 11 | import pointnet2.pytorch_utils as pt_utils 12 | from pointnet2.pointnet2_utils import CylinderQueryAndGroup 13 | from loss_utils import generate_grasp_views, batch_viewpoint_params_to_matrix 14 | 15 | 16 | class GraspableNet(nn.Module): 17 | def __init__(self, seed_feature_dim): 18 | super().__init__() 19 | self.in_dim = seed_feature_dim 20 | self.conv_graspable = nn.Conv1d(self.in_dim, 3, 1) 21 | 22 | def forward(self, seed_features, end_points): 23 | graspable_score = self.conv_graspable(seed_features) # (B, 3, num_seed) 24 | end_points['objectness_score'] = graspable_score[:, :2] 25 | end_points['graspness_score'] = graspable_score[:, 2] 26 | return end_points 27 | 28 | 29 | class ApproachNet(nn.Module): 30 | def __init__(self, num_view, seed_feature_dim, is_training=True): 31 | super().__init__() 32 | self.num_view = num_view 33 | self.in_dim = seed_feature_dim 34 | self.is_training = is_training 35 | self.conv1 = nn.Conv1d(self.in_dim, self.in_dim, 1) 36 | self.conv2 = nn.Conv1d(self.in_dim, self.num_view, 1) 37 | 38 | def forward(self, seed_features, end_points): 39 | B, _, num_seed = seed_features.size() 40 | res_features = F.relu(self.conv1(seed_features), inplace=True) 41 | features = self.conv2(res_features) 42 | view_score = features.transpose(1, 2).contiguous() # (B, num_seed, num_view) 43 | end_points['view_score'] = view_score 44 | 45 | if self.is_training: 46 | # normalize view graspness score to 0~1 47 | view_score_ = view_score.clone().detach() 48 | view_score_max, _ = torch.max(view_score_, dim=2) 49 | view_score_min, _ = torch.min(view_score_, dim=2) 50 | view_score_max = view_score_max.unsqueeze(-1).expand(-1, -1, self.num_view) 51 | view_score_min = view_score_min.unsqueeze(-1).expand(-1, -1, self.num_view) 52 | view_score_ = (view_score_ - view_score_min) / (view_score_max - view_score_min + 1e-8) 53 | 54 | top_view_inds = [] 55 | for i in range(B): 56 | top_view_inds_batch = torch.multinomial(view_score_[i], 1, replacement=False) 57 | top_view_inds.append(top_view_inds_batch) 58 | top_view_inds = torch.stack(top_view_inds, dim=0).squeeze(-1) # B, num_seed 59 | else: 60 | _, top_view_inds = torch.max(view_score, dim=2) # (B, num_seed) 61 | 62 | top_view_inds_ = top_view_inds.view(B, num_seed, 1, 1).expand(-1, -1, -1, 3).contiguous() 63 | template_views = generate_grasp_views(self.num_view).to(features.device) # (num_view, 3) 64 | template_views = template_views.view(1, 1, self.num_view, 3).expand(B, num_seed, -1, -1).contiguous() 65 | vp_xyz = torch.gather(template_views, 2, top_view_inds_).squeeze(2) # (B, num_seed, 3) 66 | vp_xyz_ = vp_xyz.view(-1, 3) 67 | batch_angle = torch.zeros(vp_xyz_.size(0), dtype=vp_xyz.dtype, device=vp_xyz.device) 68 | vp_rot = batch_viewpoint_params_to_matrix(-vp_xyz_, batch_angle).view(B, num_seed, 3, 3) 69 | end_points['grasp_top_view_xyz'] = vp_xyz 70 | end_points['grasp_top_view_rot'] = vp_rot 71 | 72 | end_points['grasp_top_view_inds'] = top_view_inds 73 | return end_points, res_features 74 | 75 | 76 | class CloudCrop(nn.Module): 77 | def __init__(self, nsample, seed_feature_dim, cylinder_radius=0.05, hmin=-0.02, hmax=0.04): 78 | super().__init__() 79 | self.nsample = nsample 80 | self.in_dim = 
seed_feature_dim 81 | self.cylinder_radius = cylinder_radius 82 | mlps = [3 + self.in_dim, 256, 256] # use xyz, so plus 3 83 | 84 | self.grouper = CylinderQueryAndGroup(radius=cylinder_radius, hmin=hmin, hmax=hmax, nsample=nsample, 85 | use_xyz=True, normalize_xyz=True) 86 | self.mlps = pt_utils.SharedMLP(mlps, bn=True) 87 | 88 | def forward(self, seed_xyz_graspable, seed_features_graspable, vp_rot): 89 | grouped_feature = self.grouper(seed_xyz_graspable, seed_xyz_graspable, vp_rot, 90 | seed_features_graspable) # B*3 + feat_dim*M*K 91 | new_features = self.mlps(grouped_feature) # (batch_size, mlps[-1], M, K) 92 | new_features = F.max_pool2d(new_features, kernel_size=[1, new_features.size(3)]) # (batch_size, mlps[-1], M, 1) 93 | new_features = new_features.squeeze(-1) # (batch_size, mlps[-1], M) 94 | return new_features 95 | 96 | 97 | class SWADNet(nn.Module): 98 | def __init__(self, num_angle, num_depth): 99 | super().__init__() 100 | self.num_angle = num_angle 101 | self.num_depth = num_depth 102 | 103 | self.conv1 = nn.Conv1d(256, 256, 1) # input feat dim need to be consistent with CloudCrop module 104 | self.conv_swad = nn.Conv1d(256, 2*num_angle*num_depth, 1) 105 | 106 | def forward(self, vp_features, end_points): 107 | B, _, num_seed = vp_features.size() 108 | vp_features = F.relu(self.conv1(vp_features), inplace=True) 109 | vp_features = self.conv_swad(vp_features) 110 | vp_features = vp_features.view(B, 2, self.num_angle, self.num_depth, num_seed) 111 | vp_features = vp_features.permute(0, 1, 4, 2, 3) 112 | 113 | # split prediction 114 | end_points['grasp_score_pred'] = vp_features[:, 0] # B * num_seed * num angle * num_depth 115 | end_points['grasp_width_pred'] = vp_features[:, 1] 116 | return end_points 117 | -------------------------------------------------------------------------------- /models/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | try: 4 | import open3d as o3d 5 | except ImportError: 6 | raise ImportError("Please install open3d with `pip install open3d`.") 7 | 8 | import MinkowskiEngine as ME 9 | from MinkowskiEngine.modules.resnet_block import BasicBlock, Bottleneck 10 | 11 | 12 | class ResNetBase(nn.Module): 13 | BLOCK = None 14 | LAYERS = () 15 | INIT_DIM = 64 16 | PLANES = (64, 128, 256, 512) 17 | 18 | def __init__(self, in_channels, out_channels, D=3): 19 | nn.Module.__init__(self) 20 | self.D = D 21 | assert self.BLOCK is not None 22 | 23 | self.network_initialization(in_channels, out_channels, D) 24 | self.weight_initialization() 25 | 26 | def network_initialization(self, in_channels, out_channels, D): 27 | 28 | self.inplanes = self.INIT_DIM 29 | self.conv1 = nn.Sequential( 30 | ME.MinkowskiConvolution( 31 | in_channels, self.inplanes, kernel_size=3, stride=2, dimension=D 32 | ), 33 | ME.MinkowskiInstanceNorm(self.inplanes), 34 | ME.MinkowskiReLU(inplace=True), 35 | ME.MinkowskiMaxPooling(kernel_size=2, stride=2, dimension=D), 36 | ) 37 | 38 | self.layer1 = self._make_layer( 39 | self.BLOCK, self.PLANES[0], self.LAYERS[0], stride=2 40 | ) 41 | self.layer2 = self._make_layer( 42 | self.BLOCK, self.PLANES[1], self.LAYERS[1], stride=2 43 | ) 44 | self.layer3 = self._make_layer( 45 | self.BLOCK, self.PLANES[2], self.LAYERS[2], stride=2 46 | ) 47 | self.layer4 = self._make_layer( 48 | self.BLOCK, self.PLANES[3], self.LAYERS[3], stride=2 49 | ) 50 | 51 | self.conv5 = nn.Sequential( 52 | ME.MinkowskiDropout(), 53 | ME.MinkowskiConvolution( 54 | self.inplanes, self.inplanes, kernel_size=3, 
stride=3, dimension=D 55 | ), 56 | ME.MinkowskiInstanceNorm(self.inplanes), 57 | ME.MinkowskiGELU(), 58 | ) 59 | 60 | self.glob_pool = ME.MinkowskiGlobalMaxPooling() 61 | 62 | self.final = ME.MinkowskiLinear(self.inplanes, out_channels, bias=True) 63 | 64 | def weight_initialization(self): 65 | for m in self.modules(): 66 | if isinstance(m, ME.MinkowskiConvolution): 67 | ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu") 68 | 69 | if isinstance(m, ME.MinkowskiBatchNorm): 70 | nn.init.constant_(m.bn.weight, 1) 71 | nn.init.constant_(m.bn.bias, 0) 72 | 73 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1): 74 | downsample = None 75 | if stride != 1 or self.inplanes != planes * block.expansion: 76 | downsample = nn.Sequential( 77 | ME.MinkowskiConvolution( 78 | self.inplanes, 79 | planes * block.expansion, 80 | kernel_size=1, 81 | stride=stride, 82 | dimension=self.D, 83 | ), 84 | ME.MinkowskiBatchNorm(planes * block.expansion), 85 | ) 86 | layers = [] 87 | layers.append( 88 | block( 89 | self.inplanes, 90 | planes, 91 | stride=stride, 92 | dilation=dilation, 93 | downsample=downsample, 94 | dimension=self.D, 95 | ) 96 | ) 97 | self.inplanes = planes * block.expansion 98 | for i in range(1, blocks): 99 | layers.append( 100 | block( 101 | self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D 102 | ) 103 | ) 104 | 105 | return nn.Sequential(*layers) 106 | 107 | def forward(self, x: ME.SparseTensor): 108 | x = self.conv1(x) 109 | x = self.layer1(x) 110 | x = self.layer2(x) 111 | x = self.layer3(x) 112 | x = self.layer4(x) 113 | x = self.conv5(x) 114 | x = self.glob_pool(x) 115 | return self.final(x) 116 | 117 | 118 | class ResNet14(ResNetBase): 119 | BLOCK = BasicBlock 120 | LAYERS = (1, 1, 1, 1) 121 | 122 | 123 | class ResNet18(ResNetBase): 124 | BLOCK = BasicBlock 125 | LAYERS = (2, 2, 2, 2) 126 | 127 | 128 | class ResNet34(ResNetBase): 129 | BLOCK = BasicBlock 130 | LAYERS = (3, 4, 6, 3) 131 | 132 | 133 | class ResNet50(ResNetBase): 134 | BLOCK = Bottleneck 135 | LAYERS = (3, 4, 6, 3) 136 | 137 | 138 | class ResNet101(ResNetBase): 139 | BLOCK = Bottleneck 140 | LAYERS = (3, 4, 23, 3) 141 | 142 | 143 | class ResFieldNetBase(ResNetBase): 144 | def network_initialization(self, in_channels, out_channels, D): 145 | field_ch = 32 146 | field_ch2 = 64 147 | self.field_network = nn.Sequential( 148 | ME.MinkowskiSinusoidal(in_channels, field_ch), 149 | ME.MinkowskiBatchNorm(field_ch), 150 | ME.MinkowskiReLU(inplace=True), 151 | ME.MinkowskiLinear(field_ch, field_ch), 152 | ME.MinkowskiBatchNorm(field_ch), 153 | ME.MinkowskiReLU(inplace=True), 154 | ME.MinkowskiToSparseTensor(), 155 | ) 156 | self.field_network2 = nn.Sequential( 157 | ME.MinkowskiSinusoidal(field_ch + in_channels, field_ch2), 158 | ME.MinkowskiBatchNorm(field_ch2), 159 | ME.MinkowskiReLU(inplace=True), 160 | ME.MinkowskiLinear(field_ch2, field_ch2), 161 | ME.MinkowskiBatchNorm(field_ch2), 162 | ME.MinkowskiReLU(inplace=True), 163 | ME.MinkowskiToSparseTensor(), 164 | ) 165 | 166 | ResNetBase.network_initialization(self, field_ch2, out_channels, D) 167 | 168 | def forward(self, x: ME.TensorField): 169 | otensor = self.field_network(x) 170 | otensor2 = self.field_network2(otensor.cat_slice(x)) 171 | return ResNetBase.forward(self, otensor2) 172 | 173 | 174 | class ResFieldNet14(ResFieldNetBase): 175 | BLOCK = BasicBlock 176 | LAYERS = (1, 1, 1, 1) 177 | 178 | 179 | class ResFieldNet18(ResFieldNetBase): 180 | BLOCK = BasicBlock 181 | LAYERS = (2, 2, 2, 2) 182 | 183 | 
184 | class ResFieldNet34(ResFieldNetBase): 185 | BLOCK = BasicBlock 186 | LAYERS = (3, 4, 6, 3) 187 | 188 | 189 | class ResFieldNet50(ResFieldNetBase): 190 | BLOCK = Bottleneck 191 | LAYERS = (3, 4, 6, 3) 192 | 193 | 194 | class ResFieldNet101(ResFieldNetBase): 195 | BLOCK = Bottleneck 196 | LAYERS = (3, 4, 23, 3) 197 | -------------------------------------------------------------------------------- /pointnet2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/pointnet2/.DS_Store -------------------------------------------------------------------------------- /pointnet2/_ext_src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/pointnet2/_ext_src/.DS_Store -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/ball_query.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 10 | const int nsample); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cuda_utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
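// --- explanatory note (added; not part of the original file) ---
// cuda_utils.h below provides opt_n_threads (the largest power of two not
// exceeding work_size, capped at TOTAL_THREADS = 512), opt_block_config for
// 2-D launches, and the CUDA_CHECK_ERRORS macro invoked after each kernel
// launch in the .cu files of this extension.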
5 | 6 | #ifndef _CUDA_UTILS_H 7 | #define _CUDA_UTILS_H 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include 17 | 18 | #define TOTAL_THREADS 512 19 | 20 | inline int opt_n_threads(int work_size) { 21 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 22 | 23 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 24 | } 25 | 26 | inline dim3 opt_block_config(int x, int y) { 27 | const int x_threads = opt_n_threads(x); 28 | const int y_threads = 29 | max(min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); 30 | dim3 block_config(x_threads, y_threads, 1); 31 | 32 | return block_config; 33 | } 34 | 35 | #define CUDA_CHECK_ERRORS() \ 36 | do { \ 37 | cudaError_t err = cudaGetLastError(); \ 38 | if (cudaSuccess != err) { \ 39 | fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \ 40 | cudaGetErrorString(err), __PRETTY_FUNCTION__, __LINE__, \ 41 | __FILE__); \ 42 | exit(-1); \ 43 | } \ 44 | } while (0) 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/cylinder_query.h: -------------------------------------------------------------------------------- 1 | // Author: chenxi-wang 2 | 3 | #pragma once 4 | #include 5 | 6 | at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax, 7 | const int nsample); 8 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/group_points.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor group_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/interpolate.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows); 12 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 13 | at::Tensor weight); 14 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 15 | at::Tensor weight, const int m); 16 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/sampling.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #pragma once 7 | #include 8 | 9 | at::Tensor gather_points(at::Tensor points, at::Tensor idx); 10 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, const int n); 11 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples); 12 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/include/utils.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #pragma once 7 | #include 8 | #include 9 | 10 | #define CHECK_CUDA(x) \ 11 | do { \ 12 | TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor"); \ 13 | } while (0) 14 | 15 | #define CHECK_CONTIGUOUS(x) \ 16 | do { \ 17 | TORCH_CHECK(x.is_contiguous(), #x " must be a contiguous tensor"); \ 18 | } while (0) 19 | 20 | #define CHECK_IS_INT(x) \ 21 | do { \ 22 | TORCH_CHECK(x.scalar_type() == at::ScalarType::Int, \ 23 | #x " must be an int tensor"); \ 24 | } while (0) 25 | 26 | #define CHECK_IS_FLOAT(x) \ 27 | do { \ 28 | TORCH_CHECK(x.scalar_type() == at::ScalarType::Float, \ 29 | #x " must be a float tensor"); \ 30 | } while (0) 31 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "ball_query.h" 7 | #include "utils.h" 8 | 9 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 10 | int nsample, const float *new_xyz, 11 | const float *xyz, int *idx); 12 | 13 | at::Tensor ball_query(at::Tensor new_xyz, at::Tensor xyz, const float radius, 14 | const int nsample) { 15 | CHECK_CONTIGUOUS(new_xyz); 16 | CHECK_CONTIGUOUS(xyz); 17 | CHECK_IS_FLOAT(new_xyz); 18 | CHECK_IS_FLOAT(xyz); 19 | 20 | if (new_xyz.type().is_cuda()) { 21 | CHECK_CUDA(xyz); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_ball_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, nsample, new_xyz.data(), 31 | xyz.data(), idx.data()); 32 | } else { 33 | TORCH_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
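// --- explanatory note (added; not part of the original file) ---
// The ball-query kernel below launches one block per batch element; threads
// stride over the m query points. For each query it scans the n reference
// points, keeps up to nsample indices whose squared distance is below
// radius^2, and pre-fills every slot with the first hit so that unused slots
// still contain a valid index.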
5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "cuda_utils.h" 11 | 12 | // input: new_xyz(b, m, 3) xyz(b, n, 3) 13 | // output: idx(b, m, nsample) 14 | __global__ void query_ball_point_kernel(int b, int n, int m, float radius, 15 | int nsample, 16 | const float *__restrict__ new_xyz, 17 | const float *__restrict__ xyz, 18 | int *__restrict__ idx) { 19 | int batch_index = blockIdx.x; 20 | xyz += batch_index * n * 3; 21 | new_xyz += batch_index * m * 3; 22 | idx += m * nsample * batch_index; 23 | 24 | int index = threadIdx.x; 25 | int stride = blockDim.x; 26 | 27 | float radius2 = radius * radius; 28 | for (int j = index; j < m; j += stride) { 29 | float new_x = new_xyz[j * 3 + 0]; 30 | float new_y = new_xyz[j * 3 + 1]; 31 | float new_z = new_xyz[j * 3 + 2]; 32 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 33 | float x = xyz[k * 3 + 0]; 34 | float y = xyz[k * 3 + 1]; 35 | float z = xyz[k * 3 + 2]; 36 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + 37 | (new_z - z) * (new_z - z); 38 | if (d2 < radius2) { 39 | if (cnt == 0) { 40 | for (int l = 0; l < nsample; ++l) { 41 | idx[j * nsample + l] = k; 42 | } 43 | } 44 | idx[j * nsample + cnt] = k; 45 | ++cnt; 46 | } 47 | } 48 | } 49 | } 50 | 51 | void query_ball_point_kernel_wrapper(int b, int n, int m, float radius, 52 | int nsample, const float *new_xyz, 53 | const float *xyz, int *idx) { 54 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 55 | query_ball_point_kernel<<>>( 56 | b, n, m, radius, nsample, new_xyz, xyz, idx); 57 | 58 | CUDA_CHECK_ERRORS(); 59 | } 60 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/bindings.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
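// --- explanatory note (added; not part of the original file) ---
// bindings.cpp below registers the extension ops (gather_points and its grad,
// furthest_point_sampling, three_nn / three_interpolate and its grad,
// ball_query, group_points and its grad, cylinder_query) via PYBIND11_MODULE.
// The Python-side entry points used elsewhere in this repo, e.g.
// furthest_point_sample and gather_operation imported by models/graspnet.py,
// are provided by pointnet2/pointnet2_utils.py on top of these ops.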
5 | 6 | #include "ball_query.h" 7 | #include "group_points.h" 8 | #include "interpolate.h" 9 | #include "sampling.h" 10 | #include "cylinder_query.h" 11 | 12 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 13 | m.def("gather_points", &gather_points); 14 | m.def("gather_points_grad", &gather_points_grad); 15 | m.def("furthest_point_sampling", &furthest_point_sampling); 16 | 17 | m.def("three_nn", &three_nn); 18 | m.def("three_interpolate", &three_interpolate); 19 | m.def("three_interpolate_grad", &three_interpolate_grad); 20 | 21 | m.def("ball_query", &ball_query); 22 | 23 | m.def("group_points", &group_points); 24 | m.def("group_points_grad", &group_points_grad); 25 | 26 | m.def("cylinder_query", &cylinder_query); 27 | } 28 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/cylinder_query.cpp: -------------------------------------------------------------------------------- 1 | // Author: chenxi-wang 2 | 3 | #include "cylinder_query.h" 4 | #include "utils.h" 5 | 6 | void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax, 7 | int nsample, const float *new_xyz, 8 | const float *xyz, const float *rot, int *idx); 9 | 10 | at::Tensor cylinder_query(at::Tensor new_xyz, at::Tensor xyz, at::Tensor rot, const float radius, const float hmin, const float hmax, 11 | const int nsample) { 12 | CHECK_CONTIGUOUS(new_xyz); 13 | CHECK_CONTIGUOUS(xyz); 14 | CHECK_CONTIGUOUS(rot); 15 | CHECK_IS_FLOAT(new_xyz); 16 | CHECK_IS_FLOAT(xyz); 17 | CHECK_IS_FLOAT(rot); 18 | 19 | if (new_xyz.type().is_cuda()) { 20 | CHECK_CUDA(xyz); 21 | CHECK_CUDA(rot); 22 | } 23 | 24 | at::Tensor idx = 25 | torch::zeros({new_xyz.size(0), new_xyz.size(1), nsample}, 26 | at::device(new_xyz.device()).dtype(at::ScalarType::Int)); 27 | 28 | if (new_xyz.type().is_cuda()) { 29 | query_cylinder_point_kernel_wrapper(xyz.size(0), xyz.size(1), new_xyz.size(1), 30 | radius, hmin, hmax, nsample, new_xyz.data(), 31 | xyz.data(), rot.data(), idx.data()); 32 | } else { 33 | TORCH_CHECK(false, "CPU not supported"); 34 | } 35 | 36 | return idx; 37 | } 38 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/cylinder_query_gpu.cu: -------------------------------------------------------------------------------- 1 | // Author: chenxi-wang 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "cuda_utils.h" 8 | 9 | __global__ void query_cylinder_point_kernel(int b, int n, int m, float radius, float hmin, float hmax, 10 | int nsample, 11 | const float *__restrict__ new_xyz, 12 | const float *__restrict__ xyz, 13 | const float *__restrict__ rot, 14 | int *__restrict__ idx) { 15 | int batch_index = blockIdx.x; 16 | xyz += batch_index * n * 3; 17 | new_xyz += batch_index * m * 3; 18 | rot += batch_index * m * 9; 19 | idx += m * nsample * batch_index; 20 | 21 | int index = threadIdx.x; 22 | int stride = blockDim.x; 23 | 24 | float radius2 = radius * radius; 25 | for (int j = index; j < m; j += stride) { 26 | float new_x = new_xyz[j * 3 + 0]; 27 | float new_y = new_xyz[j * 3 + 1]; 28 | float new_z = new_xyz[j * 3 + 2]; 29 | float r0 = rot[j * 9 + 0]; 30 | float r1 = rot[j * 9 + 1]; 31 | float r2 = rot[j * 9 + 2]; 32 | float r3 = rot[j * 9 + 3]; 33 | float r4 = rot[j * 9 + 4]; 34 | float r5 = rot[j * 9 + 5]; 35 | float r6 = rot[j * 9 + 6]; 36 | float r7 = rot[j * 9 + 7]; 37 | float r8 = rot[j * 9 + 8]; 38 | for (int k = 0, cnt = 0; k < n && cnt < nsample; ++k) { 39 | float x = xyz[k * 3 + 0] - new_x; 40 | 
float y = xyz[k * 3 + 1] - new_y; 41 | float z = xyz[k * 3 + 2] - new_z; 42 | float x_rot = r0 * x + r3 * y + r6 * z; 43 | float y_rot = r1 * x + r4 * y + r7 * z; 44 | float z_rot = r2 * x + r5 * y + r8 * z; 45 | float d2 = y_rot * y_rot + z_rot * z_rot; 46 | if (d2 < radius2 && x_rot > hmin && x_rot < hmax) { 47 | if (cnt == 0) { 48 | for (int l = 0; l < nsample; ++l) { 49 | idx[j * nsample + l] = k; 50 | } 51 | } 52 | idx[j * nsample + cnt] = k; 53 | ++cnt; 54 | } 55 | } 56 | } 57 | } 58 | 59 | void query_cylinder_point_kernel_wrapper(int b, int n, int m, float radius, float hmin, float hmax, 60 | int nsample, const float *new_xyz, 61 | const float *xyz, const float *rot, int *idx) { 62 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 63 | query_cylinder_point_kernel<<>>( 64 | b, n, m, radius, hmin, hmax, nsample, new_xyz, xyz, rot, idx); 65 | 66 | CUDA_CHECK_ERRORS(); 67 | } 68 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "group_points.h" 7 | #include "utils.h" 8 | 9 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 10 | const float *points, const int *idx, 11 | float *out); 12 | 13 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 14 | int nsample, const float *grad_out, 15 | const int *idx, float *grad_points); 16 | 17 | at::Tensor group_points(at::Tensor points, at::Tensor idx) { 18 | CHECK_CONTIGUOUS(points); 19 | CHECK_CONTIGUOUS(idx); 20 | CHECK_IS_FLOAT(points); 21 | CHECK_IS_INT(idx); 22 | 23 | if (points.type().is_cuda()) { 24 | CHECK_CUDA(idx); 25 | } 26 | 27 | at::Tensor output = 28 | torch::zeros({points.size(0), points.size(1), idx.size(1), idx.size(2)}, 29 | at::device(points.device()).dtype(at::ScalarType::Float)); 30 | 31 | if (points.type().is_cuda()) { 32 | group_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 33 | idx.size(1), idx.size(2), points.data(), 34 | idx.data(), output.data()); 35 | } else { 36 | TORCH_CHECK(false, "CPU not supported"); 37 | } 38 | 39 | return output; 40 | } 41 | 42 | at::Tensor group_points_grad(at::Tensor grad_out, at::Tensor idx, const int n) { 43 | CHECK_CONTIGUOUS(grad_out); 44 | CHECK_CONTIGUOUS(idx); 45 | CHECK_IS_FLOAT(grad_out); 46 | CHECK_IS_INT(idx); 47 | 48 | if (grad_out.type().is_cuda()) { 49 | CHECK_CUDA(idx); 50 | } 51 | 52 | at::Tensor output = 53 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 54 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 55 | 56 | if (grad_out.type().is_cuda()) { 57 | group_points_grad_kernel_wrapper( 58 | grad_out.size(0), grad_out.size(1), n, idx.size(1), idx.size(2), 59 | grad_out.data(), idx.data(), output.data()); 60 | } else { 61 | TORCH_CHECK(false, "CPU not supported"); 62 | } 63 | 64 | return output; 65 | } 66 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
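// --- explanatory note (added; not part of the original file) ---
// group_points_kernel gathers features by index: out(b, c, npoints, nsample)
// is filled from points(b, c, n) using idx(b, npoints, nsample). The matching
// grad kernel scatters gradients back into grad_points(b, c, n) with
// atomicAdd, since several grouped samples can refer to the same source point.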
5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, npoints, nsample) 12 | // output: out(b, c, npoints, nsample) 13 | __global__ void group_points_kernel(int b, int c, int n, int npoints, 14 | int nsample, 15 | const float *__restrict__ points, 16 | const int *__restrict__ idx, 17 | float *__restrict__ out) { 18 | int batch_index = blockIdx.x; 19 | points += batch_index * n * c; 20 | idx += batch_index * npoints * nsample; 21 | out += batch_index * npoints * nsample * c; 22 | 23 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 24 | const int stride = blockDim.y * blockDim.x; 25 | for (int i = index; i < c * npoints; i += stride) { 26 | const int l = i / npoints; 27 | const int j = i % npoints; 28 | for (int k = 0; k < nsample; ++k) { 29 | int ii = idx[j * nsample + k]; 30 | out[(l * npoints + j) * nsample + k] = points[l * n + ii]; 31 | } 32 | } 33 | } 34 | 35 | void group_points_kernel_wrapper(int b, int c, int n, int npoints, int nsample, 36 | const float *points, const int *idx, 37 | float *out) { 38 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 39 | 40 | group_points_kernel<<>>( 41 | b, c, n, npoints, nsample, points, idx, out); 42 | 43 | CUDA_CHECK_ERRORS(); 44 | } 45 | 46 | // input: grad_out(b, c, npoints, nsample), idx(b, npoints, nsample) 47 | // output: grad_points(b, c, n) 48 | __global__ void group_points_grad_kernel(int b, int c, int n, int npoints, 49 | int nsample, 50 | const float *__restrict__ grad_out, 51 | const int *__restrict__ idx, 52 | float *__restrict__ grad_points) { 53 | int batch_index = blockIdx.x; 54 | grad_out += batch_index * npoints * nsample * c; 55 | idx += batch_index * npoints * nsample; 56 | grad_points += batch_index * n * c; 57 | 58 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 59 | const int stride = blockDim.y * blockDim.x; 60 | for (int i = index; i < c * npoints; i += stride) { 61 | const int l = i / npoints; 62 | const int j = i % npoints; 63 | for (int k = 0; k < nsample; ++k) { 64 | int ii = idx[j * nsample + k]; 65 | atomicAdd(grad_points + l * n + ii, 66 | grad_out[(l * npoints + j) * nsample + k]); 67 | } 68 | } 69 | } 70 | 71 | void group_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 72 | int nsample, const float *grad_out, 73 | const int *idx, float *grad_points) { 74 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 75 | 76 | group_points_grad_kernel<<>>( 77 | b, c, n, npoints, nsample, grad_out, idx, grad_points); 78 | 79 | CUDA_CHECK_ERRORS(); 80 | } 81 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
5 | 6 | #include "interpolate.h" 7 | #include "utils.h" 8 | 9 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown, 10 | const float *known, float *dist2, int *idx); 11 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n, 12 | const float *points, const int *idx, 13 | const float *weight, float *out); 14 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 15 | const float *grad_out, 16 | const int *idx, const float *weight, 17 | float *grad_points); 18 | 19 | std::vector three_nn(at::Tensor unknowns, at::Tensor knows) { 20 | CHECK_CONTIGUOUS(unknowns); 21 | CHECK_CONTIGUOUS(knows); 22 | CHECK_IS_FLOAT(unknowns); 23 | CHECK_IS_FLOAT(knows); 24 | 25 | if (unknowns.type().is_cuda()) { 26 | CHECK_CUDA(knows); 27 | } 28 | 29 | at::Tensor idx = 30 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 31 | at::device(unknowns.device()).dtype(at::ScalarType::Int)); 32 | at::Tensor dist2 = 33 | torch::zeros({unknowns.size(0), unknowns.size(1), 3}, 34 | at::device(unknowns.device()).dtype(at::ScalarType::Float)); 35 | 36 | if (unknowns.type().is_cuda()) { 37 | three_nn_kernel_wrapper(unknowns.size(0), unknowns.size(1), knows.size(1), 38 | unknowns.data(), knows.data(), 39 | dist2.data(), idx.data()); 40 | } else { 41 | TORCH_CHECK(false, "CPU not supported"); 42 | } 43 | 44 | return {dist2, idx}; 45 | } 46 | 47 | at::Tensor three_interpolate(at::Tensor points, at::Tensor idx, 48 | at::Tensor weight) { 49 | CHECK_CONTIGUOUS(points); 50 | CHECK_CONTIGUOUS(idx); 51 | CHECK_CONTIGUOUS(weight); 52 | CHECK_IS_FLOAT(points); 53 | CHECK_IS_INT(idx); 54 | CHECK_IS_FLOAT(weight); 55 | 56 | if (points.type().is_cuda()) { 57 | CHECK_CUDA(idx); 58 | CHECK_CUDA(weight); 59 | } 60 | 61 | at::Tensor output = 62 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 63 | at::device(points.device()).dtype(at::ScalarType::Float)); 64 | 65 | if (points.type().is_cuda()) { 66 | three_interpolate_kernel_wrapper( 67 | points.size(0), points.size(1), points.size(2), idx.size(1), 68 | points.data(), idx.data(), weight.data(), 69 | output.data()); 70 | } else { 71 | TORCH_CHECK(false, "CPU not supported"); 72 | } 73 | 74 | return output; 75 | } 76 | at::Tensor three_interpolate_grad(at::Tensor grad_out, at::Tensor idx, 77 | at::Tensor weight, const int m) { 78 | CHECK_CONTIGUOUS(grad_out); 79 | CHECK_CONTIGUOUS(idx); 80 | CHECK_CONTIGUOUS(weight); 81 | CHECK_IS_FLOAT(grad_out); 82 | CHECK_IS_INT(idx); 83 | CHECK_IS_FLOAT(weight); 84 | 85 | if (grad_out.type().is_cuda()) { 86 | CHECK_CUDA(idx); 87 | CHECK_CUDA(weight); 88 | } 89 | 90 | at::Tensor output = 91 | torch::zeros({grad_out.size(0), grad_out.size(1), m}, 92 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 93 | 94 | if (grad_out.type().is_cuda()) { 95 | three_interpolate_grad_kernel_wrapper( 96 | grad_out.size(0), grad_out.size(1), grad_out.size(2), m, 97 | grad_out.data(), idx.data(), weight.data(), 98 | output.data()); 99 | } else { 100 | TORCH_CHECK(false, "CPU not supported"); 101 | } 102 | 103 | return output; 104 | } 105 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 
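// --- explanatory note (added; not part of the original file) ---
// three_nn_kernel finds, for every "unknown" point, its three nearest "known"
// points (squared distances plus indices); three_interpolate_kernel forms the
// weighted sum of the three corresponding feature vectors, and the grad kernel
// scatters gradients back with atomicAdd. Together they implement the
// interpolation used for PointNet++-style feature propagation.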
5 | 
6 | #include <math.h>
7 | #include <stdio.h>
8 | #include <stdlib.h>
9 | 
10 | #include "cuda_utils.h"
11 | 
12 | // input: unknown(b, n, 3) known(b, m, 3)
13 | // output: dist2(b, n, 3), idx(b, n, 3)
14 | __global__ void three_nn_kernel(int b, int n, int m,
15 |                                 const float *__restrict__ unknown,
16 |                                 const float *__restrict__ known,
17 |                                 float *__restrict__ dist2,
18 |                                 int *__restrict__ idx) {
19 |   int batch_index = blockIdx.x;
20 |   unknown += batch_index * n * 3;
21 |   known += batch_index * m * 3;
22 |   dist2 += batch_index * n * 3;
23 |   idx += batch_index * n * 3;
24 | 
25 |   int index = threadIdx.x;
26 |   int stride = blockDim.x;
27 |   for (int j = index; j < n; j += stride) {
28 |     float ux = unknown[j * 3 + 0];
29 |     float uy = unknown[j * 3 + 1];
30 |     float uz = unknown[j * 3 + 2];
31 | 
32 |     double best1 = 1e40, best2 = 1e40, best3 = 1e40;
33 |     int besti1 = 0, besti2 = 0, besti3 = 0;
34 |     for (int k = 0; k < m; ++k) {
35 |       float x = known[k * 3 + 0];
36 |       float y = known[k * 3 + 1];
37 |       float z = known[k * 3 + 2];
38 |       float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
39 |       if (d < best1) {
40 |         best3 = best2;
41 |         besti3 = besti2;
42 |         best2 = best1;
43 |         besti2 = besti1;
44 |         best1 = d;
45 |         besti1 = k;
46 |       } else if (d < best2) {
47 |         best3 = best2;
48 |         besti3 = besti2;
49 |         best2 = d;
50 |         besti2 = k;
51 |       } else if (d < best3) {
52 |         best3 = d;
53 |         besti3 = k;
54 |       }
55 |     }
56 |     dist2[j * 3 + 0] = best1;
57 |     dist2[j * 3 + 1] = best2;
58 |     dist2[j * 3 + 2] = best3;
59 | 
60 |     idx[j * 3 + 0] = besti1;
61 |     idx[j * 3 + 1] = besti2;
62 |     idx[j * 3 + 2] = besti3;
63 |   }
64 | }
65 | 
66 | void three_nn_kernel_wrapper(int b, int n, int m, const float *unknown,
67 |                              const float *known, float *dist2, int *idx) {
68 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
69 |   three_nn_kernel<<<b, opt_n_threads(n), 0, stream>>>(b, n, m, unknown, known,
70 |                                                       dist2, idx);
71 | 
72 |   CUDA_CHECK_ERRORS();
73 | }
74 | 
75 | // input: points(b, c, m), idx(b, n, 3), weight(b, n, 3)
76 | // output: out(b, c, n)
77 | __global__ void three_interpolate_kernel(int b, int c, int m, int n,
78 |                                          const float *__restrict__ points,
79 |                                          const int *__restrict__ idx,
80 |                                          const float *__restrict__ weight,
81 |                                          float *__restrict__ out) {
82 |   int batch_index = blockIdx.x;
83 |   points += batch_index * m * c;
84 | 
85 |   idx += batch_index * n * 3;
86 |   weight += batch_index * n * 3;
87 | 
88 |   out += batch_index * n * c;
89 | 
90 |   const int index = threadIdx.y * blockDim.x + threadIdx.x;
91 |   const int stride = blockDim.y * blockDim.x;
92 |   for (int i = index; i < c * n; i += stride) {
93 |     const int l = i / n;
94 |     const int j = i % n;
95 |     float w1 = weight[j * 3 + 0];
96 |     float w2 = weight[j * 3 + 1];
97 |     float w3 = weight[j * 3 + 2];
98 | 
99 |     int i1 = idx[j * 3 + 0];
100 |     int i2 = idx[j * 3 + 1];
101 |     int i3 = idx[j * 3 + 2];
102 | 
103 |     out[i] = points[l * m + i1] * w1 + points[l * m + i2] * w2 +
104 |              points[l * m + i3] * w3;
105 |   }
106 | }
107 | 
108 | void three_interpolate_kernel_wrapper(int b, int c, int m, int n,
109 |                                       const float *points, const int *idx,
110 |                                       const float *weight, float *out) {
111 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
112 |   three_interpolate_kernel<<<b, opt_block_config(n, c), 0, stream>>>(
113 |       b, c, m, n, points, idx, weight, out);
114 | 
115 |   CUDA_CHECK_ERRORS();
116 | }
117 | 
118 | // input: grad_out(b, c, n), idx(b, n, 3), weight(b, n, 3)
119 | // output: grad_points(b, c, m)
120 | 
121 | __global__ void three_interpolate_grad_kernel(
122 |     int b, int c, int n, int m, const float *__restrict__ grad_out,
123 |     const int
*__restrict__ idx, const float *__restrict__ weight, 124 | float *__restrict__ grad_points) { 125 | int batch_index = blockIdx.x; 126 | grad_out += batch_index * n * c; 127 | idx += batch_index * n * 3; 128 | weight += batch_index * n * 3; 129 | grad_points += batch_index * m * c; 130 | 131 | const int index = threadIdx.y * blockDim.x + threadIdx.x; 132 | const int stride = blockDim.y * blockDim.x; 133 | for (int i = index; i < c * n; i += stride) { 134 | const int l = i / n; 135 | const int j = i % n; 136 | float w1 = weight[j * 3 + 0]; 137 | float w2 = weight[j * 3 + 1]; 138 | float w3 = weight[j * 3 + 2]; 139 | 140 | int i1 = idx[j * 3 + 0]; 141 | int i2 = idx[j * 3 + 1]; 142 | int i3 = idx[j * 3 + 2]; 143 | 144 | atomicAdd(grad_points + l * m + i1, grad_out[i] * w1); 145 | atomicAdd(grad_points + l * m + i2, grad_out[i] * w2); 146 | atomicAdd(grad_points + l * m + i3, grad_out[i] * w3); 147 | } 148 | } 149 | 150 | void three_interpolate_grad_kernel_wrapper(int b, int c, int n, int m, 151 | const float *grad_out, 152 | const int *idx, const float *weight, 153 | float *grad_points) { 154 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 155 | three_interpolate_grad_kernel<<>>( 156 | b, c, n, m, grad_out, idx, weight, grad_points); 157 | 158 | CUDA_CHECK_ERRORS(); 159 | } 160 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include "sampling.h" 7 | #include "utils.h" 8 | 9 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 10 | const float *points, const int *idx, 11 | float *out); 12 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 13 | const float *grad_out, const int *idx, 14 | float *grad_points); 15 | 16 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 17 | const float *dataset, float *temp, 18 | int *idxs); 19 | 20 | at::Tensor gather_points(at::Tensor points, at::Tensor idx) { 21 | CHECK_CONTIGUOUS(points); 22 | CHECK_CONTIGUOUS(idx); 23 | CHECK_IS_FLOAT(points); 24 | CHECK_IS_INT(idx); 25 | 26 | if (points.type().is_cuda()) { 27 | CHECK_CUDA(idx); 28 | } 29 | 30 | at::Tensor output = 31 | torch::zeros({points.size(0), points.size(1), idx.size(1)}, 32 | at::device(points.device()).dtype(at::ScalarType::Float)); 33 | 34 | if (points.type().is_cuda()) { 35 | gather_points_kernel_wrapper(points.size(0), points.size(1), points.size(2), 36 | idx.size(1), points.data(), 37 | idx.data(), output.data()); 38 | } else { 39 | TORCH_CHECK(false, "CPU not supported"); 40 | } 41 | 42 | return output; 43 | } 44 | 45 | at::Tensor gather_points_grad(at::Tensor grad_out, at::Tensor idx, 46 | const int n) { 47 | CHECK_CONTIGUOUS(grad_out); 48 | CHECK_CONTIGUOUS(idx); 49 | CHECK_IS_FLOAT(grad_out); 50 | CHECK_IS_INT(idx); 51 | 52 | if (grad_out.type().is_cuda()) { 53 | CHECK_CUDA(idx); 54 | } 55 | 56 | at::Tensor output = 57 | torch::zeros({grad_out.size(0), grad_out.size(1), n}, 58 | at::device(grad_out.device()).dtype(at::ScalarType::Float)); 59 | 60 | if (grad_out.type().is_cuda()) { 61 | gather_points_grad_kernel_wrapper(grad_out.size(0), grad_out.size(1), n, 62 | idx.size(1), grad_out.data(), 63 | idx.data(), output.data()); 64 | } else { 65 | TORCH_CHECK(false, "CPU not 
supported"); 66 | } 67 | 68 | return output; 69 | } 70 | at::Tensor furthest_point_sampling(at::Tensor points, const int nsamples) { 71 | CHECK_CONTIGUOUS(points); 72 | CHECK_IS_FLOAT(points); 73 | 74 | at::Tensor output = 75 | torch::zeros({points.size(0), nsamples}, 76 | at::device(points.device()).dtype(at::ScalarType::Int)); 77 | 78 | at::Tensor tmp = 79 | torch::full({points.size(0), points.size(1)}, 1e10, 80 | at::device(points.device()).dtype(at::ScalarType::Float)); 81 | 82 | if (points.type().is_cuda()) { 83 | furthest_point_sampling_kernel_wrapper( 84 | points.size(0), points.size(1), nsamples, points.data(), 85 | tmp.data(), output.data()); 86 | } else { 87 | TORCH_CHECK(false, "CPU not supported"); 88 | } 89 | 90 | return output; 91 | } 92 | -------------------------------------------------------------------------------- /pointnet2/_ext_src/src/sampling_gpu.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 2 | // 3 | // This source code is licensed under the MIT license found in the 4 | // LICENSE file in the root directory of this source tree. 5 | 6 | #include 7 | #include 8 | 9 | #include "cuda_utils.h" 10 | 11 | // input: points(b, c, n) idx(b, m) 12 | // output: out(b, c, m) 13 | __global__ void gather_points_kernel(int b, int c, int n, int m, 14 | const float *__restrict__ points, 15 | const int *__restrict__ idx, 16 | float *__restrict__ out) { 17 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 18 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 19 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 20 | int a = idx[i * m + j]; 21 | out[(i * c + l) * m + j] = points[(i * c + l) * n + a]; 22 | } 23 | } 24 | } 25 | } 26 | 27 | void gather_points_kernel_wrapper(int b, int c, int n, int npoints, 28 | const float *points, const int *idx, 29 | float *out) { 30 | gather_points_kernel<<>>(b, c, n, npoints, 32 | points, idx, out); 33 | 34 | CUDA_CHECK_ERRORS(); 35 | } 36 | 37 | // input: grad_out(b, c, m) idx(b, m) 38 | // output: grad_points(b, c, n) 39 | __global__ void gather_points_grad_kernel(int b, int c, int n, int m, 40 | const float *__restrict__ grad_out, 41 | const int *__restrict__ idx, 42 | float *__restrict__ grad_points) { 43 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 44 | for (int l = blockIdx.y; l < c; l += gridDim.y) { 45 | for (int j = threadIdx.x; j < m; j += blockDim.x) { 46 | int a = idx[i * m + j]; 47 | atomicAdd(grad_points + (i * c + l) * n + a, 48 | grad_out[(i * c + l) * m + j]); 49 | } 50 | } 51 | } 52 | } 53 | 54 | void gather_points_grad_kernel_wrapper(int b, int c, int n, int npoints, 55 | const float *grad_out, const int *idx, 56 | float *grad_points) { 57 | gather_points_grad_kernel<<>>( 59 | b, c, n, npoints, grad_out, idx, grad_points); 60 | 61 | CUDA_CHECK_ERRORS(); 62 | } 63 | 64 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, 65 | int idx1, int idx2) { 66 | const float v1 = dists[idx1], v2 = dists[idx2]; 67 | const int i1 = dists_i[idx1], i2 = dists_i[idx2]; 68 | dists[idx1] = max(v1, v2); 69 | dists_i[idx1] = v2 > v1 ? 
i2 : i1; 70 | } 71 | 72 | // Input dataset: (b, n, 3), tmp: (b, n) 73 | // Ouput idxs (b, m) 74 | template 75 | __global__ void furthest_point_sampling_kernel( 76 | int b, int n, int m, const float *__restrict__ dataset, 77 | float *__restrict__ temp, int *__restrict__ idxs) { 78 | if (m <= 0) return; 79 | __shared__ float dists[block_size]; 80 | __shared__ int dists_i[block_size]; 81 | 82 | int batch_index = blockIdx.x; 83 | dataset += batch_index * n * 3; 84 | temp += batch_index * n; 85 | idxs += batch_index * m; 86 | 87 | int tid = threadIdx.x; 88 | const int stride = block_size; 89 | 90 | int old = 0; 91 | if (threadIdx.x == 0) idxs[0] = old; 92 | 93 | __syncthreads(); 94 | for (int j = 1; j < m; j++) { 95 | int besti = 0; 96 | float best = -1; 97 | float x1 = dataset[old * 3 + 0]; 98 | float y1 = dataset[old * 3 + 1]; 99 | float z1 = dataset[old * 3 + 2]; 100 | for (int k = tid; k < n; k += stride) { 101 | float x2, y2, z2; 102 | x2 = dataset[k * 3 + 0]; 103 | y2 = dataset[k * 3 + 1]; 104 | z2 = dataset[k * 3 + 2]; 105 | float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); 106 | if (mag <= 1e-3) continue; 107 | 108 | float d = 109 | (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); 110 | 111 | float d2 = min(d, temp[k]); 112 | temp[k] = d2; 113 | besti = d2 > best ? k : besti; 114 | best = d2 > best ? d2 : best; 115 | } 116 | dists[tid] = best; 117 | dists_i[tid] = besti; 118 | __syncthreads(); 119 | 120 | if (block_size >= 512) { 121 | if (tid < 256) { 122 | __update(dists, dists_i, tid, tid + 256); 123 | } 124 | __syncthreads(); 125 | } 126 | if (block_size >= 256) { 127 | if (tid < 128) { 128 | __update(dists, dists_i, tid, tid + 128); 129 | } 130 | __syncthreads(); 131 | } 132 | if (block_size >= 128) { 133 | if (tid < 64) { 134 | __update(dists, dists_i, tid, tid + 64); 135 | } 136 | __syncthreads(); 137 | } 138 | if (block_size >= 64) { 139 | if (tid < 32) { 140 | __update(dists, dists_i, tid, tid + 32); 141 | } 142 | __syncthreads(); 143 | } 144 | if (block_size >= 32) { 145 | if (tid < 16) { 146 | __update(dists, dists_i, tid, tid + 16); 147 | } 148 | __syncthreads(); 149 | } 150 | if (block_size >= 16) { 151 | if (tid < 8) { 152 | __update(dists, dists_i, tid, tid + 8); 153 | } 154 | __syncthreads(); 155 | } 156 | if (block_size >= 8) { 157 | if (tid < 4) { 158 | __update(dists, dists_i, tid, tid + 4); 159 | } 160 | __syncthreads(); 161 | } 162 | if (block_size >= 4) { 163 | if (tid < 2) { 164 | __update(dists, dists_i, tid, tid + 2); 165 | } 166 | __syncthreads(); 167 | } 168 | if (block_size >= 2) { 169 | if (tid < 1) { 170 | __update(dists, dists_i, tid, tid + 1); 171 | } 172 | __syncthreads(); 173 | } 174 | 175 | old = dists_i[0]; 176 | if (tid == 0) idxs[j] = old; 177 | } 178 | } 179 | 180 | void furthest_point_sampling_kernel_wrapper(int b, int n, int m, 181 | const float *dataset, float *temp, 182 | int *idxs) { 183 | unsigned int n_threads = opt_n_threads(n); 184 | 185 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 186 | 187 | switch (n_threads) { 188 | case 512: 189 | furthest_point_sampling_kernel<512> 190 | <<>>(b, n, m, dataset, temp, idxs); 191 | break; 192 | case 256: 193 | furthest_point_sampling_kernel<256> 194 | <<>>(b, n, m, dataset, temp, idxs); 195 | break; 196 | case 128: 197 | furthest_point_sampling_kernel<128> 198 | <<>>(b, n, m, dataset, temp, idxs); 199 | break; 200 | case 64: 201 | furthest_point_sampling_kernel<64> 202 | <<>>(b, n, m, dataset, temp, idxs); 203 | break; 204 | case 32: 205 | 
furthest_point_sampling_kernel<32> 206 | <<>>(b, n, m, dataset, temp, idxs); 207 | break; 208 | case 16: 209 | furthest_point_sampling_kernel<16> 210 | <<>>(b, n, m, dataset, temp, idxs); 211 | break; 212 | case 8: 213 | furthest_point_sampling_kernel<8> 214 | <<>>(b, n, m, dataset, temp, idxs); 215 | break; 216 | case 4: 217 | furthest_point_sampling_kernel<4> 218 | <<>>(b, n, m, dataset, temp, idxs); 219 | break; 220 | case 2: 221 | furthest_point_sampling_kernel<2> 222 | <<>>(b, n, m, dataset, temp, idxs); 223 | break; 224 | case 1: 225 | furthest_point_sampling_kernel<1> 226 | <<>>(b, n, m, dataset, temp, idxs); 227 | break; 228 | default: 229 | furthest_point_sampling_kernel<512> 230 | <<>>(b, n, m, dataset, temp, idxs); 231 | } 232 | 233 | CUDA_CHECK_ERRORS(); 234 | } 235 | -------------------------------------------------------------------------------- /pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | ''' Modified based on Ref: https://github.com/erikwijmans/Pointnet2_PyTorch ''' 7 | import torch 8 | import torch.nn as nn 9 | from typing import List, Tuple 10 | 11 | class SharedMLP(nn.Sequential): 12 | 13 | def __init__( 14 | self, 15 | args: List[int], 16 | *, 17 | bn: bool = False, 18 | activation=nn.ReLU(inplace=True), 19 | preact: bool = False, 20 | first: bool = False, 21 | name: str = "" 22 | ): 23 | super().__init__() 24 | 25 | for i in range(len(args) - 1): 26 | self.add_module( 27 | name + 'layer{}'.format(i), 28 | Conv2d( 29 | args[i], 30 | args[i + 1], 31 | bn=(not first or not preact or (i != 0)) and bn, 32 | activation=activation 33 | if (not first or not preact or (i != 0)) else None, 34 | preact=preact 35 | ) 36 | ) 37 | 38 | 39 | class _BNBase(nn.Sequential): 40 | 41 | def __init__(self, in_size, batch_norm=None, name=""): 42 | super().__init__() 43 | self.add_module(name + "bn", batch_norm(in_size)) 44 | 45 | nn.init.constant_(self[0].weight, 1.0) 46 | nn.init.constant_(self[0].bias, 0) 47 | 48 | 49 | class BatchNorm1d(_BNBase): 50 | 51 | def __init__(self, in_size: int, *, name: str = ""): 52 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 53 | 54 | 55 | class BatchNorm2d(_BNBase): 56 | 57 | def __init__(self, in_size: int, name: str = ""): 58 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 59 | 60 | 61 | class BatchNorm3d(_BNBase): 62 | 63 | def __init__(self, in_size: int, name: str = ""): 64 | super().__init__(in_size, batch_norm=nn.BatchNorm3d, name=name) 65 | 66 | 67 | class _ConvBase(nn.Sequential): 68 | 69 | def __init__( 70 | self, 71 | in_size, 72 | out_size, 73 | kernel_size, 74 | stride, 75 | padding, 76 | activation, 77 | bn, 78 | init, 79 | conv=None, 80 | batch_norm=None, 81 | bias=True, 82 | preact=False, 83 | name="" 84 | ): 85 | super().__init__() 86 | 87 | bias = bias and (not bn) 88 | conv_unit = conv( 89 | in_size, 90 | out_size, 91 | kernel_size=kernel_size, 92 | stride=stride, 93 | padding=padding, 94 | bias=bias 95 | ) 96 | init(conv_unit.weight) 97 | if bias: 98 | nn.init.constant_(conv_unit.bias, 0) 99 | 100 | if bn: 101 | if not preact: 102 | bn_unit = batch_norm(out_size) 103 | else: 104 | bn_unit = batch_norm(in_size) 105 | 106 | if preact: 107 | if bn: 108 | self.add_module(name + 'bn', bn_unit) 109 | 110 | if activation is not None: 111 | 
self.add_module(name + 'activation', activation) 112 | 113 | self.add_module(name + 'conv', conv_unit) 114 | 115 | if not preact: 116 | if bn: 117 | self.add_module(name + 'bn', bn_unit) 118 | 119 | if activation is not None: 120 | self.add_module(name + 'activation', activation) 121 | 122 | 123 | class Conv1d(_ConvBase): 124 | 125 | def __init__( 126 | self, 127 | in_size: int, 128 | out_size: int, 129 | *, 130 | kernel_size: int = 1, 131 | stride: int = 1, 132 | padding: int = 0, 133 | activation=nn.ReLU(inplace=True), 134 | bn: bool = False, 135 | init=nn.init.kaiming_normal_, 136 | bias: bool = True, 137 | preact: bool = False, 138 | name: str = "" 139 | ): 140 | super().__init__( 141 | in_size, 142 | out_size, 143 | kernel_size, 144 | stride, 145 | padding, 146 | activation, 147 | bn, 148 | init, 149 | conv=nn.Conv1d, 150 | batch_norm=BatchNorm1d, 151 | bias=bias, 152 | preact=preact, 153 | name=name 154 | ) 155 | 156 | 157 | class Conv2d(_ConvBase): 158 | 159 | def __init__( 160 | self, 161 | in_size: int, 162 | out_size: int, 163 | *, 164 | kernel_size: Tuple[int, int] = (1, 1), 165 | stride: Tuple[int, int] = (1, 1), 166 | padding: Tuple[int, int] = (0, 0), 167 | activation=nn.ReLU(inplace=True), 168 | bn: bool = False, 169 | init=nn.init.kaiming_normal_, 170 | bias: bool = True, 171 | preact: bool = False, 172 | name: str = "" 173 | ): 174 | super().__init__( 175 | in_size, 176 | out_size, 177 | kernel_size, 178 | stride, 179 | padding, 180 | activation, 181 | bn, 182 | init, 183 | conv=nn.Conv2d, 184 | batch_norm=BatchNorm2d, 185 | bias=bias, 186 | preact=preact, 187 | name=name 188 | ) 189 | 190 | 191 | class Conv3d(_ConvBase): 192 | 193 | def __init__( 194 | self, 195 | in_size: int, 196 | out_size: int, 197 | *, 198 | kernel_size: Tuple[int, int, int] = (1, 1, 1), 199 | stride: Tuple[int, int, int] = (1, 1, 1), 200 | padding: Tuple[int, int, int] = (0, 0, 0), 201 | activation=nn.ReLU(inplace=True), 202 | bn: bool = False, 203 | init=nn.init.kaiming_normal_, 204 | bias: bool = True, 205 | preact: bool = False, 206 | name: str = "" 207 | ): 208 | super().__init__( 209 | in_size, 210 | out_size, 211 | kernel_size, 212 | stride, 213 | padding, 214 | activation, 215 | bn, 216 | init, 217 | conv=nn.Conv3d, 218 | batch_norm=BatchNorm3d, 219 | bias=bias, 220 | preact=preact, 221 | name=name 222 | ) 223 | 224 | 225 | class FC(nn.Sequential): 226 | 227 | def __init__( 228 | self, 229 | in_size: int, 230 | out_size: int, 231 | *, 232 | activation=nn.ReLU(inplace=True), 233 | bn: bool = False, 234 | init=None, 235 | preact: bool = False, 236 | name: str = "" 237 | ): 238 | super().__init__() 239 | 240 | fc = nn.Linear(in_size, out_size, bias=not bn) 241 | if init is not None: 242 | init(fc.weight) 243 | if not bn: 244 | nn.init.constant_(fc.bias, 0) 245 | 246 | if preact: 247 | if bn: 248 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 249 | 250 | if activation is not None: 251 | self.add_module(name + 'activation', activation) 252 | 253 | self.add_module(name + 'fc', fc) 254 | 255 | if not preact: 256 | if bn: 257 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 258 | 259 | if activation is not None: 260 | self.add_module(name + 'activation', activation) 261 | 262 | def set_bn_momentum_default(bn_momentum): 263 | 264 | def fn(m): 265 | if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): 266 | m.momentum = bn_momentum 267 | 268 | return fn 269 | 270 | 271 | class BNMomentumScheduler(object): 272 | 273 | def __init__( 274 | self, model, bn_lambda, last_epoch=-1, 
275 | setter=set_bn_momentum_default 276 | ): 277 | if not isinstance(model, nn.Module): 278 | raise RuntimeError( 279 | "Class '{}' is not a PyTorch nn Module".format( 280 | type(model).__name__ 281 | ) 282 | ) 283 | 284 | self.model = model 285 | self.setter = setter 286 | self.lmbd = bn_lambda 287 | 288 | self.step(last_epoch + 1) 289 | self.last_epoch = last_epoch 290 | 291 | def step(self, epoch=None): 292 | if epoch is None: 293 | epoch = self.last_epoch + 1 294 | 295 | self.last_epoch = epoch 296 | self.model.apply(self.setter(self.lmbd(epoch))) 297 | 298 | 299 | -------------------------------------------------------------------------------- /pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # This source code is licensed under the MIT license found in the 4 | # LICENSE file in the root directory of this source tree. 5 | 6 | from setuptools import setup 7 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 8 | import glob 9 | import os 10 | ROOT = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | _ext_src_root = "_ext_src" 13 | _ext_sources = glob.glob("{}/src/*.cpp".format(_ext_src_root)) + glob.glob( 14 | "{}/src/*.cu".format(_ext_src_root) 15 | ) 16 | _ext_headers = glob.glob("{}/include/*".format(_ext_src_root)) 17 | 18 | setup( 19 | name='pointnet2', 20 | ext_modules=[ 21 | CUDAExtension( 22 | name='pointnet2._ext', 23 | sources=_ext_sources, 24 | extra_compile_args={ 25 | "cxx": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))], 26 | "nvcc": ["-O2", "-I{}".format("{}/{}/include".format(ROOT, _ext_src_root))], 27 | }, 28 | ) 29 | ], 30 | cmdclass={ 31 | 'build_ext': BuildExtension 32 | } 33 | ) 34 | -------------------------------------------------------------------------------- /real/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/real/.DS_Store -------------------------------------------------------------------------------- /real/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/real/__init__.py -------------------------------------------------------------------------------- /real/calibrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import time 6 | import cv2 7 | from UR_Robot import UR_Robot 8 | from scipy import optimize 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | # User options (change me) 13 | # --------------- Setup options --------------- 14 | tcp_host_ip = '192.168.50.100' # IP and port to robot arm as TCP client (UR5) 15 | tcp_port = 30003 16 | # workspace_limits = np.asarray([[0.3, 0.75], [0.05, 0.4], [-0.2, -0.1]]) # Cols: min max, Rows: x y z (define workspace limits in robot coordinates) 17 | # workspace_limits = np.asarray([[0.35, 0.55], [0, 0.35], [0.15, 0.25]]) 18 | workspace_limits = np.asarray([[0.2, 0.4], [0.4, 0.6], [0.05, 0.1]]) 19 | 20 | calib_grid_step = 0.05 #0.05 21 | #checkerboard_offset_from_tool = [0,-0.13,0.02] # change me! 
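# Note: checkerboard_offset_from_tool is the translation (in meters) from the
# commanded tool position to the center point of the checkerboard mounted on the
# gripper. It is added directly to the tool position further below
# (tool_position + checkerboard_offset_from_tool) before the point is stored in
# measured_pts; expressing it as a fixed base-frame vector works here because the
# tool orientation is held constant for the whole calibration sweep. Re-measure
# this vector whenever the board is remounted.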
22 | checkerboard_offset_from_tool = [0,0.121,0] 23 | tool_orientation = [-np.pi/2,0,0] # [0,-2.22,2.22] # [2.22,2.22,0] 24 | # tool_orientation = [np.pi/2,np.pi/2,np.pi/2] 25 | #--------------------------------------------- 26 | 27 | 28 | # Construct 3D calibration grid across workspace 29 | print(1 + (workspace_limits[0][1] - workspace_limits[0][0])/calib_grid_step) 30 | gridspace_x = np.linspace(workspace_limits[0][0], workspace_limits[0][1], int(1 + (workspace_limits[0][1] - workspace_limits[0][0])/calib_grid_step)) 31 | gridspace_y = np.linspace(workspace_limits[1][0], workspace_limits[1][1], int(1 + (workspace_limits[1][1] - workspace_limits[1][0])/calib_grid_step)) 32 | gridspace_z = np.linspace(workspace_limits[2][0], workspace_limits[2][1], int(1 + (workspace_limits[2][1] - workspace_limits[2][0])/calib_grid_step)) 33 | calib_grid_x, calib_grid_y, calib_grid_z = np.meshgrid(gridspace_x, gridspace_y, gridspace_z) 34 | num_calib_grid_pts = calib_grid_x.shape[0]*calib_grid_x.shape[1]*calib_grid_x.shape[2] 35 | 36 | calib_grid_x.shape = (num_calib_grid_pts,1) 37 | calib_grid_y.shape = (num_calib_grid_pts,1) 38 | calib_grid_z.shape = (num_calib_grid_pts,1) 39 | calib_grid_pts = np.concatenate((calib_grid_x, calib_grid_y, calib_grid_z), axis=1) 40 | 41 | measured_pts = [] 42 | observed_pts = [] 43 | observed_pix = [] 44 | 45 | # Move robot to home pose 46 | print('Connecting to robot...') 47 | robot = UR_Robot(tcp_host_ip,tcp_port,workspace_limits,is_use_robotiq85=False) 48 | # robot.open_gripper() 49 | 50 | # Slow down robot 51 | robot.joint_acc = 1.4 52 | robot.joint_vel = 1.05 53 | 54 | # Make robot gripper point upwards 55 | robot.move_j([-np.pi, -np.pi/2, np.pi/2, 0, np.pi/2, np.pi]) 56 | # self.home_joint_config = [-(0 / 360.0) * 2 * np.pi, -(90 / 360.0) * 2 * np.pi, 57 | # (0 / 360.0) * 2 * np.pi, -(90 / 360.0) * 2 * np.pi, 58 | # -(0 / 360.0) * 2 * np.pi, 0.0] 59 | # [0,-np.pi/2,0,-np.pi/2,0,0] 60 | # Move robot to each calibration point in workspace 61 | print('Collecting data...') 62 | for calib_pt_idx in range(num_calib_grid_pts): 63 | tool_position = calib_grid_pts[calib_pt_idx,:] 64 | tool_config = [tool_position[0],tool_position[1],tool_position[2], 65 | tool_orientation[0],tool_orientation[1],tool_orientation[2]] 66 | tool_config1 = [tool_position[0], tool_position[1], tool_position[2], 67 | tool_orientation[0], tool_orientation[1], tool_orientation[2]] 68 | print(f"tool position and orientation:{tool_config1}") 69 | robot.move_j_p(tool_config) 70 | time.sleep(2) # k 71 | 72 | # Find checkerboard center 73 | checkerboard_size = (5,5) 74 | refine_criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001) 75 | camera_color_img, camera_depth_img = robot.get_camera_data() 76 | bgr_color_data = cv2.cvtColor(camera_color_img, cv2.COLOR_RGB2BGR) 77 | gray_data = cv2.cvtColor(bgr_color_data, cv2.COLOR_RGB2GRAY) 78 | checkerboard_found, corners = cv2.findChessboardCorners(gray_data, checkerboard_size, None, cv2.CALIB_CB_ADAPTIVE_THRESH) 79 | print(checkerboard_found) 80 | if checkerboard_found: 81 | corners_refined = cv2.cornerSubPix(gray_data, corners, (5,5), (-1,-1), refine_criteria) 82 | 83 | # Get observed checkerboard center 3D point in camera space 84 | checkerboard_pix = np.round(corners_refined[12,0,:]).astype(int) 85 | checkerboard_z = camera_depth_img[checkerboard_pix[1]][checkerboard_pix[0]] 86 | checkerboard_x = np.multiply(checkerboard_pix[0]-robot.cam_intrinsics[0][2],checkerboard_z/robot.cam_intrinsics[0][0]) 87 | checkerboard_y = 
np.multiply(checkerboard_pix[1]-robot.cam_intrinsics[1][2],checkerboard_z/robot.cam_intrinsics[1][1]) 88 | if checkerboard_z == 0: 89 | continue 90 | 91 | # Save calibration point and observed checkerboard center 92 | observed_pts.append([checkerboard_x,checkerboard_y,checkerboard_z]) 93 | # tool_position[2] += checkerboard_offset_from_tool 94 | tool_position = tool_position + checkerboard_offset_from_tool 95 | 96 | measured_pts.append(tool_position) 97 | observed_pix.append(checkerboard_pix) 98 | 99 | # Draw and display the corners 100 | # vis = cv2.drawChessboardCorners(robot.camera.color_data, checkerboard_size, corners_refined, checkerboard_found) 101 | vis = cv2.drawChessboardCorners(bgr_color_data, (1,1), corners_refined[12,:,:], checkerboard_found) 102 | cv2.imwrite('%06d.png' % len(measured_pts), vis) 103 | cv2.imshow('Calibration',vis) 104 | cv2.waitKey(1000) 105 | 106 | # Move robot back to home pose 107 | # robot.go_home() 108 | 109 | measured_pts = np.asarray(measured_pts) 110 | observed_pts = np.asarray(observed_pts) 111 | observed_pix = np.asarray(observed_pix) 112 | world2camera = np.eye(4) 113 | 114 | # Estimate rigid transform with SVD (from Nghia Ho) 115 | def get_rigid_transform(A, B): 116 | assert len(A) == len(B) 117 | N = A.shape[0] # Total points 118 | centroid_A = np.mean(A, axis=0) 119 | centroid_B = np.mean(B, axis=0) 120 | AA = A - np.tile(centroid_A, (N, 1)) # Centre the points 121 | BB = B - np.tile(centroid_B, (N, 1)) 122 | H = np.dot(np.transpose(AA), BB) # Dot is matrix multiplication for array 123 | U, S, Vt = np.linalg.svd(H) 124 | R = np.dot(Vt.T, U.T) 125 | if np.linalg.det(R) < 0: # Special reflection case 126 | Vt[2,:] *= -1 127 | R = np.dot(Vt.T, U.T) 128 | t = np.dot(-R, centroid_A.T) + centroid_B.T 129 | return R, t 130 | 131 | def get_rigid_transform_error(z_scale): 132 | global measured_pts, observed_pts, observed_pix, world2camera, camera 133 | 134 | # Apply z offset and compute new observed points using camera intrinsics 135 | observed_z = observed_pts[:,2:] * z_scale 136 | observed_x = np.multiply(observed_pix[:,[0]]-robot.cam_intrinsics[0][2],observed_z/robot.cam_intrinsics[0][0]) 137 | observed_y = np.multiply(observed_pix[:,[1]]-robot.cam_intrinsics[1][2],observed_z/robot.cam_intrinsics[1][1]) 138 | new_observed_pts = np.concatenate((observed_x, observed_y, observed_z), axis=1) 139 | 140 | # Estimate rigid transform between measured points and new observed points 141 | R, t = get_rigid_transform(np.asarray(measured_pts), np.asarray(new_observed_pts)) 142 | t.shape = (3,1) 143 | world2camera = np.concatenate((np.concatenate((R, t), axis=1),np.array([[0, 0, 0, 1]])), axis=0) 144 | 145 | # Compute rigid transform error 146 | registered_pts = np.dot(R,np.transpose(measured_pts)) + np.tile(t,(1,measured_pts.shape[0])) 147 | error = np.transpose(registered_pts) - new_observed_pts 148 | error = np.sum(np.multiply(error,error)) 149 | rmse = np.sqrt(error/measured_pts.shape[0]) 150 | return rmse 151 | 152 | # Optimize z scale w.r.t. 
rigid transform error 153 | print('Calibrating...') 154 | z_scale_init = 1 155 | optim_result = optimize.minimize(get_rigid_transform_error, np.asarray(z_scale_init), method='Nelder-Mead') 156 | camera_depth_offset = optim_result.x 157 | 158 | # Save camera optimized offset and camera pose 159 | print('Saving...') 160 | np.savetxt('camera_depth_scale.txt', camera_depth_offset, delimiter=' ') 161 | get_rigid_transform_error(camera_depth_offset) 162 | camera_pose = np.linalg.inv(world2camera) 163 | np.savetxt('camera_pose.txt', camera_pose, delimiter=' ') 164 | print('Done.') 165 | 166 | # DEBUG CODE ----------------------------------------------------------------------------------- 167 | 168 | # np.savetxt('measured_pts.txt', np.asarray(measured_pts), delimiter=' ') 169 | # np.savetxt('observed_pts.txt', np.asarray(observed_pts), delimiter=' ') 170 | # np.savetxt('observed_pix.txt', np.asarray(observed_pix), delimiter=' ') 171 | # measured_pts = np.loadtxt('measured_pts.txt', delimiter=' ') 172 | # observed_pts = np.loadtxt('observed_pts.txt', delimiter=' ') 173 | # observed_pix = np.loadtxt('observed_pix.txt', delimiter=' ') 174 | 175 | # fig = plt.figure() 176 | # ax = fig.add_subplot(111, projection='3d') 177 | # ax.scatter(measured_pts[:,0],measured_pts[:,1],measured_pts[:,2], c='blue') 178 | 179 | # print(camera_depth_offset) 180 | # R, t = get_rigid_transform(np.asarray(measured_pts), np.asarray(observed_pts)) 181 | # t.shape = (3,1) 182 | # camera_pose = np.concatenate((np.concatenate((R, t), axis=1),np.array([[0, 0, 0, 1]])), axis=0) 183 | # camera2robot = np.linalg.inv(camera_pose) 184 | # t_observed_pts = np.transpose(np.dot(camera2robot[0:3,0:3],np.transpose(observed_pts)) + np.tile(camera2robot[0:3,3:],(1,observed_pts.shape[0]))) 185 | 186 | # ax.scatter(t_observed_pts[:,0],t_observed_pts[:,1],t_observed_pts[:,2], c='red') 187 | 188 | # new_observed_pts = observed_pts.copy() 189 | # new_observed_pts[:,2] = new_observed_pts[:,2] * camera_depth_offset[0] 190 | # R, t = get_rigid_transform(np.asarray(measured_pts), np.asarray(new_observed_pts)) 191 | # t.shape = (3,1) 192 | # camera_pose = np.concatenate((np.concatenate((R, t), axis=1),np.array([[0, 0, 0, 1]])), axis=0) 193 | # camera2robot = np.linalg.inv(camera_pose) 194 | # t_new_observed_pts = np.transpose(np.dot(camera2robot[0:3,0:3],np.transpose(new_observed_pts)) + np.tile(camera2robot[0:3,3:],(1,new_observed_pts.shape[0]))) 195 | 196 | # ax.scatter(t_new_observed_pts[:,0],t_new_observed_pts[:,1],t_new_observed_pts[:,2], c='green') 197 | 198 | # plt.show() -------------------------------------------------------------------------------- /real/cam_pose/camera_depth_scale.txt: -------------------------------------------------------------------------------- 1 | 9.948730468741125155e-04 2 | -------------------------------------------------------------------------------- /real/cam_pose/camera_pose.txt: -------------------------------------------------------------------------------- 1 | -9.766495306940656818e-01 -1.773662733337518504e-01 1.212307686965546361e-01 5.543679023074974399e-01 2 | -2.041509203668498418e-01 9.419546311403927952e-01 -2.665405683691230987e-01 5.960364476569384173e-01 3 | -6.691857670653790502e-02 -2.850660940148158073e-01 -9.561690363814899785e-01 8.550696319465057593e-01 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /real/camera_depth_scale.txt: 
-------------------------------------------------------------------------------- 1 | 9.948730468741125155e-04 2 | -------------------------------------------------------------------------------- /real/camera_pose.txt: -------------------------------------------------------------------------------- 1 | -9.766495306940656818e-01 -1.773662733337518504e-01 1.212307686965546361e-01 5.543679023074974399e-01 2 | -2.041509203668498418e-01 9.419546311403927952e-01 -2.665405683691230987e-01 5.960364476569384173e-01 3 | -6.691857670653790502e-02 -2.850660940148158073e-01 -9.561690363814899785e-01 8.550696319465057593e-01 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5 | -------------------------------------------------------------------------------- /real/capture.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import time 4 | import matplotlib.pyplot as plt 5 | from realsenseD435 import RealsenseD435 6 | # import pyrealsense2 as rs 7 | 8 | camera = RealsenseD435() 9 | time.sleep(1) # Give camera some time to load data 10 | 11 | # while True: 12 | # color_img, depth_img = camera.get_data() 13 | # plt.subplot(211) 14 | # plt.imshow(color_img) 15 | # plt.subplot(212) 16 | # plt.imshow(depth_img) 17 | # plt.show() 18 | color_img, depth_img = camera.get_data() 19 | plt.subplot(211) 20 | plt.imshow(color_img) 21 | plt.subplot(212) 22 | plt.imshow(depth_img) 23 | plt.show() -------------------------------------------------------------------------------- /real/color_image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/real/color_image.png -------------------------------------------------------------------------------- /real/realsenseD415.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pyrealsense2 as rs 3 | import cv2 4 | 5 | class Camera(object): 6 | 7 | def __init__(self,width=640,height=480,fps=15): 8 | self.im_height = height 9 | self.im_width = width 10 | self.fps = fps 11 | self.intrinsics = None 12 | self.scale = None 13 | self.pipeline = None 14 | self.connect() 15 | # color_img, depth_img = self.get_data() 16 | #print(color_img, depth_img) 17 | 18 | 19 | def connect(self): 20 | # Configure depth and color streams 21 | self.pipeline = rs.pipeline() 22 | config = rs.config() 23 | config.enable_stream(rs.stream.depth, self.im_width, self.im_height, rs.format.z16, self.fps) 24 | config.enable_stream(rs.stream.color, self.im_width, self.im_height, rs.format.bgr8, self.fps) 25 | 26 | # Start streaming 27 | cfg = self.pipeline.start(config) 28 | 29 | # Determine intrinsics 30 | rgb_profile = cfg.get_stream(rs.stream.color) 31 | self.intrinsics = self.get_intrinsics(rgb_profile) 32 | # Determine depth scale 33 | self.scale = cfg.get_device().first_depth_sensor().get_depth_scale() 34 | print("camera depth scale:",self.scale) 35 | print("D415 have connected ...") 36 | def get_data(self): 37 | # Wait for a coherent pair of frames: depth and color 38 | frames = self.pipeline.wait_for_frames() 39 | 40 | # align 41 | align = rs.align(align_to=rs.stream.color) 42 | aligned_frames = align.process(frames) 43 | aligned_depth_frame = aligned_frames.get_depth_frame() 44 | color_frame = aligned_frames.get_color_frame() 45 | # no align 46 | # depth_frame = 
frames.get_depth_frame() 47 | # color_frame = frames.get_color_frame() 48 | 49 | # Convert images to numpy arrays 50 | depth_image = np.asanyarray(aligned_depth_frame.get_data()) 51 | color_image = np.asanyarray(color_frame.get_data()) 52 | return color_image, depth_image 53 | 54 | def plot_image(self): 55 | color_image,depth_image = self.get_data() 56 | # Apply colormap on depth image (image must be converted to 8-bit per pixel first) 57 | depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET) 58 | 59 | depth_colormap_dim = depth_colormap.shape 60 | color_colormap_dim = color_image.shape 61 | 62 | # If depth and color resolutions are different, resize color image to match depth image for display 63 | if depth_colormap_dim != color_colormap_dim: 64 | resized_color_image = cv2.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]), 65 | interpolation=cv2.INTER_AREA) 66 | images = np.hstack((resized_color_image, depth_colormap)) 67 | else: 68 | images = np.hstack((color_image, depth_colormap)) 69 | # Show images 70 | cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE) 71 | cv2.imshow('RealSense', images) 72 | # cv2.imwrite('color_image.png', color_image) 73 | cv2.waitKey(5000) 74 | 75 | def get_intrinsics(self,rgb_profile): 76 | raw_intrinsics = rgb_profile.as_video_stream_profile().get_intrinsics() 77 | print("camera intrinsics:", raw_intrinsics) 78 | # camera intrinsics form is as follows. 79 | #[[fx,0,ppx], 80 | # [0,fy,ppy], 81 | # [0,0,1]] 82 | # intrinsics = np.array([615.284,0,309.623,0,614.557,247.967,0,0,1]).reshape(3,3) #640 480 83 | intrinsics = np.array([raw_intrinsics.fx, 0, raw_intrinsics.ppx, 0, raw_intrinsics.fy, raw_intrinsics.ppy, 0, 0, 1]).reshape(3, 3) 84 | # print(intrinsics) 85 | return intrinsics 86 | if __name__== '__main__': 87 | mycamera = Camera(width=1280,height=720) 88 | # mycamera.get_data() 89 | # mycamera.plot_image() 90 | # print(mycamera.intrinsics) -------------------------------------------------------------------------------- /real/realsenseD435.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import socket 4 | import numpy as np 5 | import cv2 6 | import os 7 | import time 8 | import struct 9 | import pyrealsense2 as rs 10 | 11 | class RealsenseD435(object): 12 | 13 | def __init__(self): 14 | self.im_height = 720 15 | self.im_width = 1280 16 | self.intrinsics = None 17 | self.get_data() 18 | # color_img, depth_img = self.get_data() 19 | # print(color_img, depth_img) 20 | 21 | 22 | def get_data(self): 23 | # Return color image and depth image 24 | pipeline = rs.pipeline() 25 | config = rs.config() 26 | config.enable_stream(rs.stream.depth, self.im_width, self.im_height, rs.format.z16, 30) 27 | config.enable_stream(rs.stream.color, self.im_width, self.im_height, rs.format.bgr8, 30) 28 | profile = pipeline.start(config) 29 | frames = pipeline.wait_for_frames() 30 | depth = frames.get_depth_frame() 31 | color = frames.get_color_frame() 32 | depth_img=np.asarray(depth.get_data()) 33 | color_img=np.asarray(color.get_data()) 34 | # print(depth_img.dtype,color_img.dtype) 35 | # color_profile = color.get_profile() 36 | # cvsprofile = rs.video_stream_profile(color_profile) 37 | # color_intrin = cvsprofile.get_intrinsics() # [ 640x480 p[325.14 244.014] f[607.879 607.348] Inverse Brown Conrady [0 0 0 0 0] ] 38 | 39 | # Get camera intrinsics 40 | self.intrinsics = [607.879,0,325.14,0,607.348,244.014,0,0,1] # Change me!!!!!!! 
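        # Possible replacement for the hard-coded matrix above (untested sketch,
        # mirroring realsenseD415.py in this folder): read the intrinsics off the
        # color stream profile instead of pasting numbers in by hand.
        #
        #   color_profile = color.get_profile()
        #   intrin = rs.video_stream_profile(color_profile).get_intrinsics()
        #   self.intrinsics = np.array([intrin.fx, 0, intrin.ppx,
        #                               0, intrin.fy, intrin.ppy,
        #                               0, 0, 1]).reshape(3, 3)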
41 | 42 | return color_img, depth_img 43 | 44 | # VPG 45 | # class Camera(object): 46 | # 47 | # def __init__(self): 48 | # 49 | # # Data options (change me) 50 | # self.im_height = 720 51 | # self.im_width = 1280 52 | # self.tcp_host_ip = '127.0.0.1' 53 | # self.tcp_port = 50000 54 | # self.buffer_size = 4098 # 4 KiB 55 | # 56 | # # Connect to server 57 | # self.tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 58 | # self.tcp_socket.connect((self.tcp_host_ip, self.tcp_port)) 59 | # 60 | # self.intrinsics = None 61 | # self.get_data() 62 | # # color_img, depth_img = self.get_data() 63 | # # print(color_img, depth_img) 64 | # 65 | # 66 | # def get_data(self): 67 | # 68 | # # Ping the server with anything 69 | # self.tcp_socket.send(b'asdf') 70 | # 71 | # # Fetch TCP data: 72 | # # color camera intrinsics, 9 floats, number of bytes: 9 x 4 73 | # # depth scale for converting depth from uint16 to float, 1 float, number of bytes: 4 74 | # # depth image, self.im_width x self.im_height uint16, number of bytes: self.im_width x self.im_height x 2 75 | # # color image, self.im_width x self.im_height x 3 uint8, number of bytes: self.im_width x self.im_height x 3 76 | # data = b'' 77 | # while len(data) < (10*4 + self.im_height*self.im_width*5): 78 | # data += self.tcp_socket.recv(self.buffer_size) 79 | # 80 | # # Reorganize TCP data into color and depth frame 81 | # self.intrinsics = np.fromstring(data[0:(9*4)], np.float32).reshape(3, 3) 82 | # depth_scale = np.fromstring(data[(9*4):(10*4)], np.float32)[0] 83 | # depth_img = np.fromstring(data[(10*4):((10*4)+self.im_width*self.im_height*2)], np.uint16).reshape(self.im_height, self.im_width) 84 | # color_img = np.fromstring(data[((10*4)+self.im_width*self.im_height*2):], np.uint8).reshape(self.im_height, self.im_width, 3) 85 | # depth_img = depth_img.astype(float) * depth_scale 86 | # return color_img, depth_img 87 | 88 | 89 | # DEPRECATED CAMERA CLASS FOR REALSENSE WITH ROS 90 | # ---------------------------------------------- 91 | 92 | # import rospy 93 | # from realsense_camera.msg import StreamData 94 | 95 | # class Camera(object): 96 | 97 | 98 | # def __init__(self): 99 | 100 | # # Data options (change me) 101 | # self.im_height = 720 102 | # self.im_width = 1280 103 | 104 | # # RGB-D data variables 105 | # self.color_data = np.zeros((self.im_height,self.im_width,3)) 106 | # self.depth_data = np.zeros((self.im_height,self.im_width)) 107 | # self.intrinsics = np.zeros((3,3)) 108 | 109 | # # Start ROS subscriber to fetch RealSense RGB-D data 110 | # rospy.init_node('listener', anonymous=True) 111 | # rospy.Subscriber("/realsense_camera/stream", StreamData, self.realsense_stream_callback) 112 | 113 | # # Recording variables 114 | # self.frame_idx = 0 115 | # self.is_recording = False 116 | # self.recording_directory = '' 117 | 118 | # # ROS subscriber callback function 119 | # def realsense_stream_callback(self, data): 120 | # tmp_color_data = np.asarray(bytearray(data.color)) 121 | # tmp_color_data.shape = (self.im_height,self.im_width,3) 122 | # tmp_depth_data = np.asarray(data.depth) 123 | # tmp_depth_data.shape = (self.im_height,self.im_width) 124 | # tmp_depth_data = tmp_depth_data.astype(float)/10000 125 | # tmp_intrinsics = np.asarray(data.intrinsics) 126 | # tmp_intrinsics.shape = (3,3) 127 | 128 | # self.color_data = tmp_color_data 129 | # self.depth_data = tmp_depth_data 130 | # self.intrinsics = tmp_intrinsics 131 | 132 | # if self.is_recording: 133 | # tmp_color_image = cv2.cvtColor(tmp_color_data, cv2.COLOR_RGB2BGR) 134 | 
# cv2.imwrite(os.path.join(self.recording_directory, '%06d.color.png' % (self.frame_idx)), tmp_color_image) 135 | # tmp_depth_image = np.round(tmp_depth_data * 10000).astype(np.uint16) # Save depth in 1e-4 meters 136 | # cv2.imwrite(os.path.join(self.recording_directory, '%06d.depth.png' % (self.frame_idx)), tmp_depth_image) 137 | # self.frame_idx += 1 138 | # else: 139 | # self.frame_idx = 0 140 | 141 | # time.sleep(0.1) 142 | 143 | # # Start/stop recording RGB-D video stream 144 | # def start_recording(self, directory): 145 | # self.recording_directory = directory 146 | # self.is_recording = True 147 | # def stop_recording(self): 148 | # self.is_recording = False 149 | 150 | -------------------------------------------------------------------------------- /real/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import cv2 4 | print('Saving...') 5 | # np.savetxt('camera_depth_scale.txt',[1,2], delimiter=' ') 6 | 7 | 8 | def rpy2R(rpy): # [r,p,y] 单位rad 9 | rot_x = np.array([[1, 0, 0], 10 | [0, math.cos(rpy[0]), -math.sin(rpy[0])], 11 | [0, math.sin(rpy[0]), math.cos(rpy[0])]]) 12 | rot_y = np.array([[math.cos(rpy[1]), 0, math.sin(rpy[1])], 13 | [0, 1, 0], 14 | [-math.sin(rpy[1]), 0, math.cos(rpy[1])]]) 15 | rot_z = np.array([[math.cos(rpy[2]), -math.sin(rpy[2]), 0], 16 | [math.sin(rpy[2]), math.cos(rpy[2]), 0], 17 | [0, 0, 1]]) 18 | R = np.dot(rot_z, np.dot(rot_y, rot_x)) 19 | return R 20 | 21 | 22 | def R2rotating_vector(R): 23 | theta = math.acos((R[0, 0] + R[1, 1] + R[2, 2] - 1) / 2) 24 | print(f"theta:{theta}") 25 | rx = (R[2, 1] - R[1, 2]) / (2 * math.sin(theta)) 26 | ry = (R[0, 2] - R[2, 0]) / (2 * math.sin(theta)) 27 | rz = (R[1, 0] - R[0, 1]) / (2 * math.sin(theta)) 28 | return np.array([rx, ry, rz]) 29 | 30 | if __name__ =="__main__": 31 | R = rpy2R([3.14,1.57,0]) 32 | print(R) 33 | v1 = R2rotating_vector(R) 34 | v2 = cv2.Rodrigues(R) 35 | 36 | print(v1) 37 | print(v2[0]) -------------------------------------------------------------------------------- /real/testForRealSense.py: -------------------------------------------------------------------------------- 1 | ## License: Apache 2.0. See LICENSE file in root directory. 2 | ## Copyright(c) 2015-2017 Intel Corporation. All Rights Reserved. 
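# Related snippet (illustrative, not exercised in this script): once the color
# intrinsics have been read out, a pixel (u, v) with depth d (in meters) can be
# deprojected to a 3D point in the camera frame with pyrealsense2's helper,
#
#   point = rs.rs2_deproject_pixel_to_point(color_intrin, [u, v], d)
#
# which is the pixel-to-real-world conversion the intrinsics queried below are
# needed for.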
3 | 4 | ############################################### 5 | ## Open CV and Numpy integration ## 6 | ############################################### 7 | 8 | import pyrealsense2 as rs 9 | import numpy as np 10 | import cv2 11 | 12 | # ********************************************TEST01********************************************** 13 | # # Configure depth and color streams 14 | # pipeline = rs.pipeline() 15 | # config = rs.config() 16 | # 17 | # # Get device product line for setting a supporting resolution 18 | # pipeline_wrapper = rs.pipeline_wrapper(pipeline) 19 | # pipeline_profile = config.resolve(pipeline_wrapper) 20 | # device = pipeline_profile.get_device() 21 | # device_product_line = str(device.get_info(rs.camera_info.product_line)) 22 | # 23 | # found_rgb = False 24 | # for s in device.sensors: 25 | # if s.get_info(rs.camera_info.name) == 'RGB Camera': 26 | # found_rgb = True 27 | # break 28 | # if not found_rgb: 29 | # print("The demo requires Depth camera with Color sensor") 30 | # exit(0) 31 | # 32 | # config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) 33 | # 34 | # if device_product_line == 'L500': 35 | # print("111") 36 | # config.enable_stream(rs.stream.color, 960, 540, rs.format.bgr8, 30) 37 | # else: 38 | # config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) 39 | # 40 | # # Start streaming 41 | # pipeline.start(config) 42 | # 43 | # try: 44 | # while True: 45 | # 46 | # # Wait for a coherent pair of frames: depth and color 47 | # frames = pipeline.wait_for_frames() 48 | # depth_frame = frames.get_depth_frame() 49 | # color_frame = frames.get_color_frame() 50 | # if not depth_frame or not color_frame: 51 | # continue 52 | # 53 | # # Convert images to numpy arrays 54 | # depth_image = np.asanyarray(depth_frame.get_data()) 55 | # print(depth_frame) 56 | # color_image = np.asanyarray(color_frame.get_data()) 57 | # print(color_image) 58 | # 59 | # # Apply colormap on depth image (image must be converted to 8-bit per pixel first) 60 | # depth_colormap = cv2.applyColorMap(cv2.convertScaleAbs(depth_image, alpha=0.03), cv2.COLORMAP_JET) 61 | # 62 | # depth_colormap_dim = depth_colormap.shape 63 | # color_colormap_dim = color_image.shape 64 | # 65 | # # If depth and color resolutions are different, resize color image to match depth image for display 66 | # if depth_colormap_dim != color_colormap_dim: 67 | # resized_color_image = cv2.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]), interpolation=cv2.INTER_AREA) 68 | # images = np.hstack((resized_color_image, depth_colormap)) 69 | # else: 70 | # images = np.hstack((color_image, depth_colormap)) 71 | # 72 | # # Show images 73 | # cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE) 74 | # cv2.imshow('RealSense', iges) 75 | # cv2.waitKey(1) 76 | # 77 | # finally: 78 | # 79 | # # Stop streaming 80 | # pipeline.stop() 81 | 82 | 83 | 84 | # ********************************************TEST02********************************************** 85 | 86 | 87 | # 获取摄像头的内参是为了将像素坐标转化成实际坐标 88 | 89 | pipeline = rs.pipeline() 90 | config = rs.config() 91 | config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30) 92 | config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30) 93 | profile = pipeline.start(config) 94 | frames = pipeline.wait_for_frames() 95 | depth = frames.get_depth_frame() 96 | color = frames.get_color_frame() 97 | # 获取内参 98 | depth_profile = depth.get_profile() 99 | print(depth_profile) 100 | # 101 | print(type(depth_profile)) 102 | # 103 | print(depth_profile.fps()) 
104 | # 30 105 | print(depth_profile.stream_index()) 106 | # 0 107 | print(depth_profile.stream_name()) 108 | # Depth 109 | print(depth_profile.stream_type()) 110 | # stream.depth 111 | print('', depth_profile.unique_id) 112 | # > 113 | 114 | color_profile = color.get_profile() 115 | print(color_profile) 116 | # 117 | print(type(color_profile)) 118 | # 119 | print(depth_profile.fps()) 120 | # 30 121 | print(depth_profile.stream_index()) 122 | # 0 123 | 124 | cvsprofile = rs.video_stream_profile(color_profile) 125 | dvsprofile = rs.video_stream_profile(depth_profile) 126 | 127 | color_intrin = cvsprofile.get_intrinsics() 128 | print("color intrin:" , color_intrin) 129 | print("color intrin type:",type(color_intrin)) 130 | # width: 640, height: 480, ppx: 318.482, ppy: 241.167, fx: 616.591, fy: 616.765, model: 2, coeffs: [0, 0, 0, 0, 0] 131 | 132 | # depth_intrin = dvsprofile.get_intrinsics() 133 | # print(depth_intrin) 134 | # # width: 640, height: 480, ppx: 317.78, ppy: 236.709, fx: 382.544, fy: 382.544, model: 4, coeffs: [0, 0, 0, 0, 0] 135 | # 136 | # extrin = depth_profile.get_extrinsics_to(color_profile) 137 | # print(extrin) 138 | # # rotation: [0.999984, -0.00420567, -0.00380472, 0.00420863, 0.999991, 0.00076919, 0.00380145, -0.00078519, 0.999992] 139 | # translation: [0.0147755, 0.000203265, 0.00051274] 140 | 141 | -------------------------------------------------------------------------------- /real/testGripper.py: -------------------------------------------------------------------------------- 1 | import robotiq_gripper 2 | import time 3 | 4 | # ip = "127.0.0.1" 5 | ip="192.168.50.139" 6 | def log_info(gripper): 7 | print(f"Pos: {str(gripper.get_current_position()): >3} " 8 | f"Open: {gripper.is_open(): <2} " 9 | f"Closed: {gripper.is_closed(): <2} ") 10 | 11 | print("Creating gripper...") 12 | gripper = robotiq_gripper.RobotiqGripper() 13 | print("Connecting to gripper...") 14 | gripper.connect(ip, 63352) 15 | print("Resting gripper...") 16 | gripper._reset() 17 | print("Activating gripper...") 18 | gripper.activate() 19 | time.sleep(1.5) 20 | 21 | print("Testing gripper......") 22 | gripper.move_and_wait_for_pos(255, 255, 255) 23 | log_info(gripper) 24 | print("gripper had closed!") 25 | time.sleep(1.5) 26 | gripper.move_and_wait_for_pos(0, 255, 255) 27 | log_info(gripper) 28 | print("gripper had opened!") 29 | time.sleep(1.5) 30 | -------------------------------------------------------------------------------- /real/testReceiveCommand.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TX-Leo/Graspness_6dof_robotic_grasping/bf2d0977272831834ea0edde0d426954ad6c4c8d/real/testReceiveCommand.py -------------------------------------------------------------------------------- /real/touch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import time 6 | import cv2 7 | from realsenseD415 import Camera 8 | from UR_Robot import UR_Robot 9 | 10 | 11 | # User options (change me) 12 | # --------------- Setup options --------------- 13 | tcp_host_ip = '192.168.50.100' # IP and port to robot arm as TCP client (UR5) 14 | tcp_port = 30003 15 | tool_orientation = [-np.pi,0,0] 16 | # --------------------------------------------- 17 | 18 | # Move robot to home pose 19 | robot = UR_Robot(tcp_host_ip,tcp_port) 20 | # robot.move_j([-np.pi, -np.pi/2, np.pi/2, 0, np.pi/2, np.pi]) 21 | grasp_home = [0.4, 0, 0.4, 
-np.pi, 0, 0] # you can change me 22 | robot.move_j_p(grasp_home) 23 | robot.open_gripper() 24 | 25 | # Slow down robot 26 | robot.joint_acc = 1.4 27 | robot.joint_vel = 1.05 28 | 29 | # Callback function for clicking on OpenCV window 30 | click_point_pix = () 31 | camera_color_img, camera_depth_img = robot.get_camera_data() 32 | def mouseclick_callback(event, x, y, flags, param): 33 | if event == cv2.EVENT_LBUTTONDOWN: 34 | global camera, robot, click_point_pix 35 | click_point_pix = (x,y) 36 | 37 | # Get click point in camera coordinates 38 | click_z = camera_depth_img[y][x] * robot.cam_depth_scale 39 | click_x = np.multiply(x-robot.cam_intrinsics[0][2],click_z/robot.cam_intrinsics[0][0]) 40 | click_y = np.multiply(y-robot.cam_intrinsics[1][2],click_z/robot.cam_intrinsics[1][1]) 41 | if click_z == 0: 42 | return 43 | click_point = np.asarray([click_x,click_y,click_z]) 44 | click_point.shape = (3,1) 45 | 46 | # Convert camera to robot coordinates 47 | # camera2robot = np.linalg.inv(robot.cam_pose) 48 | camera2robot = robot.cam_pose 49 | target_position = np.dot(camera2robot[0:3,0:3],click_point) + camera2robot[0:3,3:] 50 | 51 | target_position = target_position[0:3,0] 52 | print(target_position) 53 | print(target_position.shape) 54 | destination=np.append(target_position,tool_orientation) 55 | robot.plane_grasp([target_position[0],target_position[1],target_position[2]]) 56 | 57 | 58 | # Show color and depth frames 59 | cv2.namedWindow('color') 60 | cv2.setMouseCallback('color', mouseclick_callback) 61 | cv2.namedWindow('depth') 62 | 63 | while True: 64 | camera_color_img, camera_depth_img = robot.get_camera_data() 65 | bgr_data = cv2.cvtColor(camera_color_img, cv2.COLOR_RGB2BGR) 66 | if len(click_point_pix) != 0: 67 | bgr_data = cv2.circle(bgr_data, click_point_pix, 7, (0,0,255), 2) 68 | cv2.imshow('color', bgr_data) 69 | cv2.imshow('depth', camera_depth_img) 70 | 71 | if cv2.waitKey(1) == ord('c'): 72 | break 73 | 74 | cv2.destroyAllWindows() 75 | -------------------------------------------------------------------------------- /requirements (copy).txt: -------------------------------------------------------------------------------- 1 | torch>=1.8.0 2 | tensorboard==2.3 3 | numpy 4 | scipy 5 | open3d>=0.8 6 | Pillow 7 | tqdm 8 | MinkowskiEngine==0.5.4 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorboard==2.3 2 | numpy 3 | scipy 4 | open3d>=0.8 5 | Pillow 6 | tqdm 7 | 8 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import argparse 5 | import time 6 | import torch 7 | from torch.utils.data import DataLoader 8 | from graspnetAPI.graspnet_eval import GraspGroup, GraspNetEval 9 | 10 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2')) 12 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 13 | sys.path.append(os.path.join(ROOT_DIR, 'models')) 14 | sys.path.append(os.path.join(ROOT_DIR, 'dataset')) 15 | 16 | from models.graspnet import GraspNet, pred_decode 17 | from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn 18 | from utils.collision_detector import ModelFreeCollisionDetector 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--dataset_root', default=None, 
required=True) 22 | parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None, required=True) 23 | parser.add_argument('--dump_dir', help='Dump dir to save outputs', default=None, required=True) 24 | parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim') 25 | parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]') 26 | parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]') 27 | parser.add_argument('--batch_size', type=int, default=1, help='Batch Size during inference [default: 1]') 28 | parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size for sparse convolution') 29 | parser.add_argument('--collision_thresh', type=float, default=0.01, 30 | help='Collision Threshold in collision detection [default: 0.01]') 31 | parser.add_argument('--voxel_size_cd', type=float, default=0.01, help='Voxel Size for collision detection') 32 | parser.add_argument('--infer', action='store_true', default=False) 33 | parser.add_argument('--eval', action='store_true', default=False) 34 | cfgs = parser.parse_args() 35 | 36 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG 37 | if not os.path.exists(cfgs.dump_dir): 38 | os.mkdir(cfgs.dump_dir) 39 | 40 | 41 | # Init datasets and dataloaders 42 | def my_worker_init_fn(worker_id): 43 | np.random.seed(np.random.get_state()[1][0] + worker_id) 44 | pass 45 | 46 | 47 | def inference(): 48 | test_dataset = GraspNetDataset(cfgs.dataset_root, split='test_seen', camera=cfgs.camera, num_points=cfgs.num_point, 49 | voxel_size=cfgs.voxel_size, remove_outlier=True, augment=False, load_label=False) 50 | print('Test dataset length: ', len(test_dataset)) 51 | scene_list = test_dataset.scene_list() 52 | test_dataloader = DataLoader(test_dataset, batch_size=cfgs.batch_size, shuffle=False, 53 | num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn) 54 | print('Test dataloader length: ', len(test_dataloader)) 55 | # Init the model 56 | net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=False) 57 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 58 | net.to(device) 59 | # Load checkpoint 60 | checkpoint = torch.load(cfgs.checkpoint_path) 61 | net.load_state_dict(checkpoint['model_state_dict']) 62 | start_epoch = checkpoint['epoch'] 63 | print("-> loaded checkpoint %s (epoch: %d)" % (cfgs.checkpoint_path, start_epoch)) 64 | 65 | batch_interval = 100 66 | net.eval() 67 | tic = time.time() 68 | for batch_idx, batch_data in enumerate(test_dataloader): 69 | for key in batch_data: 70 | if 'list' in key: 71 | for i in range(len(batch_data[key])): 72 | for j in range(len(batch_data[key][i])): 73 | batch_data[key][i][j] = batch_data[key][i][j].to(device) 74 | else: 75 | batch_data[key] = batch_data[key].to(device) 76 | 77 | # Forward pass 78 | with torch.no_grad(): 79 | end_points = net(batch_data) 80 | grasp_preds = pred_decode(end_points) 81 | 82 | # Dump results for evaluation 83 | for i in range(cfgs.batch_size): 84 | data_idx = batch_idx * cfgs.batch_size + i 85 | preds = grasp_preds[i].detach().cpu().numpy() 86 | 87 | gg = GraspGroup(preds) 88 | # collision detection 89 | if cfgs.collision_thresh > 0: 90 | cloud = test_dataset.get_data(data_idx, return_raw_cloud=True) 91 | mfcdetector = ModelFreeCollisionDetector(cloud, voxel_size=cfgs.voxel_size_cd) 92 | collision_mask = mfcdetector.detect(gg, approach_dist=0.05, 
                                                    collision_thresh=cfgs.collision_thresh)
93 |                 gg = gg[~collision_mask]
94 |
95 |             # save grasps
96 |             save_dir = os.path.join(cfgs.dump_dir, scene_list[data_idx], cfgs.camera)
97 |             save_path = os.path.join(save_dir, str(data_idx % 256).zfill(4) + '.npy')
98 |             if not os.path.exists(save_dir):
99 |                 os.makedirs(save_dir)
100 |             gg.save_npy(save_path)
101 |
102 |         if (batch_idx + 1) % batch_interval == 0:
103 |             toc = time.time()
104 |             print('Eval batch: %d, time: %fs' % (batch_idx + 1, (toc - tic) / batch_interval))
105 |             tic = time.time()
106 |
107 |
108 | def evaluate(dump_dir):
109 |     ge = GraspNetEval(root=cfgs.dataset_root, camera=cfgs.camera, split='test_seen')
110 |     res, ap = ge.eval_seen(dump_folder=dump_dir, proc=6)
111 |     save_dir = os.path.join(cfgs.dump_dir, 'ap_{}.npy'.format(cfgs.camera))
112 |     np.save(save_dir, res)
113 |
114 |
115 | if __name__ == '__main__':
116 |     if cfgs.infer:
117 |         inference()
118 |     if cfgs.eval:
119 |         evaluate(cfgs.dump_dir)
120 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import numpy as np
4 | from datetime import datetime
5 | import argparse
6 |
7 | import torch
8 | import torch.optim as optim
9 | from torch.utils.data import DataLoader
10 | from torch.utils.tensorboard import SummaryWriter
11 |
12 | ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
13 | sys.path.append(os.path.join(ROOT_DIR, 'pointnet2'))
14 | sys.path.append(os.path.join(ROOT_DIR, 'utils'))
15 | sys.path.append(os.path.join(ROOT_DIR, 'models'))
16 | sys.path.append(os.path.join(ROOT_DIR, 'dataset'))
17 |
18 | from models.graspnet import GraspNet
19 | from models.loss import get_loss
20 | from dataset.graspnet_dataset import GraspNetDataset, minkowski_collate_fn, load_grasp_labels
21 |
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('--dataset_root', default=None, required=True)
24 | parser.add_argument('--camera', default='kinect', help='Camera split [realsense/kinect]')
25 | parser.add_argument('--checkpoint_path', help='Model checkpoint path', default=None)
26 | parser.add_argument('--model_name', type=str, default=None)
27 | parser.add_argument('--log_dir', default='logs/log')
28 | parser.add_argument('--num_point', type=int, default=15000, help='Point Number [default: 15000]')
29 | parser.add_argument('--seed_feat_dim', default=512, type=int, help='Point wise feature dim')
30 | parser.add_argument('--voxel_size', type=float, default=0.005, help='Voxel Size to process point clouds')
31 | parser.add_argument('--max_epoch', type=int, default=10, help='Epoch to run [default: 10]')
32 | parser.add_argument('--batch_size', type=int, default=4, help='Batch Size during training [default: 4]')
33 | parser.add_argument('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
34 | parser.add_argument('--resume', action='store_true', default=False, help='Whether to resume from checkpoint')
35 | cfgs = parser.parse_args()
36 | # ------------------------------------------------------------------------- GLOBAL CONFIG BEG
37 | EPOCH_CNT = 0
38 | CHECKPOINT_PATH = cfgs.checkpoint_path if cfgs.checkpoint_path is not None and cfgs.resume else None
39 | if not os.path.exists(cfgs.log_dir):
40 |     os.makedirs(cfgs.log_dir)
41 |
42 | LOG_FOUT = open(os.path.join(cfgs.log_dir, 'log_train.txt'), 'a')
43 | LOG_FOUT.write(str(cfgs) + '\n')
44 |
45 |
46 | def log_string(out_str):
47 |
LOG_FOUT.write(out_str + '\n') 48 | LOG_FOUT.flush() 49 | print(out_str) 50 | 51 | 52 | # Init datasets and dataloaders 53 | def my_worker_init_fn(worker_id): 54 | np.random.seed(np.random.get_state()[1][0] + worker_id) 55 | pass 56 | 57 | 58 | grasp_labels = load_grasp_labels(cfgs.dataset_root) 59 | TRAIN_DATASET = GraspNetDataset(cfgs.dataset_root, grasp_labels=grasp_labels, camera=cfgs.camera, split='train', 60 | num_points=cfgs.num_point, voxel_size=cfgs.voxel_size, 61 | remove_outlier=True, augment=True, load_label=True) 62 | print('train dataset length: ', len(TRAIN_DATASET)) 63 | TRAIN_DATALOADER = DataLoader(TRAIN_DATASET, batch_size=cfgs.batch_size, shuffle=True, 64 | num_workers=0, worker_init_fn=my_worker_init_fn, collate_fn=minkowski_collate_fn) 65 | print('train dataloader length: ', len(TRAIN_DATALOADER)) 66 | 67 | net = GraspNet(seed_feat_dim=cfgs.seed_feat_dim, is_training=True) 68 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 69 | net.to(device) 70 | # Load the Adam optimizer 71 | optimizer = optim.Adam(net.parameters(), lr=cfgs.learning_rate) 72 | start_epoch = 0 73 | if CHECKPOINT_PATH is not None and os.path.isfile(CHECKPOINT_PATH): 74 | checkpoint = torch.load(CHECKPOINT_PATH) 75 | net.load_state_dict(checkpoint['model_state_dict']) 76 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 77 | start_epoch = checkpoint['epoch'] 78 | log_string("-> loaded checkpoint %s (epoch: %d)" % (CHECKPOINT_PATH, start_epoch)) 79 | # TensorBoard Visualizers 80 | TRAIN_WRITER = SummaryWriter(os.path.join(cfgs.log_dir, 'train')) 81 | 82 | 83 | def get_current_lr(epoch): 84 | lr = cfgs.learning_rate 85 | lr = lr * (0.95 ** epoch) 86 | return lr 87 | 88 | 89 | def adjust_learning_rate(optimizer, epoch): 90 | lr = get_current_lr(epoch) 91 | for param_group in optimizer.param_groups: 92 | param_group['lr'] = lr 93 | 94 | 95 | def train_one_epoch(): 96 | stat_dict = {} # collect statistics 97 | adjust_learning_rate(optimizer, EPOCH_CNT) 98 | net.train() 99 | batch_interval = 20 100 | for batch_idx, batch_data_label in enumerate(TRAIN_DATALOADER): 101 | for key in batch_data_label: 102 | if 'list' in key: 103 | for i in range(len(batch_data_label[key])): 104 | for j in range(len(batch_data_label[key][i])): 105 | batch_data_label[key][i][j] = batch_data_label[key][i][j].to(device) 106 | else: 107 | batch_data_label[key] = batch_data_label[key].to(device) 108 | 109 | end_points = net(batch_data_label) 110 | loss, end_points = get_loss(end_points) 111 | loss.backward() 112 | optimizer.step() 113 | optimizer.zero_grad() 114 | 115 | for key in end_points: 116 | if 'loss' in key or 'acc' in key or 'prec' in key or 'recall' in key or 'count' in key: 117 | if key not in stat_dict: 118 | stat_dict[key] = 0 119 | stat_dict[key] += end_points[key].item() 120 | 121 | if (batch_idx + 1) % batch_interval == 0: 122 | log_string(' ----epoch: %03d ---- batch: %03d ----' % (EPOCH_CNT, batch_idx + 1)) 123 | for key in sorted(stat_dict.keys()): 124 | TRAIN_WRITER.add_scalar(key, stat_dict[key] / batch_interval, 125 | (EPOCH_CNT * len(TRAIN_DATALOADER) + batch_idx) * cfgs.batch_size) 126 | log_string('mean %s: %f' % (key, stat_dict[key] / batch_interval)) 127 | stat_dict[key] = 0 128 | 129 | 130 | def train(start_epoch): 131 | global EPOCH_CNT 132 | for epoch in range(start_epoch, cfgs.max_epoch): 133 | EPOCH_CNT = epoch 134 | log_string('**** EPOCH %03d ****' % epoch) 135 | log_string('Current learning rate: %f' % (get_current_lr(epoch))) 136 | 
log_string(str(datetime.now())) 137 | # Reset numpy seed. 138 | # REF: https://github.com/pytorch/pytorch/issues/5059 139 | np.random.seed() 140 | train_one_epoch() 141 | 142 | save_dict = {'epoch': epoch + 1, 'optimizer_state_dict': optimizer.state_dict(), 143 | 'model_state_dict': net.state_dict()} 144 | torch.save(save_dict, os.path.join(cfgs.log_dir, cfgs.model_name + '_epoch' + str(epoch + 1).zfill(2) + '.tar')) 145 | 146 | 147 | if __name__ == '__main__': 148 | train(start_epoch) 149 | -------------------------------------------------------------------------------- /utils/collision_detector.py: -------------------------------------------------------------------------------- 1 | """ Collision detection to remove collided grasp pose predictions. 2 | Author: chenxi-wang 3 | """ 4 | 5 | import os 6 | import sys 7 | import numpy as np 8 | import open3d as o3d 9 | 10 | class ModelFreeCollisionDetector(): 11 | """ Collision detection in scenes without object labels. Current finger width and length are fixed. 12 | 13 | Input: 14 | scene_points: [numpy.ndarray, (N,3), numpy.float32] 15 | the scene points to detect 16 | voxel_size: [float] 17 | used for downsample 18 | 19 | Example usage: 20 | mfcdetector = ModelFreeCollisionDetector(scene_points, voxel_size=0.005) 21 | collision_mask = mfcdetector.detect(grasp_group, approach_dist=0.03) 22 | collision_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, return_ious=True) 23 | collision_mask, empty_mask = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, 24 | return_empty_grasp=True, empty_thresh=0.01) 25 | collision_mask, empty_mask, iou_list = mfcdetector.detect(grasp_group, approach_dist=0.03, collision_thresh=0.05, 26 | return_empty_grasp=True, empty_thresh=0.01, return_ious=True) 27 | """ 28 | def __init__(self, scene_points, voxel_size=0.005): 29 | self.finger_width = 0.01 30 | self.finger_length = 0.06 31 | self.voxel_size = voxel_size 32 | scene_cloud = o3d.geometry.PointCloud() 33 | scene_cloud.points = o3d.utility.Vector3dVector(scene_points) 34 | scene_cloud = scene_cloud.voxel_down_sample(voxel_size) 35 | self.scene_points = np.array(scene_cloud.points) 36 | 37 | def detect(self, grasp_group, approach_dist=0.03, collision_thresh=0.05, return_empty_grasp=False, empty_thresh=0.01, return_ious=False): 38 | """ Detect collision of grasps. 
39 | 40 | Input: 41 | grasp_group: [GraspGroup, M grasps] 42 | the grasps to check 43 | approach_dist: [float] 44 | the distance for a gripper to move along approaching direction before grasping 45 | this shifting space requires no point either 46 | collision_thresh: [float] 47 | if global collision iou is greater than this threshold, 48 | a collision is detected 49 | return_empty_grasp: [bool] 50 | if True, return a mask to imply whether there are objects in a grasp 51 | empty_thresh: [float] 52 | if inner space iou is smaller than this threshold, 53 | a collision is detected 54 | only set when [return_empty_grasp] is True 55 | return_ious: [bool] 56 | if True, return global collision iou and part collision ious 57 | 58 | Output: 59 | collision_mask: [numpy.ndarray, (M,), numpy.bool] 60 | True implies collision 61 | [optional] empty_mask: [numpy.ndarray, (M,), numpy.bool] 62 | True implies empty grasp 63 | only returned when [return_empty_grasp] is True 64 | [optional] iou_list: list of [numpy.ndarray, (M,), numpy.float32] 65 | global and part collision ious, containing 66 | [global_iou, left_iou, right_iou, bottom_iou, shifting_iou] 67 | only returned when [return_ious] is True 68 | """ 69 | approach_dist = max(approach_dist, self.finger_width) 70 | T = grasp_group.translations 71 | R = grasp_group.rotation_matrices 72 | heights = grasp_group.heights[:,np.newaxis] 73 | depths = grasp_group.depths[:,np.newaxis] 74 | widths = grasp_group.widths[:,np.newaxis] 75 | targets = self.scene_points[np.newaxis,:,:] - T[:,np.newaxis,:] 76 | targets = np.matmul(targets, R) 77 | 78 | ## collision detection 79 | # height mask 80 | mask1 = ((targets[:,:,2] > -heights/2) & (targets[:,:,2] < heights/2)) 81 | # left finger mask 82 | mask2 = ((targets[:,:,0] > depths - self.finger_length) & (targets[:,:,0] < depths)) 83 | mask3 = (targets[:,:,1] > -(widths/2 + self.finger_width)) 84 | mask4 = (targets[:,:,1] < -widths/2) 85 | # right finger mask 86 | mask5 = (targets[:,:,1] < (widths/2 + self.finger_width)) 87 | mask6 = (targets[:,:,1] > widths/2) 88 | # bottom mask 89 | mask7 = ((targets[:,:,0] <= depths - self.finger_length)\ 90 | & (targets[:,:,0] > depths - self.finger_length - self.finger_width)) 91 | # shifting mask 92 | mask8 = ((targets[:,:,0] <= depths - self.finger_length - self.finger_width)\ 93 | & (targets[:,:,0] > depths - self.finger_length - self.finger_width - approach_dist)) 94 | 95 | # get collision mask of each point 96 | left_mask = (mask1 & mask2 & mask3 & mask4) 97 | right_mask = (mask1 & mask2 & mask5 & mask6) 98 | bottom_mask = (mask1 & mask3 & mask5 & mask7) 99 | shifting_mask = (mask1 & mask3 & mask5 & mask8) 100 | global_mask = (left_mask | right_mask | bottom_mask | shifting_mask) 101 | 102 | # calculate equivalant volume of each part 103 | left_right_volume = (heights * self.finger_length * self.finger_width / (self.voxel_size**3)).reshape(-1) 104 | bottom_volume = (heights * (widths+2*self.finger_width) * self.finger_width / (self.voxel_size**3)).reshape(-1) 105 | shifting_volume = (heights * (widths+2*self.finger_width) * approach_dist / (self.voxel_size**3)).reshape(-1) 106 | volume = left_right_volume*2 + bottom_volume + shifting_volume 107 | 108 | # get collision iou of each part 109 | global_iou = global_mask.sum(axis=1) / (volume+1e-6) 110 | 111 | # get collison mask 112 | collision_mask = (global_iou > collision_thresh) 113 | 114 | if not (return_empty_grasp or return_ious): 115 | return collision_mask 116 | 117 | ret_value = [collision_mask,] 118 | if 
return_empty_grasp: 119 | inner_mask = (mask1 & mask2 & (~mask4) & (~mask6)) 120 | inner_volume = (heights * self.finger_length * widths / (self.voxel_size**3)).reshape(-1) 121 | empty_mask = (inner_mask.sum(axis=-1)/inner_volume < empty_thresh) 122 | ret_value.append(empty_mask) 123 | if return_ious: 124 | left_iou = left_mask.sum(axis=1) / (left_right_volume+1e-6) 125 | right_iou = right_mask.sum(axis=1) / (left_right_volume+1e-6) 126 | bottom_iou = bottom_mask.sum(axis=1) / (bottom_volume+1e-6) 127 | shifting_iou = shifting_mask.sum(axis=1) / (shifting_volume+1e-6) 128 | ret_value.append([global_iou, left_iou, right_iou, bottom_iou, shifting_iou]) 129 | return ret_value 130 | -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | """ Tools for data processing. 2 | Author: chenxi-wang 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | class CameraInfo(): 9 | """ Camera intrisics for point cloud creation. """ 10 | 11 | def __init__(self, width, height, fx, fy, cx, cy, scale): 12 | self.width = width 13 | self.height = height 14 | self.fx = fx 15 | self.fy = fy 16 | self.cx = cx 17 | self.cy = cy 18 | self.scale = scale 19 | 20 | 21 | def create_point_cloud_from_depth_image(depth, camera, organized=True): 22 | """ Generate point cloud using depth image only. 23 | 24 | Input: 25 | depth: [numpy.ndarray, (H,W), numpy.float32] 26 | depth image 27 | camera: [CameraInfo] 28 | camera intrinsics 29 | organized: bool 30 | whether to keep the cloud in image shape (H,W,3) 31 | 32 | Output: 33 | cloud: [numpy.ndarray, (H,W,3)/(H*W,3), numpy.float32] 34 | generated cloud, (H,W,3) for organized=True, (H*W,3) for organized=False 35 | """ 36 | assert (depth.shape[0] == camera.height and depth.shape[1] == camera.width) 37 | xmap = np.arange(camera.width) 38 | ymap = np.arange(camera.height) 39 | xmap, ymap = np.meshgrid(xmap, ymap) 40 | points_z = depth / camera.scale 41 | points_x = (xmap - camera.cx) * points_z / camera.fx 42 | points_y = (ymap - camera.cy) * points_z / camera.fy 43 | cloud = np.stack([points_x, points_y, points_z], axis=-1) 44 | if not organized: 45 | cloud = cloud.reshape([-1, 3]) 46 | return cloud 47 | 48 | 49 | def transform_point_cloud(cloud, transform, format='4x4'): 50 | """ Transform points to new coordinates with transformation matrix. 
51 | 52 | Input: 53 | cloud: [np.ndarray, (N,3), np.float32] 54 | points in original coordinates 55 | transform: [np.ndarray, (3,3)/(3,4)/(4,4), np.float32] 56 | transformation matrix, could be rotation only or rotation+translation 57 | format: [string, '3x3'/'3x4'/'4x4'] 58 | the shape of transformation matrix 59 | '3x3' --> rotation matrix 60 | '3x4'/'4x4' --> rotation matrix + translation matrix 61 | 62 | Output: 63 | cloud_transformed: [np.ndarray, (N,3), np.float32] 64 | points in new coordinates 65 | """ 66 | if not (format == '3x3' or format == '4x4' or format == '3x4'): 67 | raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.') 68 | if format == '3x3': 69 | cloud_transformed = np.dot(transform, cloud.T).T 70 | elif format == '4x4' or format == '3x4': 71 | ones = np.ones(cloud.shape[0])[:, np.newaxis] 72 | cloud_ = np.concatenate([cloud, ones], axis=1) 73 | cloud_transformed = np.dot(transform, cloud_.T).T 74 | cloud_transformed = cloud_transformed[:, :3] 75 | return cloud_transformed 76 | 77 | 78 | def compute_point_dists(A, B): 79 | """ Compute pair-wise point distances in two matrices. 80 | 81 | Input: 82 | A: [np.ndarray, (N,3), np.float32] 83 | point cloud A 84 | B: [np.ndarray, (M,3), np.float32] 85 | point cloud B 86 | 87 | Output: 88 | dists: [np.ndarray, (N,M), np.float32] 89 | distance matrix 90 | """ 91 | A = A[:, np.newaxis, :] 92 | B = B[np.newaxis, :, :] 93 | dists = np.linalg.norm(A - B, axis=-1) 94 | return dists 95 | 96 | 97 | def remove_invisible_grasp_points(cloud, grasp_points, pose, th=0.01): 98 | """ Remove invisible part of object model according to scene point cloud. 99 | 100 | Input: 101 | cloud: [np.ndarray, (N,3), np.float32] 102 | scene point cloud 103 | grasp_points: [np.ndarray, (M,3), np.float32] 104 | grasp point label in object coordinates 105 | pose: [np.ndarray, (4,4), np.float32] 106 | transformation matrix from object coordinates to world coordinates 107 | th: [float] 108 | if the minimum distance between a grasp point and the scene points is greater than outlier, the point will be removed 109 | 110 | Output: 111 | visible_mask: [np.ndarray, (M,), np.bool] 112 | mask to show the visible part of grasp points 113 | """ 114 | grasp_points_trans = transform_point_cloud(grasp_points, pose) 115 | dists = compute_point_dists(grasp_points_trans, cloud) 116 | min_dists = dists.min(axis=1) 117 | visible_mask = (min_dists < th) 118 | return visible_mask 119 | 120 | 121 | def get_workspace_mask(cloud, seg, trans=None, organized=True, outlier=0): 122 | """ Keep points in workspace as input. 123 | 124 | Input: 125 | cloud: [np.ndarray, (H,W,3), np.float32] 126 | scene point cloud 127 | seg: [np.ndarray, (H,W,), np.uint8] 128 | segmantation label of scene points 129 | trans: [np.ndarray, (4,4), np.float32] 130 | transformation matrix for scene points, default: None. 
131 | organized: [bool] 132 | whether to keep the cloud in image shape (H,W,3) 133 | outlier: [float] 134 | if the distance between a point and workspace is greater than outlier, the point will be removed 135 | 136 | Output: 137 | workspace_mask: [np.ndarray, (H,W)/(H*W,), np.bool] 138 | mask to indicate whether scene points are in workspace 139 | """ 140 | if organized: 141 | h, w, _ = cloud.shape 142 | cloud = cloud.reshape([h * w, 3]) 143 | seg = seg.reshape(h * w) 144 | if trans is not None: 145 | cloud = transform_point_cloud(cloud, trans) 146 | foreground = cloud[seg > 0] 147 | xmin, ymin, zmin = foreground.min(axis=0) 148 | xmax, ymax, zmax = foreground.max(axis=0) 149 | mask_x = ((cloud[:, 0] > xmin - outlier) & (cloud[:, 0] < xmax + outlier)) 150 | mask_y = ((cloud[:, 1] > ymin - outlier) & (cloud[:, 1] < ymax + outlier)) 151 | mask_z = ((cloud[:, 2] > zmin - outlier) & (cloud[:, 2] < zmax + outlier)) 152 | workspace_mask = (mask_x & mask_y & mask_z) 153 | if organized: 154 | workspace_mask = workspace_mask.reshape([h, w]) 155 | 156 | return workspace_mask 157 | -------------------------------------------------------------------------------- /utils/label_generation.py: -------------------------------------------------------------------------------- 1 | """ Dynamically generate grasp labels during training. 2 | Author: chenxi-wang 3 | """ 4 | 5 | import os 6 | import sys 7 | import torch 8 | 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | ROOT_DIR = os.path.dirname(BASE_DIR) 11 | sys.path.append(ROOT_DIR) 12 | # sys.path.append(os.path.join(ROOT_DIR, 'knn')) 13 | 14 | from knn.knn_modules import knn 15 | from loss_utils import GRASP_MAX_WIDTH, batch_viewpoint_params_to_matrix, \ 16 | transform_point_cloud, generate_grasp_views 17 | 18 | 19 | def process_grasp_labels(end_points): 20 | """ Process labels according to scene points and object poses. 
""" 21 | seed_xyzs = end_points['xyz_graspable'] # (B, M_point, 3) 22 | batch_size, num_samples, _ = seed_xyzs.size() 23 | 24 | batch_grasp_points = [] 25 | batch_grasp_views_rot = [] 26 | batch_grasp_scores = [] 27 | batch_grasp_widths = [] 28 | for i in range(batch_size): 29 | seed_xyz = seed_xyzs[i] # (Ns, 3) 30 | poses = end_points['object_poses_list'][i] # [(3, 4),] 31 | 32 | # get merged grasp points for label computation 33 | grasp_points_merged = [] 34 | grasp_views_rot_merged = [] 35 | grasp_scores_merged = [] 36 | grasp_widths_merged = [] 37 | for obj_idx, pose in enumerate(poses): 38 | grasp_points = end_points['grasp_points_list'][i][obj_idx] # (Np, 3) 39 | grasp_scores = end_points['grasp_scores_list'][i][obj_idx] # (Np, V, A, D) 40 | grasp_widths = end_points['grasp_widths_list'][i][obj_idx] # (Np, V, A, D) 41 | _, V, A, D = grasp_scores.size() 42 | num_grasp_points = grasp_points.size(0) 43 | # generate and transform template grasp views 44 | grasp_views = generate_grasp_views(V).to(pose.device) # (V, 3) 45 | grasp_points_trans = transform_point_cloud(grasp_points, pose, '3x4') 46 | grasp_views_trans = transform_point_cloud(grasp_views, pose[:3, :3], '3x3') 47 | # generate and transform template grasp view rotation 48 | angles = torch.zeros(grasp_views.size(0), dtype=grasp_views.dtype, device=grasp_views.device) 49 | grasp_views_rot = batch_viewpoint_params_to_matrix(-grasp_views, angles) # (V, 3, 3) 50 | grasp_views_rot_trans = torch.matmul(pose[:3, :3], grasp_views_rot) # (V, 3, 3) 51 | 52 | # assign views 53 | grasp_views_ = grasp_views.transpose(0, 1).contiguous().unsqueeze(0) 54 | grasp_views_trans_ = grasp_views_trans.transpose(0, 1).contiguous().unsqueeze(0) 55 | view_inds = knn(grasp_views_trans_, grasp_views_, k=1).squeeze() - 1 56 | grasp_views_rot_trans = torch.index_select(grasp_views_rot_trans, 0, view_inds) # (V, 3, 3) 57 | grasp_views_rot_trans = grasp_views_rot_trans.unsqueeze(0).expand(num_grasp_points, -1, -1, 58 | -1) # (Np, V, 3, 3) 59 | grasp_scores = torch.index_select(grasp_scores, 1, view_inds) # (Np, V, A, D) 60 | grasp_widths = torch.index_select(grasp_widths, 1, view_inds) # (Np, V, A, D) 61 | # add to list 62 | grasp_points_merged.append(grasp_points_trans) 63 | grasp_views_rot_merged.append(grasp_views_rot_trans) 64 | grasp_scores_merged.append(grasp_scores) 65 | grasp_widths_merged.append(grasp_widths) 66 | 67 | grasp_points_merged = torch.cat(grasp_points_merged, dim=0) # (Np', 3) 68 | grasp_views_rot_merged = torch.cat(grasp_views_rot_merged, dim=0) # (Np', V, 3, 3) 69 | grasp_scores_merged = torch.cat(grasp_scores_merged, dim=0) # (Np', V, A, D) 70 | grasp_widths_merged = torch.cat(grasp_widths_merged, dim=0) # (Np', V, A, D) 71 | 72 | # compute nearest neighbors 73 | seed_xyz_ = seed_xyz.transpose(0, 1).contiguous().unsqueeze(0) # (1, 3, Ns) 74 | grasp_points_merged_ = grasp_points_merged.transpose(0, 1).contiguous().unsqueeze(0) # (1, 3, Np') 75 | nn_inds = knn(grasp_points_merged_, seed_xyz_, k=1).squeeze() - 1 # (Ns) 76 | 77 | # assign anchor points to real points 78 | grasp_points_merged = torch.index_select(grasp_points_merged, 0, nn_inds) # (Ns, 3) 79 | grasp_views_rot_merged = torch.index_select(grasp_views_rot_merged, 0, nn_inds) # (Ns, V, 3, 3) 80 | grasp_scores_merged = torch.index_select(grasp_scores_merged, 0, nn_inds) # (Ns, V, A, D) 81 | grasp_widths_merged = torch.index_select(grasp_widths_merged, 0, nn_inds) # (Ns, V, A, D) 82 | 83 | # add to batch 84 | batch_grasp_points.append(grasp_points_merged) 85 | 
batch_grasp_views_rot.append(grasp_views_rot_merged) 86 | batch_grasp_scores.append(grasp_scores_merged) 87 | batch_grasp_widths.append(grasp_widths_merged) 88 | 89 | batch_grasp_points = torch.stack(batch_grasp_points, 0) # (B, Ns, 3) 90 | batch_grasp_views_rot = torch.stack(batch_grasp_views_rot, 0) # (B, Ns, V, 3, 3) 91 | batch_grasp_scores = torch.stack(batch_grasp_scores, 0) # (B, Ns, V, A, D) 92 | batch_grasp_widths = torch.stack(batch_grasp_widths, 0) # (B, Ns, V, A, D) 93 | 94 | # compute view graspness 95 | view_u_threshold = 0.6 96 | view_grasp_num = 48 97 | batch_grasp_view_valid_mask = (batch_grasp_scores <= view_u_threshold) & (batch_grasp_scores > 0) # (B, Ns, V, A, D) 98 | batch_grasp_view_valid = batch_grasp_view_valid_mask.float() 99 | batch_grasp_view_graspness = torch.sum(torch.sum(batch_grasp_view_valid, dim=-1), dim=-1) / view_grasp_num # (B, Ns, V) 100 | view_graspness_min, _ = torch.min(batch_grasp_view_graspness, dim=-1) # (B, Ns) 101 | view_graspness_max, _ = torch.max(batch_grasp_view_graspness, dim=-1) 102 | view_graspness_max = view_graspness_max.unsqueeze(-1).expand(-1, -1, 300) # (B, Ns, V) 103 | view_graspness_min = view_graspness_min.unsqueeze(-1).expand(-1, -1, 300) # same shape as batch_grasp_view_graspness 104 | batch_grasp_view_graspness = (batch_grasp_view_graspness - view_graspness_min) / (view_graspness_max - view_graspness_min + 1e-5) 105 | 106 | # process scores 107 | label_mask = (batch_grasp_scores > 0) & (batch_grasp_widths <= GRASP_MAX_WIDTH) # (B, Ns, V, A, D) 108 | batch_grasp_scores[~label_mask] = 0 109 | 110 | end_points['batch_grasp_point'] = batch_grasp_points 111 | end_points['batch_grasp_view_rot'] = batch_grasp_views_rot 112 | end_points['batch_grasp_score'] = batch_grasp_scores 113 | end_points['batch_grasp_width'] = batch_grasp_widths 114 | end_points['batch_grasp_view_graspness'] = batch_grasp_view_graspness 115 | 116 | return end_points 117 | 118 | 119 | def match_grasp_view_and_label(end_points): 120 | """ Slice grasp labels according to predicted views. 
""" 121 | top_view_inds = end_points['grasp_top_view_inds'] # (B, Ns) 122 | template_views_rot = end_points['batch_grasp_view_rot'] # (B, Ns, V, 3, 3) 123 | grasp_scores = end_points['batch_grasp_score'] # (B, Ns, V, A, D) 124 | grasp_widths = end_points['batch_grasp_width'] # (B, Ns, V, A, D, 3) 125 | 126 | B, Ns, V, A, D = grasp_scores.size() 127 | top_view_inds_ = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, 3, 3) 128 | top_template_views_rot = torch.gather(template_views_rot, 2, top_view_inds_).squeeze(2) 129 | top_view_inds_ = top_view_inds.view(B, Ns, 1, 1, 1).expand(-1, -1, -1, A, D) 130 | top_view_grasp_scores = torch.gather(grasp_scores, 2, top_view_inds_).squeeze(2) 131 | top_view_grasp_widths = torch.gather(grasp_widths, 2, top_view_inds_).squeeze(2) 132 | 133 | u_max = top_view_grasp_scores.max() 134 | po_mask = top_view_grasp_scores > 0 135 | po_mask_num = torch.sum(po_mask) 136 | if po_mask_num > 0: 137 | u_min = top_view_grasp_scores[po_mask].min() 138 | top_view_grasp_scores[po_mask] = torch.log(u_max / top_view_grasp_scores[po_mask]) / (torch.log(u_max / u_min) + 1e-6) 139 | 140 | end_points['batch_grasp_score'] = top_view_grasp_scores # (B, Ns, A, D) 141 | end_points['batch_grasp_width'] = top_view_grasp_widths # (B, Ns, A, D) 142 | 143 | return top_template_views_rot, end_points 144 | -------------------------------------------------------------------------------- /utils/loss_utils.py: -------------------------------------------------------------------------------- 1 | """ Tools for loss computation. 2 | Author: chenxi-wang 3 | """ 4 | 5 | import torch 6 | import numpy as np 7 | 8 | GRASP_MAX_WIDTH = 0.1 9 | GRASPNESS_THRESHOLD = 0.1 10 | NUM_VIEW = 300 11 | NUM_ANGLE = 12 12 | NUM_DEPTH = 4 13 | M_POINT = 1024 14 | 15 | 16 | def transform_point_cloud(cloud, transform, format='4x4'): 17 | """ Transform points to new coordinates with transformation matrix. 18 | 19 | Input: 20 | cloud: [torch.FloatTensor, (N,3)] 21 | points in original coordinates 22 | transform: [torch.FloatTensor, (3,3)/(3,4)/(4,4)] 23 | transformation matrix, could be rotation only or rotation+translation 24 | format: [string, '3x3'/'3x4'/'4x4'] 25 | the shape of transformation matrix 26 | '3x3' --> rotation matrix 27 | '3x4'/'4x4' --> rotation matrix + translation matrix 28 | 29 | Output: 30 | cloud_transformed: [torch.FloatTensor, (N,3)] 31 | points in new coordinates 32 | """ 33 | if not (format == '3x3' or format == '4x4' or format == '3x4'): 34 | raise ValueError('Unknown transformation format, only support \'3x3\' or \'4x4\' or \'3x4\'.') 35 | if format == '3x3': 36 | cloud_transformed = torch.matmul(transform, cloud.T).T 37 | elif format == '4x4' or format == '3x4': 38 | ones = cloud.new_ones(cloud.size(0), device=cloud.device).unsqueeze(-1) 39 | cloud_ = torch.cat([cloud, ones], dim=1) 40 | cloud_transformed = torch.matmul(transform, cloud_.T).T 41 | cloud_transformed = cloud_transformed[:, :3] 42 | return cloud_transformed 43 | 44 | 45 | def generate_grasp_views(N=300, phi=(np.sqrt(5) - 1) / 2, center=np.zeros(3), r=1): 46 | """ View sampling on a unit sphere using Fibonacci lattices. 
47 | Ref: https://arxiv.org/abs/0912.4540 48 | 49 | Input: 50 | N: [int] 51 | number of sampled views 52 | phi: [float] 53 | constant for view coordinate calculation, different phi's bring different distributions, default: (sqrt(5)-1)/2 54 | center: [np.ndarray, (3,), np.float32] 55 | sphere center 56 | r: [float] 57 | sphere radius 58 | 59 | Output: 60 | views: [torch.FloatTensor, (N,3)] 61 | sampled view coordinates 62 | """ 63 | views = [] 64 | for i in range(N): 65 | zi = (2 * i + 1) / N - 1 66 | xi = np.sqrt(1 - zi ** 2) * np.cos(2 * i * np.pi * phi) 67 | yi = np.sqrt(1 - zi ** 2) * np.sin(2 * i * np.pi * phi) 68 | views.append([xi, yi, zi]) 69 | views = r * np.array(views) + center 70 | return torch.from_numpy(views.astype(np.float32)) 71 | 72 | 73 | def batch_viewpoint_params_to_matrix(batch_towards, batch_angle): 74 | """ Transform approach vectors and in-plane rotation angles to rotation matrices. 75 | 76 | Input: 77 | batch_towards: [torch.FloatTensor, (N,3)] 78 | approach vectors in batch 79 | batch_angle: [torch.floatTensor, (N,)] 80 | in-plane rotation angles in batch 81 | 82 | Output: 83 | batch_matrix: [torch.floatTensor, (N,3,3)] 84 | rotation matrices in batch 85 | """ 86 | axis_x = batch_towards 87 | ones = torch.ones(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device) 88 | zeros = torch.zeros(axis_x.shape[0], dtype=axis_x.dtype, device=axis_x.device) 89 | axis_y = torch.stack([-axis_x[:, 1], axis_x[:, 0], zeros], dim=-1) 90 | mask_y = (torch.norm(axis_y, dim=-1) == 0) 91 | axis_y[mask_y, 1] = 1 92 | axis_x = axis_x / torch.norm(axis_x, dim=-1, keepdim=True) 93 | axis_y = axis_y / torch.norm(axis_y, dim=-1, keepdim=True) 94 | axis_z = torch.cross(axis_x, axis_y) 95 | sin = torch.sin(batch_angle) 96 | cos = torch.cos(batch_angle) 97 | R1 = torch.stack([ones, zeros, zeros, zeros, cos, -sin, zeros, sin, cos], dim=-1) 98 | R1 = R1.reshape([-1, 3, 3]) 99 | R2 = torch.stack([axis_x, axis_y, axis_z], dim=-1) 100 | batch_matrix = torch.matmul(R2, R1) 101 | return batch_matrix 102 | 103 | 104 | def huber_loss(error, delta=1.0): 105 | """ 106 | Args: 107 | error: Torch tensor (d1,d2,...,dk) 108 | Returns: 109 | loss: Torch tensor (d1,d2,...,dk) 110 | 111 | x = error = pred - gt or dist(pred,gt) 112 | 0.5 * |x|^2 if |x|<=d 113 | 0.5 * d^2 + d * (|x|-d) if |x|>d 114 | Author: Charles R. Qi 115 | Ref: https://github.com/charlesq34/frustum-pointnets/blob/master/models/model_util.py 116 | """ 117 | abs_error = torch.abs(error) 118 | quadratic = torch.clamp(abs_error, max=delta) 119 | linear = (abs_error - quadratic) 120 | loss = 0.5 * quadratic ** 2 + delta * linear 121 | return loss 122 | --------------------------------------------------------------------------------
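The view-template helpers at the end of utils/loss_utils.py are the pieces that utils/label_generation.py combines before assigning labels to scene points. As a quick orientation, the sketch below shows that combination in isolation. It is not a file from this repository: it assumes PyTorch is installed and that it is run from the repository root so the utils directory can be put on sys.path, the same way train.py and test.py arrange their imports.

import os
import sys
import torch

# Hypothetical standalone script at the repository root (not part of the repo).
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(ROOT_DIR, 'utils'))  # mirrors the path setup in train.py / test.py

from loss_utils import NUM_VIEW, generate_grasp_views, batch_viewpoint_params_to_matrix

# Sample NUM_VIEW (=300) approach directions on a unit sphere via the Fibonacci lattice.
views = generate_grasp_views(NUM_VIEW)                          # (300, 3), torch.float32
# Template in-plane rotation is zero, as in process_grasp_labels().
angles = torch.zeros(views.size(0), dtype=views.dtype)
# Negated view vectors become the approach (x) axes of the template gripper frames.
views_rot = batch_viewpoint_params_to_matrix(-views, angles)    # (300, 3, 3)
print(views.shape, views_rot.shape)

process_grasp_labels() in utils/label_generation.py builds exactly these templates per object, transforms them by each object pose, and then matches them to graspable scene points with the knn module, which is why NUM_VIEW (300) reappears as the V dimension of the grasp score and width tensors.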