├── doc
│   └── method.jpg
├── utils
│   ├── render_balls_so.so
│   ├── compile_render_balls_so.sh
│   ├── __pycache__
│   │   ├── provider.cpython-34.pyc
│   │   ├── provider.cpython-35.pyc
│   │   ├── tf_util.cpython-34.pyc
│   │   ├── tf_util.cpython-35.pyc
│   │   ├── pointnet_util.cpython-34.pyc
│   │   ├── pointnet_util.cpython-35.pyc
│   │   ├── show3d_balls.cpython-34.pyc
│   │   ├── transformations.cpython-34.pyc
│   │   ├── pointnet_util_edge.cpython-35.pyc
│   │   └── pointnet_util_edge_only.cpython-35.pyc
│   ├── README.md
│   ├── render_balls_so.cpp
│   ├── show3d_balls.py
│   ├── show3d_balls.py~
│   ├── provider.py
│   └── provider.py~
├── tf_ops
│   ├── grouping
│   │   ├── tf_grouping_g.cu.o
│   │   ├── tf_grouping_so.so
│   │   ├── __pycache__
│   │   │   └── tf_grouping.cpython-35.pyc
│   │   ├── test
│   │   │   ├── compile.sh
│   │   │   ├── selection_sort.cu
│   │   │   ├── selection_sort_const.cu
│   │   │   ├── selection_sort.cpp
│   │   │   ├── query_ball_point.cpp
│   │   │   ├── query_ball_point_block.cu
│   │   │   ├── query_ball_point.cu
│   │   │   └── query_ball_point_grid.cu
│   │   ├── tf_grouping_compile.sh~
│   │   ├── tf_grouping_compile.sh
│   │   ├── tf_grouping_op_test.py
│   │   ├── tf_grouping.py
│   │   ├── tf_grouping.py~
│   │   ├── tf_grouping_g.cu
│   │   └── tf_grouping.cpp
│   ├── sampling
│   │   ├── tf_sampling_g.cu.o
│   │   ├── tf_sampling_so.so
│   │   ├── __pycache__
│   │   │   ├── sample.cpython-34.pyc
│   │   │   ├── sample.cpython-35.pyc
│   │   │   ├── tf_sampling.cpython-34.pyc
│   │   │   └── tf_sampling.cpython-35.pyc
│   │   ├── tf_sampling_compile.sh~
│   │   ├── tf_sampling_compile.sh
│   │   ├── sample.py
│   │   ├── sample.py~
│   │   ├── tf_edge.py
│   │   ├── tf_edge.py~
│   │   ├── tf_sampling.py~
│   │   ├── tf_sampling.py
│   │   ├── tf_sampling_g.cu
│   │   └── tf_sampling.cpp
│   └── 3d_interpolation
│       ├── tf_interpolate_so.so
│       ├── __pycache__
│       │   ├── tf_interpolate.cpython-34.pyc
│       │   └── tf_interpolate.cpython-35.pyc
│       ├── tf_interpolate_compile.sh~
│       ├── tf_interpolate_compile.sh
│       ├── tf_interpolate_op_test.py
│       ├── visu_interpolation.py
│       ├── tf_interpolate.py~
│       ├── tf_interpolate.py
│       ├── interpolate.cpp
│       └── tf_interpolate.cpp
├── data
│   └── README.md
├── models
│   ├── __pycache__
│   │   ├── pointnet2_cls_edge.cpython-35.pyc
│   │   ├── pointnet2_cls_ssg.cpython-35.pyc
│   │   ├── pointnet2_seg_orig.cpython-35.pyc
│   │   ├── pointnet2_cls_joint.cpython-35.pyc
│   │   ├── pointnet2_cls_edge_sunrgbd.cpython-35.pyc
│   │   └── pointnet2_cls_edge_sunrgbd_scenes.cpython-35.pyc
│   ├── pointnet2_cls_msg.py
│   ├── pointnet2_cls_ssg.py
│   ├── pointnet2_part_seg.py
│   ├── pointnet2_cls_pose.py
│   ├── pointnet2_sem_seg.py
│   ├── pointnet2_part_seg_msg_one_hot.py
│   ├── pointnet_cls_basic.py
│   ├── pointnet2_cls_scene.py
│   ├── frustum_pointnets_v1.py
│   ├── pointnet2_cls_joint.py
│   └── pointnet2_seg_orig.py
└── README.md

/doc/method.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/doc/method.jpg
--------------------------------------------------------------------------------
/utils/render_balls_so.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/render_balls_so.so
--------------------------------------------------------------------------------
/tf_ops/grouping/tf_grouping_g.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/grouping/tf_grouping_g.cu.o
--------------------------------------------------------------------------------
/tf_ops/grouping/tf_grouping_so.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/grouping/tf_grouping_so.so -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_g.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/tf_sampling_g.cu.o -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_so.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/tf_sampling_so.so -------------------------------------------------------------------------------- /utils/compile_render_balls_so.sh: -------------------------------------------------------------------------------- 1 | g++ -std=c++11 render_balls_so.cpp -o render_balls_so.so -shared -fPIC -O2 -D_GLIBCXX_USE_CXX11_ABI=0 2 | 3 | -------------------------------------------------------------------------------- /utils/__pycache__/provider.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/provider.cpython-34.pyc -------------------------------------------------------------------------------- /utils/__pycache__/provider.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/provider.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_util.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/tf_util.cpython-34.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tf_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/tf_util.cpython-35.pyc -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate_so.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/3d_interpolation/tf_interpolate_so.so -------------------------------------------------------------------------------- /utils/__pycache__/pointnet_util.cpython-34.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/pointnet_util.cpython-34.pyc -------------------------------------------------------------------------------- /utils/__pycache__/pointnet_util.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/pointnet_util.cpython-35.pyc -------------------------------------------------------------------------------- /utils/__pycache__/show3d_balls.cpython-34.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/show3d_balls.cpython-34.pyc
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
 1 | ### 2.5D ModelNet40
 2 | 
 3 | Send an email to meriumahmed88@gmail.com to get a download link for the 2.5D point clouds generated from ModelNet40 (see the loading sketch below).
 4 | 
--------------------------------------------------------------------------------
/tf_ops/sampling/__pycache__/sample.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/__pycache__/sample.cpython-34.pyc
--------------------------------------------------------------------------------
/tf_ops/sampling/__pycache__/sample.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/__pycache__/sample.cpython-35.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/transformations.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/transformations.cpython-34.pyc
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_cls_edge.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_cls_edge.cpython-35.pyc
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_cls_ssg.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_cls_ssg.cpython-35.pyc
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_seg_orig.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_seg_orig.cpython-35.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/pointnet_util_edge.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/pointnet_util_edge.cpython-35.pyc
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_cls_joint.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_cls_joint.cpython-35.pyc
--------------------------------------------------------------------------------
/tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/grouping/__pycache__/tf_grouping.cpython-35.pyc
--------------------------------------------------------------------------------
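The 2.5D ModelNet40 data referenced in /data/README.md above is not described further in this repository, so the following loading sketch is an assumption: it mirrors the HDF5 layout ('data' and 'label' arrays) of the original PointNet++ ModelNet40 release that utils/provider.py consumes. The file name and key names are illustrative, not confirmed by this dump.

    import h5py

    # Hypothetical file name; the actual archive layout ships with the emailed link.
    with h5py.File('modelnet40_25d_train0.h5', 'r') as f:
        data = f['data'][:]    # assumed (num_clouds, num_points, 3) float32 xyz
        label = f['label'][:]  # assumed (num_clouds, 1) int class ids, 40 classes
    print(data.shape, label.shape)
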
/tf_ops/sampling/__pycache__/tf_sampling.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/__pycache__/tf_sampling.cpython-34.pyc
--------------------------------------------------------------------------------
/tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/sampling/__pycache__/tf_sampling.cpython-35.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/pointnet_util_edge_only.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/utils/__pycache__/pointnet_util_edge_only.cpython-35.pyc
--------------------------------------------------------------------------------
/utils/README.md:
--------------------------------------------------------------------------------
 1 | ## Utility Functions for 3D Point Cloud Deep Learning
 2 | 
 3 | ### Visualization tool
 4 | 
 5 | sh compile_render_balls_so.sh
 6 | python show3d_balls.py
 7 | 
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_cls_edge_sunrgbd.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_cls_edge_sunrgbd.cpython-35.pyc
--------------------------------------------------------------------------------
/models/__pycache__/pointnet2_cls_edge_sunrgbd_scenes.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/models/__pycache__/pointnet2_cls_edge_sunrgbd_scenes.cpython-35.pyc
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-34.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-34.pyc
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Merium88/Edge-Aware-PointNet/HEAD/tf_ops/3d_interpolation/__pycache__/tf_interpolate.cpython-35.pyc
--------------------------------------------------------------------------------
/tf_ops/grouping/test/compile.sh:
--------------------------------------------------------------------------------
 1 | g++ query_ball_point.cpp -o query_ball_point
 2 | nvcc query_ball_point.cu -o query_ball_point_cuda
 3 | nvcc query_ball_point_block.cu -o query_ball_point_block
 4 | nvcc query_ball_point_grid.cu -o query_ball_point_grid
 5 | g++ -Wall selection_sort.cpp -o selection_sort
 6 | nvcc selection_sort.cu -o selection_sort_cuda
 7 | 
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/tf_interpolate_compile.sh~:
--------------------------------------------------------------------------------
 1 | # TF1.2
 2 | g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so
-shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 3 | 4 | # TF1.4 5 | #g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate_compile.sh: -------------------------------------------------------------------------------- 1 | # TF1.2 2 | #g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 3 | 4 | # TF1.4 5 | g++ -std=c++11 tf_interpolate.cpp -o tf_interpolate_so.so -shared -fPIC -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_compile.sh~: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | /usr/local/cuda-8.0/bin/nvcc tf_grouping_g.cu -o tf_grouping_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 3 | 4 | # TF1.2 5 | g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | 7 | # TF1.4 8 | #g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0 9 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_compile.sh~: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | /usr/local/cuda-8.0/bin/nvcc tf_sampling_g.cu -o tf_sampling_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC 3 | 4 | # TF1.2 5 | #g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0 6 | 7 | # TF1.4 8 | g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -I /usr/local/lib/python2.7/dist-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-8.0/lib64/ -L/usr/local/lib/python2.7/dist-packages/tensorflow 
-ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0
 9 | 
--------------------------------------------------------------------------------
/tf_ops/grouping/tf_grouping_compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | /usr/local/cuda-9.0/bin/nvcc tf_grouping_g.cu -o tf_grouping_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
 3 | 
 4 | # TF1.2
 5 | #g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0
 6 | 
 7 | # TF1.4
 8 | g++ -std=c++11 tf_grouping.cpp tf_grouping_g.cu.o -o tf_grouping_so.so -shared -fPIC -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0
 9 | 
--------------------------------------------------------------------------------
/tf_ops/sampling/tf_sampling_compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #/usr/local/cuda-8.0/bin/nvcc tf_sampling_g.cu -o tf_sampling_g.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
 3 | 
 4 | # TF1.2
 5 | #g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /usr/local/lib/python2.7/dist-packages/tensorflow/include -I /usr/local/cuda-8.0/include -lcudart -L /usr/local/cuda-8.0/lib64/ -O2 -D_GLIBCXX_USE_CXX11_ABI=0
 6 | 
 7 | # TF1.4
 8 | g++ -std=c++11 tf_sampling.cpp tf_sampling_g.cu.o -o tf_sampling_so.so -shared -fPIC -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include -I /usr/local/cuda-9.0/include -I /home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow/include/external/nsync/public -lcudart -L /usr/local/cuda-9.0/lib64/ -L/home/mariam/virtualenv/venv1/lib/python3.5/site-packages/tensorflow -ltensorflow_framework -O2 -D_GLIBCXX_USE_CXX11_ABI=0
 9 | 
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/tf_interpolate_op_test.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from tf_interpolate import three_nn, three_interpolate
 4 | 
 5 | class GroupPointTest(tf.test.TestCase):
 6 |     def test(self):
 7 |         pass
 8 | 
 9 |     def test_grad(self):
10 |         with self.test_session():
11 |             points = tf.constant(np.random.random((1,8,16)).astype('float32'))
12 |             print(points)
13 |             xyz1 = tf.constant(np.random.random((1,128,3)).astype('float32'))
14 |             xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 |             dist, idx = three_nn(xyz1, xyz2)
16 |             weight = tf.ones_like(dist)/3.0
17 |             interpolated_points = three_interpolate(points, idx, weight)
18 |             print(interpolated_points)
19 |             err = tf.test.compute_gradient_error(points, (1,8,16), interpolated_points, (1,128,16))
20 |             print(err)
21 |             self.assertLess(err, 1e-4)
22 | 
23 | if __name__=='__main__':
24 |     tf.test.main()
25 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Edge-Aware-PointNet
 2 | ### Introduction
 3 | This work is based on our paper (https://www.researchgate.net/publication/333676783_EPN_Edge-Aware_PointNet_for_Object_Recognition_from_Multi-View_25D_Point_Clouds). We propose a novel architecture named Edge-Aware PointNet, which incorporates complementary edge information into the recently proposed PointNet++ framework by making use of convolutional neural networks (CNNs).
 4 | 
 5 | ![prediction example](https://github.com/Merium88/Edge-Aware-PointNet/blob/master/doc/method.jpg)
 6 | 
 7 | In this repository, we release code and data for training the network Edge-Aware PointNet on point clouds sampled from 3D shapes.
 8 | 
 9 | ### Usage
10 | The code is written as an extension to the original PointNet++ (https://github.com/charlesq34/pointnet2), so the usage and training procedure are the same as for the original repository.
11 | To train a model to classify point clouds sampled from ModelNet40:
12 | 
13 |     python train_modelnet40_edgecnn.py
14 | 
15 | 
16 | 
--------------------------------------------------------------------------------
/tf_ops/grouping/tf_grouping_op_test.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import numpy as np
 3 | from tf_grouping import query_ball_point, group_point
 4 | 
 5 | class GroupPointTest(tf.test.TestCase):
 6 |     def test(self):
 7 |         pass
 8 | 
 9 |     def test_grad(self):
10 |         with tf.device('/gpu:0'):
11 |             points = tf.constant(np.random.random((1,128,16)).astype('float32'))
12 |             print(points)
13 |             xyz1 = tf.constant(np.random.random((1,128,3)).astype('float32'))
14 |             xyz2 = tf.constant(np.random.random((1,8,3)).astype('float32'))
15 |             radius = 0.3
16 |             nsample = 32
17 |             idx, pts_cnt = query_ball_point(radius, nsample, xyz1, xyz2)
18 |             grouped_points = group_point(points, idx)
19 |             print(grouped_points)
20 | 
21 |         with self.test_session():
22 |             print("---- Going to compute gradient error")
23 |             err = tf.test.compute_gradient_error(points, (1,128,16), grouped_points, (1,8,32,16))
24 |             print(err)
25 |             self.assertLess(err, 1e-4)
26 | 
27 | if __name__=='__main__':
28 |     tf.test.main()
29 | 
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/visu_interpolation.py:
--------------------------------------------------------------------------------
 1 | ''' Visualize part segmentation '''
 2 | import os
 3 | import sys
 4 | ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 5 | sys.path.append('/home/rqi/Projects/toolkits/visualization')
 6 | from show3d_balls import showpoints
 7 | import numpy as np
 8 | from tf_interpolate import three_nn, three_interpolate
 9 | import tensorflow as tf
10 | 
11 | 
12 | pts2 = np.array([[0,0,1],[1,0,0],[0,1,0],[1,1,0]]).astype('float32')
13 | xyz1 = np.random.random((100,3)).astype('float32')
14 | xyz2 = np.array([[0,0,0],[1,0,0],[0,1,0],[1,1,1]]).astype('float32')
15 | 
16 | def fun(xyz1,xyz2,pts2):
17 |     with tf.device('/cpu:0'):
18 |         points = tf.constant(np.expand_dims(pts2,0))
19 |         xyz1 = tf.constant(np.expand_dims(xyz1,0))
20 |         xyz2 = tf.constant(np.expand_dims(xyz2,0))
21 |         dist, idx = three_nn(xyz1, xyz2)
22 |         #weight = tf.ones_like(dist)/3.0
23 |         dist = tf.maximum(dist, 1e-10)
24 |         norm = tf.reduce_sum((1.0/dist),axis=2,keep_dims=True)
25 |         norm = tf.tile(norm, [1,1,3])
26 |         print(norm)
27 |         weight = (1.0/dist) / norm
28 |         interpolated_points = three_interpolate(points, idx, weight)
29 |     with tf.Session('') as sess:
30 |         tmp,pts1,d,w = sess.run([xyz1, interpolated_points, dist, weight])
31 |         #print w
32 |         pts1 = pts1.squeeze()
33 |
return pts1 34 | 35 | pts1 = fun(xyz1,xyz2,pts2) 36 | all_pts = np.zeros((104,3)) 37 | all_pts[0:100,:] = pts1 38 | all_pts[100:,:] = pts2 39 | all_xyz = np.zeros((104,3)) 40 | all_xyz[0:100,:]=xyz1 41 | all_xyz[100:,:]=xyz2 42 | showpoints(xyz2, pts2, ballradius=8) 43 | showpoints(xyz1, pts1, ballradius=8) 44 | showpoints(all_xyz, all_pts, ballradius=8) 45 | -------------------------------------------------------------------------------- /tf_ops/sampling/sample.py: -------------------------------------------------------------------------------- 1 | """ PointNet++ Layers 2 | 3 | Author: Charles R. Qi 4 | Date: November 2017 5 | """ 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from tensorflow.python.framework import ops 10 | import sys 11 | import os 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(BASE_DIR) 14 | sampling_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_sampling_so.so')) 15 | import matplotlib.pyplot as plt 16 | from mpl_toolkits.mplot3d import Axes3D 17 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection 18 | from tf_sampling import farthest_point_sample, gather_point 19 | 20 | def sample(npoint, xyz): 21 | # Add Visualization predicted 22 | #fig = plt.figure() 23 | #ax = fig.add_subplot(111, projection='3d') 24 | #ax.scatter(xyz[0,:,0],xyz[0,:,1], xyz[0,:,2], c='red', marker='.') 25 | #data = np.zeros((1,xyz.shape[0],3)) 26 | #data[0,...] = xyz 27 | new_xyz=gather_point(xyz,farthest_point_sample(npoint,xyz)) 28 | 29 | init = tf.global_variables_initializer() 30 | config = tf.ConfigProto(allow_soft_placement = True) 31 | sess = tf.Session(config = config) 32 | sess = tf.Session() 33 | sess.run(init) 34 | new_data = new_xyz.eval(session = sess) 35 | #new_data = new_data.transpose() 36 | #ax.scatter(new_data[0,:,0],new_data[0,:,1], new_data[0,:,2], c='blue', marker='.') 37 | #plt.show() 38 | sess.close() 39 | return new_data 40 | 41 | def farthest_point_sample_edge(npoint, xyz,edge_ids): 42 | data = tf.Variable(tf.zeros([1,xyz.shape[1], 3], tf.int32),validate_shape = False) 43 | data.set_shape((1,None,3)) 44 | data[0,...] = xyz[0,edge_ids,:] 45 | new_xyz=gather_point(xyz,farthest_point_sample(npoint,xyz)) 46 | 47 | return new_xyz 48 | -------------------------------------------------------------------------------- /tf_ops/sampling/sample.py~: -------------------------------------------------------------------------------- 1 | """ PointNet++ Layers 2 | 3 | Author: Charles R. Qi 4 | Date: November 2017 5 | """ 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from tensorflow.python.framework import ops 10 | import sys 11 | import os 12 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 13 | sys.path.append(BASE_DIR) 14 | sampling_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_sampling_so.so')) 15 | import matplotlib.pyplot as plt 16 | from mpl_toolkits.mplot3d import Axes3D 17 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection, Line3DCollection 18 | from tf_sampling import farthest_point_sample, gather_point 19 | 20 | def sample(npoint, xyz): 21 | # Add Visualization predicted 22 | #fig = plt.figure() 23 | #ax = fig.add_subplot(111, projection='3d') 24 | #ax.scatter(xyz[0,:,0],xyz[0,:,1], xyz[0,:,2], c='red', marker='.') 25 | data = np.zeros((1,xyz.shape[0],3)) 26 | data[0,...] 
= xyz 27 | new_xyz=gather_point(data,farthest_point_sample(npoint,data)) 28 | 29 | #init = tf.global_variables_initializer() 30 | #config = tf.ConfigProto(allow_soft_placement = True) 31 | #sess = tf.Session(config = config) 32 | #sess = tf.Session() 33 | #sess.run(init) 34 | #new_data = new_xyz.eval(session = sess) 35 | #new_data = new_data.transpose() 36 | #ax.scatter(new_data[0,:,0],new_data[0,:,1], new_data[0,:,2], c='blue', marker='.') 37 | #plt.show() 38 | #sess.close() 39 | return new_xyz 40 | 41 | def farthest_point_sample_edge(npoint, xyz,edge_ids): 42 | data = tf.Variable(tf.zeros([1,xyz.shape[1], 3], tf.int32),validate_shape = False) 43 | data.set_shape((1,None,3)) 44 | data[0,...] = xyz[0,edge_ids,:] 45 | new_xyz=gather_point(xyz,farthest_point_sample(npoint,xyz)) 46 | 47 | return new_xyz 48 | -------------------------------------------------------------------------------- /utils/render_balls_so.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | using namespace std; 6 | 7 | struct PointInfo{ 8 | int x,y,z; 9 | float r,g,b; 10 | }; 11 | 12 | extern "C"{ 13 | 14 | void render_ball(int h,int w,unsigned char * show,int n,int * xyzs,float * c0,float * c1,float * c2,int r){ 15 | r=max(r,1); 16 | vector depth(h*w,-2100000000); 17 | vector pattern; 18 | for (int dx=-r;dx<=r;dx++) 19 | for (int dy=-r;dy<=r;dy++) 20 | if (dx*dx+dy*dy=h || y2<0 || y2>=w) && depth[x2*w+y2] 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,k), val (b,m,k) 20 | __global__ void selection_sort_gpu(int b, int n, int m, int k, float *dist, int *idx, float *val) { 21 | int batch_index = blockIdx.x; 22 | dist+=m*n*batch_index; 23 | idx+=m*k*batch_index; 24 | val+=m*k*batch_index; 25 | 26 | int index = threadIdx.x; 27 | int stride = blockDim.x; 28 | 29 | float *p_dist; 30 | for (int j=index;j>>(b,n,m,k,dist,idx,val); 68 | cudaDeviceSynchronize(); 69 | printf("selection sort cpu time %f\n",get_time()-t0); 70 | 71 | return 0; 72 | } 73 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate.py~: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | BASE_DIR = os.path.dirname(__file__) 6 | sys.path.append(BASE_DIR) 7 | interpolate_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_interpolate_so.so')) 8 | def three_nn(xyz1, xyz2): 9 | ''' 10 | Input: 11 | xyz1: (b,n,3) float32 array, unknown points 12 | xyz2: (b,m,3) float32 array, known points 13 | Output: 14 | dist: (b,n,3) float32 array, distances to known points 15 | idx: (b,n,3) int32 array, indices to known points 16 | ''' 17 | return interpolate_module.three_nn(xyz1, xyz2) 18 | ops.NoGradient('ThreeNN') 19 | def three_interpolate(points, idx, weight): 20 | ''' 21 | Input: 22 | points: (b,m,c) float32 array, known points 23 | idx: (b,n,3) int32 array, indices to known points 24 | weight: (b,n,3) float32 array, weights on known points 25 | Output: 26 | out: (b,n,c) float32 array, interpolated point 
values 27 | ''' 28 | return interpolate_module.three_interpolate(points, idx, weight) 29 | @tf.RegisterGradient('ThreeInterpolate') 30 | def _three_interpolate_grad(op, grad_out): 31 | points = op.inputs[0] 32 | idx = op.inputs[1] 33 | weight = op.inputs[2] 34 | return [interpolate_module.three_interpolate_grad(points, idx, weight, grad_out), None, None] 35 | 36 | if __name__=='__main__': 37 | import numpy as np 38 | import time 39 | np.random.seed(100) 40 | pts = np.random.random((32,128,64)).astype('float32') 41 | tmp1 = np.random.random((32,512,3)).astype('float32') 42 | tmp2 = np.random.random((32,128,3)).astype('float32') 43 | with tf.device('/cpu:0'): 44 | points = tf.constant(pts) 45 | xyz1 = tf.constant(tmp1) 46 | xyz2 = tf.constant(tmp2) 47 | dist, idx = three_nn(xyz1, xyz2) 48 | weight = tf.ones_like(dist)/3.0 49 | interpolated_points = three_interpolate(points, idx, weight) 50 | with tf.Session('') as sess: 51 | now = time.time() 52 | for _ in range(100): 53 | ret = sess.run(interpolated_points) 54 | print time.time() - now 55 | print ret.shape, ret.dtype 56 | #print ret 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/tf_interpolate.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | BASE_DIR = os.path.dirname(__file__) 6 | sys.path.append(BASE_DIR) 7 | interpolate_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_interpolate_so.so')) 8 | def three_nn(xyz1, xyz2): 9 | ''' 10 | Input: 11 | xyz1: (b,n,3) float32 array, unknown points 12 | xyz2: (b,m,3) float32 array, known points 13 | Output: 14 | dist: (b,n,3) float32 array, distances to known points 15 | idx: (b,n,3) int32 array, indices to known points 16 | ''' 17 | return interpolate_module.three_nn(xyz1, xyz2) 18 | ops.NoGradient('ThreeNN') 19 | def three_interpolate(points, idx, weight): 20 | ''' 21 | Input: 22 | points: (b,m,c) float32 array, known points 23 | idx: (b,n,3) int32 array, indices to known points 24 | weight: (b,n,3) float32 array, weights on known points 25 | Output: 26 | out: (b,n,c) float32 array, interpolated point values 27 | ''' 28 | return interpolate_module.three_interpolate(points, idx, weight) 29 | @tf.RegisterGradient('ThreeInterpolate') 30 | def _three_interpolate_grad(op, grad_out): 31 | points = op.inputs[0] 32 | idx = op.inputs[1] 33 | weight = op.inputs[2] 34 | return [interpolate_module.three_interpolate_grad(points, idx, weight, grad_out), None, None] 35 | 36 | if __name__=='__main__': 37 | import numpy as np 38 | import time 39 | np.random.seed(100) 40 | pts = np.random.random((32,128,64)).astype('float32') 41 | tmp1 = np.random.random((32,512,3)).astype('float32') 42 | tmp2 = np.random.random((32,128,3)).astype('float32') 43 | with tf.device('/cpu:0'): 44 | points = tf.constant(pts) 45 | xyz1 = tf.constant(tmp1) 46 | xyz2 = tf.constant(tmp2) 47 | dist, idx = three_nn(xyz1, xyz2) 48 | weight = tf.ones_like(dist)/3.0 49 | interpolated_points = three_interpolate(points, idx, weight) 50 | with tf.Session('') as sess: 51 | now = time.time() 52 | for _ in range(100): 53 | ret = sess.run(interpolated_points) 54 | print (time.time() - now) 55 | print (ret.shape, ret.dtype) 56 | #print ret 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_edge.py: 
--------------------------------------------------------------------------------
 1 | """ Edge detection Layers
 2 | 
 3 | Author: Charles R. Qi
 4 | Date: November 2017
 5 | """
 6 | 
 7 | import os
 8 | import sys
 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10 | ROOT_DIR = os.path.dirname(BASE_DIR)
11 | sys.path.append(os.path.join(ROOT_DIR, 'utils'))
12 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/sampling'))
13 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping'))
14 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/3d_interpolation'))
15 | from tf_sampling import farthest_point_sample, gather_point
16 | from tf_grouping import query_ball_point, group_point, knn_point
17 | from tf_interpolate import three_nn, three_interpolate
18 | import tensorflow as tf
19 | import numpy as np
20 | import tf_util
21 | 
22 | 
23 | 
24 | def sample_edge(npoint, radius, nsample, xyz, points, knn=False, use_xyz=True):
25 |     '''
26 |     Input:
27 |         npoint: int32
28 |         radius: float32
29 |         nsample: int32
30 |         xyz: (batch_size, ndataset, 3) TF tensor
31 |         points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points
32 |         knn: bool, if True use kNN instead of radius search
33 |         use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
34 | 
35 |     Output:
36 |         new_xyz: (batch_size, npoint, 3) TF tensor
37 |         new_points: (batch_size, npoint, nsample, 3+channel) TF tensor
38 |         idx: (batch_size, npoint, nsample) TF tensor, indices of local points as in ndataset points
39 |         grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor, normalized point XYZs
40 |             (subtracted by seed point XYZ) in local regions
41 |     '''
42 |     #new_xyz = gather_edge(xyz, farthest_point_sample(npoint, xyz))  # gather_edge is undefined; superseded by the next line
43 |     new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz))
44 |     if knn:
45 |         #print('using knn')
46 |         _,idx = knn_point(nsample, xyz, new_xyz)
47 |     else:
48 |         idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz)
49 |     grouped_xyz = group_point(xyz, idx) # (batch_size, npoint, nsample, 3)
50 |     grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1,1,nsample,1]) # translation normalization
51 |     #print('grouped points',grouped_xyz.shape)
52 |     if points is not None:
53 |         grouped_points = group_point(points, idx) # (batch_size, npoint, nsample, channel)
54 |         if use_xyz:
55 |             new_points = tf.concat([grouped_xyz, grouped_points], axis=-1) # (batch_size, npoint, nsample, 3+channel)
56 |         else:
57 |             new_points = grouped_points
58 |     else:
59 |         new_points = grouped_xyz
60 |     #print('new points',new_points.shape)
61 |     return new_xyz, new_points, idx, grouped_xyz
62 | 
63 | 
--------------------------------------------------------------------------------
/models/pointnet2_cls_msg.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | BASE_DIR = os.path.dirname(__file__)
 4 | sys.path.append(BASE_DIR)
 5 | sys.path.append(os.path.join(BASE_DIR, '../utils'))
 6 | import tensorflow as tf
 7 | import numpy as np
 8 | import tf_util
 9 | from pointnet_util import pointnet_sa_module, pointnet_sa_module_msg
10 | 
11 | def placeholder_inputs(batch_size, num_point):
12 |     pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
13 |     labels_pl = tf.placeholder(tf.int32, shape=(batch_size))
14 |     return pointclouds_pl, labels_pl
15 | 
16 | 
17 | def get_model(point_cloud, is_training, bn_decay=None):
18 |     """ Classification PointNet, input is BxNx3, output Bx40 """
19 |     batch_size = point_cloud.get_shape()[0].value
20 |     num_point = point_cloud.get_shape()[1].value
21 |     end_points = {}
22 | 
23 |     l0_xyz = point_cloud
24 |     l0_points = None
25 | 
26 |     # Set abstraction layers
27 |     l1_xyz,
l1_points = pointnet_sa_module_msg(l0_xyz, l0_points, 512, [0.1,0.2,0.4], [16,32,128], [[32,32,64], [64,64,128], [64,96,128]], is_training, bn_decay, scope='layer1', use_nchw=True) 28 | l2_xyz, l2_points = pointnet_sa_module_msg(l1_xyz, l1_points, 128, [0.2,0.4,0.8], [32,64,128], [[64,64,128], [128,128,256], [128,128,256]], is_training, bn_decay, scope='layer2') 29 | l3_xyz, l3_points, _ = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None, mlp=[256,512,1024], mlp2=None, group_all=True, is_training=is_training, bn_decay=bn_decay, scope='layer3') 30 | 31 | # Fully connected layers 32 | net = tf.reshape(l3_points, [batch_size, -1]) 33 | net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay) 34 | net = tf_util.dropout(net, keep_prob=0.4, is_training=is_training, scope='dp1') 35 | net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training, scope='fc2', bn_decay=bn_decay) 36 | net = tf_util.dropout(net, keep_prob=0.4, is_training=is_training, scope='dp2') 37 | net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3') 38 | 39 | return net, end_points 40 | 41 | 42 | def get_loss(pred, label, end_points): 43 | """ pred: B*NUM_CLASSES, 44 | label: B, """ 45 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=label) 46 | classify_loss = tf.reduce_mean(loss) 47 | tf.summary.scalar('classify loss', classify_loss) 48 | tf.add_to_collection('losses', classify_loss) 49 | return classify_loss 50 | 51 | 52 | if __name__=='__main__': 53 | with tf.Graph().as_default(): 54 | inputs = tf.zeros((32,1024,3)) 55 | net, _ = get_model(inputs, tf.constant(True)) 56 | print(net) 57 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_edge.py~: -------------------------------------------------------------------------------- 1 | """ Edge detection Layers 2 | 3 | Author: Charles R. 
Qi 4 | Date: November 2017 5 | """ 6 | 7 | import os 8 | import sys 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | ROOT_DIR = os.path.dirname(BASE_DIR) 11 | sys.path.append(os.path.join(ROOT_DIR, 'utils')) 12 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/sampling')) 13 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping')) 14 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/3d_interpolation')) 15 | from tf_sampling import farthest_point_sample, gather_point 16 | from tf_grouping import query_ball_point, group_point, knn_point 17 | from tf_interpolate import three_nn, three_interpolate 18 | import tensorflow as tf 19 | import numpy as np 20 | import tf_util 21 | 22 | 23 | 24 | def sample_and_group_edge(npoint, radius, nsample, xyz, points, knn=False, use_xyz=True): 25 | ''' 26 | Input: 27 | npoint: int32 28 | radius: float32 29 | nsample: int32 30 | xyz: (batch_size, ndataset, 3) TF tensor 31 | points: (batch_size, ndataset, channel) TF tensor, if None will just use xyz as points 32 | knn: bool, if True use kNN instead of radius search 33 | use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features 34 | Output: 35 | new_xyz: (batch_size, npoint, 3) TF tensor 36 | new_points: (batch_size, npoint, nsample, 3+channel) TF tensor 37 | idx: (batch_size, npoint, nsample) TF tensor, indices of local points as in ndataset points 38 | grouped_xyz: (batch_size, npoint, nsample, 3) TF tensor, normalized point XYZs 39 | (subtracted by seed point XYZ) in local regions 40 | ''' 41 | new_xyz = gather_edge(xyz, farthest_point_sample(npoint, xyz)) # (batch_size, npoint, 3) 42 | new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz)) # (batch_size, npoint, 3) 43 | if knn: 44 | #print('using knn') 45 | _,idx = knn_point(nsample, xyz, new_xyz) 46 | else: 47 | idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz) 48 | grouped_xyz = group_point(xyz, idx) # (batch_size, npoint, nsample, 3) 49 | grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1,1,nsample,1]) # translation normalization 50 | #print('grouped points',grouped_xyz.shape) 51 | if points is not None: 52 | grouped_points = group_point(points, idx) # (batch_size, npoint, nsample, channel) 53 | if use_xyz: 54 | new_points = tf.concat([grouped_xyz, grouped_points], axis=-1) # (batch_size, npoint, nample, 3+channel) 55 | else: 56 | new_points = grouped_points 57 | else: 58 | new_points = grouped_xyz 59 | #print('new points',new_points.shape) 60 | return new_xyz, new_points, idx, grouped_xyz 61 | 62 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/selection_sort_const.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,n), dist_out (b,m,n) 20 | __global__ void selection_sort_gpu(int b, int n, int m, int k, const float *dist, int *outi, float *out) { 21 | int batch_index = blockIdx.x; 22 | dist+=m*n*batch_index; 23 | outi+=m*n*batch_index; 24 | out+=m*n*batch_index; 25 | 26 | int index = threadIdx.x; 27 | int stride = blockDim.x; 28 | 29 | // copy from dist to 
dist_out 30 | for (int j=index;j>>(b,n,m,k,dist,idx,dist_out); 84 | cudaDeviceSynchronize(); 85 | printf("selection sort cpu time %f\n",get_time()-t0); 86 | 87 | //for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // input: k (1), distance matrix dist (b,m,n) 19 | // output: idx (b,m,n), val (b,m,n) 20 | void selection_sort_cpu(int b, int n, int m, int k, const float *dist, int *idx, float *val) { 21 | float *p_dist; 22 | float tmp; 23 | int tmpi; 24 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | void query_ball_point_cpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | for (int i=0;i 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | int index = threadIdx.x; 21 | xyz1 += n*3*index; 22 | xyz2 += m*3*index; 23 | idx += m*nsample*index; 24 | 25 | for (int j=0;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 113 | cudaDeviceSynchronize(); 114 | printf("query_ball_point gpu time %f\n",get_time()-t0); 115 | 116 | t0=get_time(); 117 | group_point_gpu<<<1,b>>>(b,n,c,m,nsample,points,idx,out); 118 | cudaDeviceSynchronize(); 119 | printf("grou_point gpu time %f\n",get_time()-t0); 120 | 121 | t0=get_time(); 122 | group_point_grad_gpu<<<1,b>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 123 | cudaDeviceSynchronize(); 124 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 125 | 126 | cudaFree(xyz1); 127 | cudaFree(xyz2); 128 | cudaFree(points); 129 | cudaFree(idx); 130 | cudaFree(out); 131 | cudaFree(grad_out); 132 | cudaFree(grad_points); 133 | return 0; 134 | } 135 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/query_ball_point.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, 
int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | for (int i=0;i>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 113 | cudaDeviceSynchronize(); 114 | printf("query_ball_point gpu time %f\n",get_time()-t0); 115 | 116 | t0=get_time(); 117 | group_point_gpu<<<1,1>>>(b,n,c,m,nsample,points,idx,out); 118 | cudaDeviceSynchronize(); 119 | printf("grou_point gpu time %f\n",get_time()-t0); 120 | 121 | t0=get_time(); 122 | group_point_grad_gpu<<<1,1>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 123 | cudaDeviceSynchronize(); 124 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 125 | 126 | cudaFree(xyz1); 127 | cudaFree(xyz2); 128 | cudaFree(points); 129 | cudaFree(idx); 130 | cudaFree(out); 131 | cudaFree(grad_out); 132 | cudaFree(grad_points); 133 | return 0; 134 | } 135 | -------------------------------------------------------------------------------- /tf_ops/grouping/test/query_ball_point_grid.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 18 | // output: idx (b,m,nsample) 19 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx) { 20 | int batch_index = blockIdx.x; 21 | xyz1 += n*3*batch_index; 22 | xyz2 += m*3*batch_index; 23 | idx += m*nsample*batch_index; 24 | 25 | int index = threadIdx.x; 26 | int stride = blockDim.x; 27 | 28 | for (int j=index;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx); 123 | cudaDeviceSynchronize(); 124 | printf("query_ball_point gpu time %f\n",get_time()-t0); 125 | 126 | t0=get_time(); 127 | group_point_gpu<<>>(b,n,c,m,nsample,points,idx,out); 128 | cudaDeviceSynchronize(); 129 | printf("grou_point gpu time %f\n",get_time()-t0); 130 | 131 | t0=get_time(); 132 | group_point_grad_gpu<<>>(b,n,c,m,nsample,grad_out,idx,grad_points); 133 | cudaDeviceSynchronize(); 134 | printf("grou_point_grad gpu time %f\n",get_time()-t0); 135 | 136 | cudaFree(xyz1); 137 | cudaFree(xyz2); 138 | cudaFree(points); 139 | cudaFree(idx); 140 | cudaFree(out); 141 | cudaFree(grad_out); 142 | cudaFree(grad_points); 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping_g.cu: -------------------------------------------------------------------------------- 1 | // input: radius (1), nsample (1), xyz1 (b,n,3), xyz2 (b,m,3) 2 | // output: idx (b,m,nsample), pts_cnt (b,m) 3 | __global__ void query_ball_point_gpu(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt) { 4 | int batch_index = blockIdx.x; 5 | xyz1 += n*3*batch_index; 6 | xyz2 += m*3*batch_index; 7 | idx += m*nsample*batch_index; 8 | pts_cnt += m*batch_index; // counting how many unique points selected in local region 9 | 10 | int index = threadIdx.x; 11 | int stride = blockDim.x; 12 | 13 | for (int j=index;j>>(b,n,m,radius,nsample,xyz1,xyz2,idx,pts_cnt); 127 | //cudaDeviceSynchronize(); 128 | } 129 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out) { 130 | selection_sort_gpu<<>>(b,n,m,k,dist,outi,out); 131 | 
//cudaDeviceSynchronize(); 132 | } 133 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out){ 134 | group_point_gpu<<>>(b,n,c,m,nsample,points,idx,out); 135 | //cudaDeviceSynchronize(); 136 | } 137 | void groupPointGradLauncher(int b, int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points){ 138 | group_point_grad_gpu<<>>(b,n,c,m,nsample,grad_out,idx,grad_points); 139 | //group_point_grad_gpu<<<1,1>>>(b,n,c,m,nsample,grad_out,idx,grad_points); 140 | //cudaDeviceSynchronize(); 141 | } 142 | -------------------------------------------------------------------------------- /tf_ops/3d_interpolation/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include 7 | #include 8 | using namespace std; 9 | float randomf(){ 10 | return (rand()+0.5)/(RAND_MAX+1.0); 11 | } 12 | static double get_time(){ 13 | timespec tp; 14 | clock_gettime(CLOCK_MONOTONIC,&tp); 15 | return tp.tv_sec+tp.tv_nsec*1e-9; 16 | } 17 | 18 | // Find three nearest neigbors with square distance 19 | // input: xyz1 (b,n,3), xyz2(b,m,3) 20 | // output: dist (b,n,3), idx (b,n,3) 21 | void threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) { 22 | for (int i=0;i0: 92 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=0)) 93 | if magnifyBlue>=2: 94 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=0)) 95 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=1)) 96 | if magnifyBlue>=2: 97 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=1)) 98 | if showrot: 99 | cv2.putText(show,'xangle %d'%(int(xangle/np.pi*180)),(30,showsz-30),0,0.5,cv2.cv.CV_RGB(255,0,0)) 100 | cv2.putText(show,'yangle %d'%(int(yangle/np.pi*180)),(30,showsz-50),0,0.5,cv2.cv.CV_RGB(255,0,0)) 101 | cv2.putText(show,'zoom %d%%'%(int(zoom*100)),(30,showsz-70),0,0.5,cv2.cv.CV_RGB(255,0,0)) 102 | changed=True 103 | while pcd: 104 | if changed: 105 | render() 106 | changed=False 107 | # draw ground floor in green 108 | #cv2.drawContours(show, [corners[:4]],-1,(0,255,0),-3) 109 | #for i,j in zip(range(4),range(4,8)): 110 | # cv2.line(show, tuple(corners[i]), tuple(corners[j]),(255),3) 111 | #cv2.drawContours(show, [corners[4:]],-1,(0,0,255),3) 112 | cv2.imshow('show3d',show) 113 | # Draw a diagonal blue line with thickness of 5 px 114 | 115 | if waittime==0: 116 | cmd=cv2.waitKey(10)%256 117 | else: 118 | cmd=cv2.waitKey(waittime)%256 119 | 120 | if cmd==ord('q'): 121 | break 122 | elif cmd==ord('Q'): 123 | sys.exit(0) 124 | 125 | if cmd==ord('t') or cmd == ord('p'): 126 | ''' 127 | if cmd == ord('t'): 128 | if c_gt is None: 129 | c0=np.zeros((len(xyz),),dtype='float32')+255 130 | c1=np.zeros((len(xyz),),dtype='float32')+255 131 | c2=np.zeros((len(xyz),),dtype='float32')+255 132 | else: 133 | c0=c_gt[:,0] 134 | c1=c_gt[:,1] 135 | c2=c_gt[:,2] 136 | else: 137 | if c_pred is None: 138 | c0=np.zeros((len(xyz),),dtype='float32')+255 139 | c1=np.zeros((len(xyz),),dtype='float32')+255 140 | c2=np.zeros((len(xyz),),dtype='float32')+255 141 | else: 142 | c0=c_pred[:,0] 143 | c1=c_pred[:,1] 144 | c2=c_pred[:,2] 145 | if normalizecolor: 146 | c0/=(c0.max()+1e-14)/255.0 147 | c1/=(c1.max()+1e-14)/255.0 148 | c2/=(c2.max()+1e-14)/255.0 149 | ''' 150 | c0=np.require(c0,'float32','C') 151 | c1=np.require(c1,'float32','C') 152 | 
c2=np.require(c2,'float32','C') 153 | changed = False 154 | pcd = False 155 | 156 | ''' 157 | if cmd==ord('n'): 158 | zoom*=1.1 159 | changed=True 160 | elif cmd==ord('m'): 161 | zoom/=1.1 162 | changed=True 163 | elif cmd==ord('r'): 164 | zoom=1.0 165 | changed=True 166 | elif cmd==ord('s'): 167 | cv2.imwrite('show3d.png',show) 168 | if waittime!=0: 169 | break 170 | ''' 171 | return cmd 172 | if __name__=='__main__': 173 | np.random.seed(100) 174 | showpoints(np.random.randn(2500,3)) 175 | 176 | -------------------------------------------------------------------------------- /utils/show3d_balls.py~: -------------------------------------------------------------------------------- 1 | """ Original Author: Haoqiang Fan """ 2 | import numpy as np 3 | import ctypes as ct 4 | import cv2 5 | import sys 6 | import os 7 | from numpy.linalg import norm 8 | 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | showsz=800 11 | mousex,mousey=0.5,0.5 12 | zoom=1.0 13 | changed=True 14 | def onmouse(*args): 15 | global mousex,mousey,changed 16 | y=args[1] 17 | x=args[2] 18 | mousex=x/float(showsz) 19 | mousey=y/float(showsz) 20 | changed=True 21 | cv2.namedWindow('show3d') 22 | cv2.moveWindow('show3d',0,0) 23 | cv2.setMouseCallback('show3d',onmouse) 24 | 25 | dll=np.ctypeslib.load_library(os.path.join(BASE_DIR, 'render_balls_so'),'.') 26 | 27 | 28 | def showpoints(xyz,corners,c_gt=None, c_pred = None ,waittime=0,showrot=False,magnifyBlue=0,freezerot=False,background=(0,0,0),normalizecolor=True,ballradius=1,pcd=True): 29 | global showsz,mousex,mousey,zoom,changed 30 | xyz=xyz-xyz.mean(axis=0) 31 | radius=((xyz**2).sum(axis=-1)**0.5).max() 32 | xyz/=(radius*2.2)/showsz 33 | if c_gt is None: 34 | c0=np.zeros((len(xyz),),dtype='float32')+255 35 | c1=np.zeros((len(xyz),),dtype='float32')+255 36 | c2=np.zeros((len(xyz),),dtype='float32')+255 37 | else: 38 | c0=c_gt[:,0] 39 | c1=c_gt[:,1] 40 | c2=c_gt[:,2] 41 | 42 | 43 | if normalizecolor: 44 | c0/=(c0.max()+1e-14)/255.0 45 | c1/=(c1.max()+1e-14)/255.0 46 | c2/=(c2.max()+1e-14)/255.0 47 | 48 | 49 | c0=np.require(c0,'float32','C') 50 | c1=np.require(c1,'float32','C') 51 | c2=np.require(c2,'float32','C') 52 | 53 | show=np.zeros((showsz,showsz,3),dtype='uint8') 54 | def render(): 55 | rotmat=np.eye(3) 56 | if not freezerot: 57 | xangle=(mousey-0.5)*np.pi*1.2 58 | else: 59 | xangle=0 60 | rotmat=rotmat.dot(np.array([ 61 | [1.0,0.0,0.0], 62 | [0.0,np.cos(xangle),-np.sin(xangle)], 63 | [0.0,np.sin(xangle),np.cos(xangle)], 64 | ])) 65 | if not freezerot: 66 | yangle=(mousex-0.5)*np.pi*1.2 67 | else: 68 | yangle=0 69 | rotmat=rotmat.dot(np.array([ 70 | [np.cos(yangle),0.0,-np.sin(yangle)], 71 | [0.0,1.0,0.0], 72 | [np.sin(yangle),0.0,np.cos(yangle)], 73 | ])) 74 | rotmat*=zoom 75 | nxyz=xyz.dot(rotmat)+[showsz/2,showsz/2,0] 76 | 77 | ixyz=nxyz.astype('int32') 78 | show[:]=background 79 | dll.render_ball( 80 | ct.c_int(show.shape[0]), 81 | ct.c_int(show.shape[1]), 82 | show.ctypes.data_as(ct.c_void_p), 83 | ct.c_int(ixyz.shape[0]), 84 | ixyz.ctypes.data_as(ct.c_void_p), 85 | c0.ctypes.data_as(ct.c_void_p), 86 | c1.ctypes.data_as(ct.c_void_p), 87 | c2.ctypes.data_as(ct.c_void_p), 88 | ct.c_int(ballradius) 89 | ) 90 | 91 | if magnifyBlue>0: 92 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=0)) 93 | if magnifyBlue>=2: 94 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=0)) 95 | show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],1,axis=1)) 96 | if magnifyBlue>=2: 97 | 
show[:,:,0]=np.maximum(show[:,:,0],np.roll(show[:,:,0],-1,axis=1)) 98 | if showrot: 99 | cv2.putText(show,'xangle %d'%(int(xangle/np.pi*180)),(30,showsz-30),0,0.5,cv2.cv.CV_RGB(255,0,0)) 100 | cv2.putText(show,'yangle %d'%(int(yangle/np.pi*180)),(30,showsz-50),0,0.5,cv2.cv.CV_RGB(255,0,0)) 101 | cv2.putText(show,'zoom %d%%'%(int(zoom*100)),(30,showsz-70),0,0.5,cv2.cv.CV_RGB(255,0,0)) 102 | changed=True 103 | while pcd: 104 | if changed: 105 | render() 106 | changed=False 107 | # draw ground floor in green 108 | #cv2.drawContours(show, [corners[:4]],-1,(0,255,0),-3) 109 | for i,j in zip(range(4),range(4,8)): 110 | cv2.line(show, tuple(corners[i]), tuple(corners[j]),(255),3) 111 | #cv2.drawContours(show, [corners[4:]],-1,(0,0,255),3) 112 | cv2.imshow('show3d',show) 113 | # Draw a diagonal blue line with thickness of 5 px 114 | 115 | if waittime==0: 116 | cmd=cv2.waitKey(10)%256 117 | else: 118 | cmd=cv2.waitKey(waittime)%256 119 | 120 | if cmd==ord('q'): 121 | break 122 | elif cmd==ord('Q'): 123 | sys.exit(0) 124 | 125 | if cmd==ord('t') or cmd == ord('p'): 126 | ''' 127 | if cmd == ord('t'): 128 | if c_gt is None: 129 | c0=np.zeros((len(xyz),),dtype='float32')+255 130 | c1=np.zeros((len(xyz),),dtype='float32')+255 131 | c2=np.zeros((len(xyz),),dtype='float32')+255 132 | else: 133 | c0=c_gt[:,0] 134 | c1=c_gt[:,1] 135 | c2=c_gt[:,2] 136 | else: 137 | if c_pred is None: 138 | c0=np.zeros((len(xyz),),dtype='float32')+255 139 | c1=np.zeros((len(xyz),),dtype='float32')+255 140 | c2=np.zeros((len(xyz),),dtype='float32')+255 141 | else: 142 | c0=c_pred[:,0] 143 | c1=c_pred[:,1] 144 | c2=c_pred[:,2] 145 | if normalizecolor: 146 | c0/=(c0.max()+1e-14)/255.0 147 | c1/=(c1.max()+1e-14)/255.0 148 | c2/=(c2.max()+1e-14)/255.0 149 | ''' 150 | c0=np.require(c0,'float32','C') 151 | c1=np.require(c1,'float32','C') 152 | c2=np.require(c2,'float32','C') 153 | changed = False 154 | pcd = False 155 | 156 | ''' 157 | if cmd==ord('n'): 158 | zoom*=1.1 159 | changed=True 160 | elif cmd==ord('m'): 161 | zoom/=1.1 162 | changed=True 163 | elif cmd==ord('r'): 164 | zoom=1.0 165 | changed=True 166 | elif cmd==ord('s'): 167 | cv2.imwrite('show3d.png',show) 168 | if waittime!=0: 169 | break 170 | ''' 171 | return cmd 172 | if __name__=='__main__': 173 | np.random.seed(100) 174 | showpoints(np.random.randn(2500,3)) 175 | 176 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling_g.cu: -------------------------------------------------------------------------------- 1 | /* Furthest point sampling GPU implementation 2 | * Original author: Haoqiang Fan 3 | * Modified by Charles R. Qi 4 | * All Rights Reserved. 2017. 
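 * Kernels below: block-wise cumulative sums (cumsumKernel), inverse-CDF
 * sampling via binary search (binarysearchKernel), iterative farthest point
 * sampling (farthestpointsamplingKernel), and point gather/scatter-add.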
5 | */ 6 | 7 | __global__ void cumsumKernel(int b,int n,const float * __restrict__ inp,float * __restrict__ out){ 8 | const int BlockSize=2048; 9 | const int paddingLevel=5; 10 | __shared__ float buffer4[BlockSize*4]; 11 | __shared__ float buffer[BlockSize+(BlockSize>>paddingLevel)]; 12 | for (int i=blockIdx.x;i>2; 18 | for (int k=threadIdx.x*4;k>2)+(k>>(2+paddingLevel))]=v4; 33 | }else{ 34 | float v=0; 35 | for (int k2=k;k2>2)+(k>>(2+paddingLevel))]=v; 43 | } 44 | } 45 | int u=0; 46 | for (;(2<>(u+1));k+=blockDim.x){ 49 | int i1=(((k<<1)+2)<>paddingLevel; 52 | i2+=i2>>paddingLevel; 53 | buffer[i1]+=buffer[i2]; 54 | } 55 | } 56 | u--; 57 | for (;u>=0;u--){ 58 | __syncthreads(); 59 | for (int k=threadIdx.x;k>(u+1));k+=blockDim.x){ 60 | int i1=(((k<<1)+3)<>paddingLevel; 63 | i2+=i2>>paddingLevel; 64 | buffer[i1]+=buffer[i2]; 65 | } 66 | } 67 | __syncthreads(); 68 | for (int k=threadIdx.x*4;k>2)-1)+(((k>>2)-1)>>paddingLevel); 71 | buffer4[k]+=buffer[k2]; 72 | buffer4[k+1]+=buffer[k2]; 73 | buffer4[k+2]+=buffer[k2]; 74 | buffer4[k+3]+=buffer[k2]; 75 | } 76 | } 77 | __syncthreads(); 78 | for (int k=threadIdx.x;k>paddingLevel)]+runningsum2; 82 | float r2=runningsum+t; 83 | runningsum2=t-(r2-runningsum); 84 | runningsum=r2; 85 | __syncthreads(); 86 | } 87 | } 88 | } 89 | 90 | __global__ void binarysearchKernel(int b,int n,int m,const float * __restrict__ dataset,const float * __restrict__ query, int * __restrict__ result){ 91 | int base=1; 92 | while (base=1;k>>=1) 99 | if (r>=k && dataset[i*n+r-k]>=q) 100 | r-=k; 101 | result[i*m+j]=r; 102 | } 103 | } 104 | } 105 | __global__ void farthestpointsamplingKernel(int b,int n,int m,const float * __restrict__ dataset,float * __restrict__ temp,int * __restrict__ idxs){ 106 | if (m<=0) 107 | return; 108 | const int BlockSize=512; 109 | __shared__ float dists[BlockSize]; 110 | __shared__ int dists_i[BlockSize]; 111 | const int BufferSize=3072; 112 | __shared__ float buf[BufferSize*3]; 113 | for (int i=blockIdx.x;ibest){ 147 | best=d2; 148 | besti=k; 149 | } 150 | } 151 | dists[threadIdx.x]=best; 152 | dists_i[threadIdx.x]=besti; 153 | for (int u=0;(1<>(u+1))){ 156 | int i1=(threadIdx.x*2)<>>(b,n,inp,out); 196 | } 197 | //require b*n working space 198 | void probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out){ 199 | cumsumKernel<<<32,512>>>(b,n,inp_p,temp); 200 | binarysearchKernel<<>>(b,n,m,temp,inp_r,out); 201 | } 202 | //require 32*n working space 203 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out){ 204 | farthestpointsamplingKernel<<<32,512>>>(b,n,m,inp,temp,out); 205 | } 206 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out){ 207 | gatherpointKernel<<>>(b,n,m,inp,idx,out); 208 | } 209 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g){ 210 | scatteraddpointKernel<<>>(b,n,m,out_g,idx,inp_g); 211 | } 212 | 213 | -------------------------------------------------------------------------------- /tf_ops/sampling/tf_sampling.cpp: -------------------------------------------------------------------------------- 1 | /* Furthest point sampling 2 | * Original author: Haoqiang Fan 3 | * Modified by Charles R. Qi 4 | * All Rights Reserved. 2017. 
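 * Registers the ProbSample, FarthestPointSample, GatherPoint and
 * GatherPointGrad TensorFlow ops and wires them to the CUDA launchers
 * implemented in tf_sampling_g.cu.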
5 | */ 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | 12 | using namespace tensorflow; 13 | 14 | REGISTER_OP("ProbSample") 15 | .Input("inp: float32") 16 | .Input("inpr: float32") 17 | .Output("out: int32") 18 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 19 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ncategory 20 | c->WithRank(c->input(0), 2, &dims1); 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 22 | c->WithRank(c->input(1), 2, &dims2); 23 | // batch_size * npoints 24 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); 25 | c->set_output(0, output); 26 | return Status::OK(); 27 | }); 28 | REGISTER_OP("FarthestPointSample") 29 | .Attr("npoint: int") 30 | .Input("inp: float32") 31 | .Output("out: int32") 32 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 33 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * npoint * 3 34 | c->WithRank(c->input(0), 3, &dims1); 35 | int npoint; 36 | TF_RETURN_IF_ERROR(c->GetAttr("npoint", &npoint)); 37 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), npoint}); 38 | c->set_output(0, output); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GatherPoint") 42 | .Input("inp: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * 3 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints 49 | c->WithRank(c->input(1), 2, &dims2); 50 | // batch_size * npoints * 3 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)}); 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GatherPointGrad") 56 | .Input("inp: float32") 57 | .Input("idx: int32") 58 | .Input("out_g: float32") 59 | .Output("inp_g: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | void probsampleLauncher(int b,int n,int m,const float * inp_p,const float * inp_r,float * temp,int * out); 66 | class ProbSampleGpuOp: public OpKernel{ 67 | public: 68 | explicit ProbSampleGpuOp(OpKernelConstruction* context):OpKernel(context){} 69 | void Compute(OpKernelContext * context)override{ 70 | const Tensor& inp_tensor=context->input(0); 71 | const Tensor& inpr_tensor=context->input(1); 72 | auto inp_flat=inp_tensor.flat(); 73 | auto inpr_flat=inpr_tensor.flat(); 74 | const float * inp=&(inp_flat(0)); 75 | const float * inpr=&(inpr_flat(0)); 76 | OP_REQUIRES(context,inp_tensor.dims()==2,errors::InvalidArgument("ProbSample expects (batch_size,num_choices) inp shape")); 77 | int b=inp_tensor.shape().dim_size(0); 78 | int n=inp_tensor.shape().dim_size(1); 79 | OP_REQUIRES(context,inpr_tensor.dims()==2 && inpr_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ProbSample expects (batch_size,num_points) inpr shape")); 80 | int m=inpr_tensor.shape().dim_size(1); 81 | Tensor * out_tensor=NULL; 82 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 83 | 
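// The (b,m) int32 output holds the sampled indices; the (b,n) float scratch
// tensor allocated via allocate_temp below is the cumsum workspace and is
// released automatically when Compute returns.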
auto out_flat=out_tensor->flat(); 84 | int * out=&(out_flat(0)); 85 | Tensor temp_tensor; 86 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{b,n},&temp_tensor)); 87 | auto temp_flat=temp_tensor.flat(); 88 | float * temp=&(temp_flat(0)); 89 | probsampleLauncher(b,n,m,inp,inpr,temp,out); 90 | } 91 | }; 92 | REGISTER_KERNEL_BUILDER(Name("ProbSample").Device(DEVICE_GPU), ProbSampleGpuOp); 93 | 94 | void farthestpointsamplingLauncher(int b,int n,int m,const float * inp,float * temp,int * out); 95 | class FarthestPointSampleGpuOp: public OpKernel{ 96 | public: 97 | explicit FarthestPointSampleGpuOp(OpKernelConstruction* context):OpKernel(context) { 98 | OP_REQUIRES_OK(context, context->GetAttr("npoint", &npoint_)); 99 | OP_REQUIRES(context, npoint_ > 0, errors::InvalidArgument("FarthestPointSample expects positive npoint")); 100 | } 101 | void Compute(OpKernelContext * context)override{ 102 | int m = npoint_; 103 | 104 | const Tensor& inp_tensor=context->input(0); 105 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("FarthestPointSample expects (batch_size,num_points,3) inp shape")); 106 | int b=inp_tensor.shape().dim_size(0); 107 | int n=inp_tensor.shape().dim_size(1); 108 | auto inp_flat=inp_tensor.flat(); 109 | const float * inp=&(inp_flat(0)); 110 | Tensor * out_tensor; 111 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m},&out_tensor)); 112 | auto out_flat=out_tensor->flat(); 113 | int * out=&(out_flat(0)); 114 | Tensor temp_tensor; 115 | OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum::value,TensorShape{32,n},&temp_tensor)); 116 | auto temp_flat=temp_tensor.flat(); 117 | float * temp=&(temp_flat(0)); 118 | farthestpointsamplingLauncher(b,n,m,inp,temp,out); 119 | } 120 | private: 121 | int npoint_; 122 | }; 123 | REGISTER_KERNEL_BUILDER(Name("FarthestPointSample").Device(DEVICE_GPU),FarthestPointSampleGpuOp); 124 | 125 | void gatherpointLauncher(int b,int n,int m,const float * inp,const int * idx,float * out); 126 | class GatherPointGpuOp: public OpKernel{ 127 | public: 128 | explicit GatherPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 129 | void Compute(OpKernelContext * context)override{ 130 | const Tensor& inp_tensor=context->input(0); 131 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPoint expects (batch_size,num_points,3) inp shape")); 132 | int b=inp_tensor.shape().dim_size(0); 133 | int n=inp_tensor.shape().dim_size(1); 134 | const Tensor& idx_tensor=context->input(1); 135 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPoint expects (batch_size,num_result) idx shape")); 136 | int m=idx_tensor.shape().dim_size(1); 137 | auto inp_flat=inp_tensor.flat(); 138 | const float * inp=&(inp_flat(0)); 139 | auto idx_flat=idx_tensor.flat(); 140 | const int * idx=&(idx_flat(0)); 141 | Tensor * out_tensor=NULL; 142 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,3},&out_tensor)); 143 | auto out_flat=out_tensor->flat(); 144 | float * out=&(out_flat(0)); 145 | gatherpointLauncher(b,n,m,inp,idx,out); 146 | } 147 | }; 148 | REGISTER_KERNEL_BUILDER(Name("GatherPoint").Device(DEVICE_GPU),GatherPointGpuOp); 149 | 150 | void scatteraddpointLauncher(int b,int n,int m,const float * out_g,const int * idx,float * inp_g); 151 | class GatherPointGradGpuOp: public OpKernel{ 152 | public: 153 | explicit 
GatherPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){}
154 | void Compute(OpKernelContext * context)override{
155 | const Tensor& inp_tensor=context->input(0);
156 | OP_REQUIRES(context,inp_tensor.dims()==3 && inp_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_points,3) inp"));
157 | int b=inp_tensor.shape().dim_size(0);
158 | int n=inp_tensor.shape().dim_size(1);
159 | const Tensor& idx_tensor=context->input(1);
160 | OP_REQUIRES(context,idx_tensor.dims()==2 && idx_tensor.shape().dim_size(0)==b,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result) idx shape"));
161 | int m=idx_tensor.shape().dim_size(1);
162 | auto inp_flat=inp_tensor.flat<float>();
163 | const float * inp=&(inp_flat(0));
164 | auto idx_flat=idx_tensor.flat<int>();
165 | const int * idx=&(idx_flat(0));
166 | const Tensor& out_g_tensor=context->input(2);
167 | OP_REQUIRES(context,out_g_tensor.dims()==3 && out_g_tensor.shape().dim_size(0)==b && out_g_tensor.shape().dim_size(1)==m && out_g_tensor.shape().dim_size(2)==3,errors::InvalidArgument("GatherPointGradGpuOp expects (batch_size,num_result,3) out_g shape"));
168 | auto out_g_flat=out_g_tensor.flat<float>();
169 | const float * out_g=&(out_g_flat(0));
170 | Tensor * inp_g_tensor=NULL;
171 | OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&inp_g_tensor));
172 | auto inp_g_flat=inp_g_tensor->flat<float>();
173 | float * inp_g=&(inp_g_flat(0));
174 | cudaMemset(inp_g,0,b*n*3*sizeof(float)); // zero the (b,n,3) gradient buffer before scatter-add
175 | scatteraddpointLauncher(b,n,m,out_g,idx,inp_g);
176 | }
177 | };
178 | REGISTER_KERNEL_BUILDER(Name("GatherPointGrad").Device(DEVICE_GPU),GatherPointGradGpuOp);
179 |
180 |
-------------------------------------------------------------------------------- /models/frustum_pointnets_v1.py: --------------------------------------------------------------------------------
1 | ''' Frustum PointNets v1 Model.
2 | '''
3 | from __future__ import print_function
4 |
5 | import sys
6 | import os
7 | import tensorflow as tf
8 | import numpy as np
9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10 | ROOT_DIR = os.path.dirname(BASE_DIR)
11 | sys.path.append(BASE_DIR)
12 | sys.path.append(os.path.join(ROOT_DIR, 'utils'))
13 | import tf_util
14 | from model_util import NUM_HEADING_BIN, NUM_SIZE_CLUSTER, NUM_OBJECT_POINT
15 | from model_util import point_cloud_masking, get_center_regression_net
16 | from model_util import placeholder_inputs, parse_output_to_tensors, get_loss
17 |
18 | def get_instance_seg_v1_net(point_cloud, one_hot_vec,
19 | is_training, bn_decay, end_points):
20 | ''' 3D instance segmentation PointNet v1 network.
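Per-point features are concatenated with a tiled global feature and the one-hot class vector before the point-wise segmentation head.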
21 | Input: 22 | point_cloud: TF tensor in shape (B,N,4) 23 | frustum point clouds with XYZ and intensity in point channels 24 | XYZs are in frustum coordinate 25 | one_hot_vec: TF tensor in shape (B,3) 26 | length-3 vectors indicating predicted object type 27 | is_training: TF boolean scalar 28 | bn_decay: TF float scalar 29 | end_points: dict 30 | Output: 31 | logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object 32 | end_points: dict 33 | ''' 34 | batch_size = point_cloud.get_shape()[0].value 35 | num_point = point_cloud.get_shape()[1].value 36 | 37 | net = tf.expand_dims(point_cloud, 2) 38 | 39 | net = tf_util.conv2d(net, 64, [1,1], 40 | padding='VALID', stride=[1,1], 41 | bn=True, is_training=is_training, 42 | scope='conv1', bn_decay=bn_decay) 43 | net = tf_util.conv2d(net, 64, [1,1], 44 | padding='VALID', stride=[1,1], 45 | bn=True, is_training=is_training, 46 | scope='conv2', bn_decay=bn_decay) 47 | point_feat = tf_util.conv2d(net, 64, [1,1], 48 | padding='VALID', stride=[1,1], 49 | bn=True, is_training=is_training, 50 | scope='conv3', bn_decay=bn_decay) 51 | net = tf_util.conv2d(point_feat, 128, [1,1], 52 | padding='VALID', stride=[1,1], 53 | bn=True, is_training=is_training, 54 | scope='conv4', bn_decay=bn_decay) 55 | net = tf_util.conv2d(net, 1024, [1,1], 56 | padding='VALID', stride=[1,1], 57 | bn=True, is_training=is_training, 58 | scope='conv5', bn_decay=bn_decay) 59 | global_feat = tf_util.max_pool2d(net, [num_point,1], 60 | padding='VALID', scope='maxpool') 61 | 62 | global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3) 63 | global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1]) 64 | concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand]) 65 | 66 | net = tf_util.conv2d(concat_feat, 512, [1,1], 67 | padding='VALID', stride=[1,1], 68 | bn=True, is_training=is_training, 69 | scope='conv6', bn_decay=bn_decay) 70 | net = tf_util.conv2d(net, 256, [1,1], 71 | padding='VALID', stride=[1,1], 72 | bn=True, is_training=is_training, 73 | scope='conv7', bn_decay=bn_decay) 74 | net = tf_util.conv2d(net, 128, [1,1], 75 | padding='VALID', stride=[1,1], 76 | bn=True, is_training=is_training, 77 | scope='conv8', bn_decay=bn_decay) 78 | net = tf_util.conv2d(net, 128, [1,1], 79 | padding='VALID', stride=[1,1], 80 | bn=True, is_training=is_training, 81 | scope='conv9', bn_decay=bn_decay) 82 | net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5) 83 | 84 | logits = tf_util.conv2d(net, 2, [1,1], 85 | padding='VALID', stride=[1,1], activation_fn=None, 86 | scope='conv10') 87 | logits = tf.squeeze(logits, [2]) # BxNxC 88 | return logits, end_points 89 | 90 | 91 | def get_3d_box_estimation_v1_net(object_point_cloud, one_hot_vec, 92 | is_training, bn_decay, end_points): 93 | ''' 3D Box Estimation PointNet v1 network. 
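A shared per-point MLP and a max pool produce a global feature, which is concatenated with the one-hot class vector and regressed to the box parameters.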
94 | Input: 95 | object_point_cloud: TF tensor in shape (B,M,C) 96 | point clouds in object coordinate 97 | one_hot_vec: TF tensor in shape (B,3) 98 | length-3 vectors indicating predicted object type 99 | Output: 100 | output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4) 101 | including box centers, heading bin class scores and residuals, 102 | and size cluster scores and residuals 103 | ''' 104 | num_point = object_point_cloud.get_shape()[1].value 105 | net = tf.expand_dims(object_point_cloud, 2) 106 | net = tf_util.conv2d(net, 128, [1,1], 107 | padding='VALID', stride=[1,1], 108 | bn=True, is_training=is_training, 109 | scope='conv-reg1', bn_decay=bn_decay) 110 | net = tf_util.conv2d(net, 128, [1,1], 111 | padding='VALID', stride=[1,1], 112 | bn=True, is_training=is_training, 113 | scope='conv-reg2', bn_decay=bn_decay) 114 | net = tf_util.conv2d(net, 256, [1,1], 115 | padding='VALID', stride=[1,1], 116 | bn=True, is_training=is_training, 117 | scope='conv-reg3', bn_decay=bn_decay) 118 | net = tf_util.conv2d(net, 512, [1,1], 119 | padding='VALID', stride=[1,1], 120 | bn=True, is_training=is_training, 121 | scope='conv-reg4', bn_decay=bn_decay) 122 | net = tf_util.max_pool2d(net, [num_point,1], 123 | padding='VALID', scope='maxpool2') 124 | net = tf.squeeze(net, axis=[1,2]) 125 | net = tf.concat([net, one_hot_vec], axis=1) 126 | net = tf_util.fully_connected(net, 512, scope='fc1', bn=True, 127 | is_training=is_training, bn_decay=bn_decay) 128 | net = tf_util.fully_connected(net, 256, scope='fc2', bn=True, 129 | is_training=is_training, bn_decay=bn_decay) 130 | 131 | # The first 3 numbers: box center coordinates (cx,cy,cz), 132 | # the next NUM_HEADING_BIN*2: heading bin class scores and bin residuals 133 | # next NUM_SIZE_CLUSTER*4: box cluster scores and residuals 134 | output = tf_util.fully_connected(net, 135 | 3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4, activation_fn=None, scope='fc3') 136 | return output, end_points 137 | 138 | 139 | def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None): 140 | ''' Frustum PointNets model. The model predict 3D object masks and 141 | amodel bounding boxes for objects in frustum point clouds. 
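The pipeline runs instance segmentation, masks and re-centers the object points, refines the center with a T-Net, and finally estimates the amodal box.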
142 | 143 | Input: 144 | point_cloud: TF tensor in shape (B,N,4) 145 | frustum point clouds with XYZ and intensity in point channels 146 | XYZs are in frustum coordinate 147 | one_hot_vec: TF tensor in shape (B,3) 148 | length-3 vectors indicating predicted object type 149 | is_training: TF boolean scalar 150 | bn_decay: TF float scalar 151 | Output: 152 | end_points: dict (map from name strings to TF tensors) 153 | ''' 154 | end_points = {} 155 | 156 | # 3D Instance Segmentation PointNet 157 | logits, end_points = get_instance_seg_v1_net(\ 158 | point_cloud, one_hot_vec, 159 | is_training, bn_decay, end_points) 160 | end_points['mask_logits'] = logits 161 | 162 | # Masking 163 | # select masked points and translate to masked points' centroid 164 | object_point_cloud_xyz, mask_xyz_mean, end_points = \ 165 | point_cloud_masking(point_cloud, logits, end_points) 166 | 167 | # T-Net and coordinate translation 168 | center_delta, end_points = get_center_regression_net(\ 169 | object_point_cloud_xyz, one_hot_vec, 170 | is_training, bn_decay, end_points) 171 | stage1_center = center_delta + mask_xyz_mean # Bx3 172 | end_points['stage1_center'] = stage1_center 173 | # Get object point cloud in object coordinate 174 | object_point_cloud_xyz_new = \ 175 | object_point_cloud_xyz - tf.expand_dims(center_delta, 1) 176 | 177 | # Amodel Box Estimation PointNet 178 | output, end_points = get_3d_box_estimation_v1_net(\ 179 | object_point_cloud_xyz_new, one_hot_vec, 180 | is_training, bn_decay, end_points) 181 | 182 | # Parse output to 3D box parameters 183 | end_points = parse_output_to_tensors(output, end_points) 184 | end_points['center'] = end_points['center_boxnet'] + stage1_center # Bx3 185 | 186 | return end_points 187 | 188 | if __name__=='__main__': 189 | with tf.Graph().as_default(): 190 | inputs = tf.zeros((32,1024,4)) 191 | outputs = get_model(inputs, tf.ones((32,3)), tf.constant(True)) 192 | for key in outputs: 193 | print((key, outputs[key])) 194 | loss = get_loss(tf.zeros((32,1024),dtype=tf.int32), 195 | tf.zeros((32,3)), tf.zeros((32,),dtype=tf.int32), 196 | tf.zeros((32,)), tf.zeros((32,),dtype=tf.int32), 197 | tf.zeros((32,3)), outputs) 198 | print(loss) 199 | -------------------------------------------------------------------------------- /models/pointnet2_cls_joint.py: -------------------------------------------------------------------------------- 1 | """ 2 | PointNet++ Model for point clouds pose detection 3 | """ 4 | 5 | import os 6 | import sys 7 | import math 8 | BASE_DIR = os.path.dirname(__file__) 9 | sys.path.append(BASE_DIR) 10 | ROOT_DIR = os.path.dirname(BASE_DIR) 11 | sys.path.append(os.path.join(BASE_DIR, '../utils')) 12 | sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping')) 13 | from tf_grouping import query_ball_point, group_point, knn_point 14 | import tensorflow as tf 15 | import numpy as np 16 | import tf_util 17 | import modelnet40_dataset_orig 18 | from pointnet_util_edge_only import pointnet_sa_module# 19 | from sklearn.metrics import mean_squared_error 20 | from eul2rot import euler2rotm 21 | from sklearn.neighbors import NearestNeighbors 22 | 23 | 24 | # map axes strings to/from tuples of inner axis, parity, repetition, frame 25 | _AXES2TUPLE = { 26 | 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0), 27 | 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0), 28 | 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0), 29 | 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0), 30 | 'rzyx': (0, 0, 0, 1), 'rxyx': 
(0, 0, 1, 1), 'ryzx': (0, 1, 0, 1), 31 | 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1), 32 | 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1), 33 | 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)} 34 | 35 | _NEXT_AXIS = [1, 2, 0, 1] 36 | 37 | def placeholder_inputs_class(batch_size, num_point): 38 | pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 4)) 39 | labels_pl_class = tf.placeholder(tf.int32, shape=(batch_size)) 40 | return pointclouds_pl,labels_pl_class 41 | 42 | def placeholder_inputs_pose(batch_size, num_point): 43 | labels_pl_pose = tf.placeholder(tf.float32, shape=(batch_size,7)) 44 | return labels_pl_pose 45 | 46 | def placeholder_inputs_anchor(batch_size, num_point): 47 | labels_pl_anchor = tf.placeholder(tf.int32, shape=(batch_size)) 48 | return labels_pl_anchor 49 | 50 | 51 | def get_corners(batch_size,bb_param): 52 | #bb_param (batchsize, w, l, h, x, y ,z) Nx6 53 | corners = tf.placeholder(tf.float32,shape=(batch_size,6,8,3)) 54 | ''' 55 | corners[:,0,0:3] = [bb_param[:,3]+bb_param[:,0]/2,bb_param[:,4]+bb_param[:,1]/2,bb_param[:,5]-bb_param[:,2]/2] 56 | corners[:,1,0:3] = [bb_param[:,3]+bb_param[:,0]/2,bb_param[:,4]-bb_param[:,1]/2,bb_param[:,5]-bb_param[:,2]/2] 57 | corners[:,2,0:3] = [bb_param[:,3]+bb_param[:,0]/2,bb_param[:,4]+bb_param[:,1]/2,bb_param[:,5]+bb_param[:,2]/2] 58 | corners[:,3,0:3] = [bb_param[:,3]+bb_param[:,0]/2,bb_param[:,4]-bb_param[:,1]/2,bb_param[:,5]+bb_param[:,2]/2] 59 | corners[:,4,0:3] = [bb_param[:,3]-bb_param[:,0]/2,bb_param[:,4]+bb_param[:,1]/2,bb_param[:,5]-bb_param[:,2]/2] 60 | corners[:,5,0:3] = [bb_param[:,3]-bb_param[:,0]/2,bb_param[:,4]-bb_param[:,1]/2,bb_param[:,5]-bb_param[:,2]/2] 61 | corners[:,6,0:3] = [bb_param[:,3]-bb_param[:,0]/2,bb_param[:,4]+bb_param[:,1]/2,bb_param[:,5]+bb_param[:,2]/2] 62 | corners[:,7,0:3] = [bb_param[:,3]-bb_param[:,0]/2,bb_param[:,4]-bb_param[:,1]/2,bb_param[:,5]+bb_param[:,2]/2] 63 | ''' 64 | return corners 65 | 66 | def huber_loss(error, delta): 67 | abs_error = tf.abs(error) 68 | quadratic = tf.minimum(abs_error, delta) 69 | linear = (abs_error - quadratic) 70 | losses = 0.5 * quadratic**2 + delta * linear 71 | return tf.reduce_mean(losses) 72 | 73 | 74 | def get_model_class(point_cloud, is_training, bn_decay=None): 75 | """ Classification PointNet, input is BxNx3, output Bx40 """ 76 | batch_size = point_cloud.get_shape()[0].value 77 | num_point = point_cloud.get_shape()[1].value 78 | end_points = {} 79 | l0_xyz = point_cloud 80 | l0_points = None 81 | end_points['l0_xyz'] = l0_xyz 82 | 83 | # Set abstraction layers 84 | # Note: When using NCHW for layer 2, we see increased GPU memory usage (in TF1.4). 85 | # So we only use NCHW for layer 1 until this issue can be resolved. 
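# Each set-abstraction (SA) level samples npoint centroids with farthest
# point sampling, groups nsample neighbours per centroid, and applies the
# per-group MLP given below before pooling.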
npoint=512,128 nsample = 32, 64 86 | l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points,npoint=512, radius=1, nsample=64, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope=('layer1'),scope_reuse = False, use_nchw=True) 87 | l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=128, radius=1, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope=('layer2'),scope_reuse = False) 88 | l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None, mlp=[256,512,1024], mlp2=None, group_all=True, is_training=is_training, bn_decay=bn_decay, scope=('layer3'),scope_reuse = False) 89 | 90 | # Fully connected layers for classification 91 | net_class = tf.concat([tf.reshape(l3_points, [batch_size, -1]),l0_xyz[:,:,3]],axis=-1)#tf.reshape(l3_points, [batch_size, -1]) 92 | net_class = tf_util.fully_connected(net_class, 512, bn=True, is_training=is_training, scope='fc1_class', bn_decay=bn_decay) 93 | net_class = tf_util.dropout(net_class, keep_prob=0.5, is_training=is_training, scope='dp1_class') 94 | net_class = tf_util.fully_connected(net_class, 256, bn=True, is_training=is_training, scope='fc2_class', bn_decay=bn_decay) 95 | net_class = tf_util.dropout(net_class, keep_prob=0.5, is_training=is_training, scope='dp2_class')#256 96 | net_class = tf_util.fully_connected(net_class, 40, activation_fn=None, scope='fc3_class') 97 | 98 | # Fully connected layers for center estimation and offset 99 | net_pose = tf.reshape(l3_points, [batch_size, -1]) 100 | net_pose = tf_util.fully_connected(net_pose, 512, bn=True, is_training=is_training, scope='fc1_pose', bn_decay=bn_decay) 101 | net_pose = tf_util.dropout(net_pose, keep_prob=0.5, is_training=is_training, scope='dp1_pose') 102 | net_pose = tf_util.fully_connected(net_pose, 256, bn=True, is_training=is_training, scope='fc2_pose', bn_decay=bn_decay) 103 | net_pose = tf_util.dropout(net_pose, keep_prob=0.5, is_training=is_training, scope='dp2_pose') 104 | net_pose = tf_util.fully_connected(net_pose, 7, activation_fn=None, scope='fc3_pose') 105 | 106 | # Fully connected layers for anchor box classification 107 | net_anchor = tf.reshape(l3_points, [batch_size, -1]) 108 | net_anchor = tf_util.fully_connected(net_anchor, 512, bn=True, is_training=is_training, scope='fc1_anchor', bn_decay=bn_decay)#512 109 | net_anchor = tf_util.dropout(net_anchor, keep_prob=0.5, is_training=is_training, scope='dp1_anchor') 110 | net_anchor = tf_util.fully_connected(net_anchor, 256, bn=True, is_training=is_training, scope='fc2_anchor', bn_decay=bn_decay) 111 | net_anchor = tf_util.dropout(net_anchor, keep_prob=0.5, is_training=is_training, scope='dp2_anchor')#256 112 | net_anchor = tf_util.fully_connected(net_anchor, 4, activation_fn=None, scope='fc3_anchor') 113 | 114 | 115 | return net_class, net_pose, net_anchor, end_points 116 | 117 | def get_model_pose(point_cloud, is_training, bn_decay=None): 118 | """ Classification PointNet, input is BxNx3, output Bx40 """ 119 | batch_size = point_cloud.get_shape()[0].value 120 | num_point = point_cloud.get_shape()[1].value 121 | end_points = {} 122 | l0_xyz = point_cloud 123 | l0_points = None 124 | end_points['l0_xyz'] = l0_xyz 125 | 126 | # Set abstraction layers 127 | # Note: When using NCHW for layer 2, we see increased GPU memory usage (in TF1.4). 128 | # So we only use NCHW for layer 1 until this issue can be resolved. 
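# The pose branch repeats the SA hierarchy under its own variable scopes
# ('layer11'/'layer22'/'layer33'), so classification and pose weights are
# kept separate.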
npoint=512,128 nsample = 32, 64 129 | l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=512, radius=1, nsample=64, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope=('layer11'),scope_reuse = False, use_nchw=False) 130 | l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=128, radius=1, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope=('layer22'),scope_reuse = False) 131 | l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=None, radius=None, nsample=None, mlp=[256,512,1024], mlp2=None, group_all=True, is_training=is_training, bn_decay=bn_decay, scope=('layer33'),scope_reuse = False) 132 | 133 | # Fully connected layers for pose estimation 134 | net_pose = tf.reshape(l3_points, [batch_size, -1]) 135 | #net_pose = tf_util.fully_connected(net_pose, 1024, bn=True, is_training=is_training, scope='fc1_pose', bn_decay=bn_decay)#512 136 | #net_pose = tf_util.dropout(net_pose, keep_prob=0.5, is_training=is_training, scope='dp1_pose') 137 | net_pose = tf_util.fully_connected(net_pose, 512, bn=True, is_training=is_training, scope='fc1_pose', bn_decay=bn_decay) 138 | net_pose = tf_util.dropout(net_pose, keep_prob=0.5, is_training=is_training, scope='dp1_pose') 139 | net_pose = tf_util.fully_connected(net_pose, 256, bn=True, is_training=is_training, scope='fc2_pose', bn_decay=bn_decay) 140 | net_pose = tf_util.dropout(net_pose, keep_prob=0.5, is_training=is_training, scope='dp2_pose') 141 | net_pose = tf_util.fully_connected(net_pose, 6, activation_fn=None, scope='fc3_pose') 142 | 143 | return net_pose, end_points 144 | 145 | def get_loss_pose(pred_pose, label_pose, end_points,bsize): 146 | """ pred: B*NUM_CLASSES, 147 | label: B, """ 148 | # L1 norm 149 | 150 | reg_loss = tf.norm(label_pose - pred_pose) 151 | loss = huber_loss(reg_loss, delta=2.0) 152 | #tf.summary.scalar('center loss', center_loss) 153 | #stage1_center_dist = tf.norm(center_label - \ 154 | # end_points['stage1_center'], axis=-1) 155 | #stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0) 156 | #tf.summary.scalar('stage1 center loss', stage1_center_loss) 157 | 158 | 159 | #loss = tf.reduce_mean(tf.square(tf.abs(label_pose-pred_pose))) 160 | #pred_corners = get_corners(bsize,pred_pose) 161 | #actual_corners = get_corners(bsize,pred_pose) 162 | tf.summary.scalar('regression loss', loss) 163 | tf.add_to_collection('losses_pose', loss) 164 | return loss 165 | 166 | def get_loss_class(pred_class, label_class, end_points): 167 | cls = tf.one_hot(label_class, 40) 168 | loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=cls, logits=pred_class)) 169 | #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred_class, labels=label_class) 170 | classify_loss = loss#tf.reduce_mean(loss) 171 | tf.summary.scalar('classify loss', classify_loss) 172 | tf.add_to_collection('losses_class', classify_loss) 173 | return classify_loss 174 | 175 | def get_loss_anchor(pred_anchor, label_anchor): 176 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred_anchor, labels=label_anchor) 177 | anchor_loss = tf.reduce_mean(loss) 178 | tf.summary.scalar('anchor loss', anchor_loss) 179 | tf.add_to_collection('losses_anchor', anchor_loss) 180 | return anchor_loss 181 | 182 | if __name__=='__main__': 183 | with tf.Graph().as_default(): 184 | inputs = tf.zeros((32,1024,4)) 185 | output, _ = get_model(inputs, tf.constant(True)) 186 | 187 | 
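The Huber loss above switches from a quadratic to a linear penalty at delta, which keeps large pose residuals from dominating the regression gradient. A minimal NumPy sketch of the same piecewise form (huber_loss_np is a hypothetical name for illustration; training uses the TF version defined in this file):

import numpy as np

def huber_loss_np(error, delta=2.0):
    # quadratic for |error| <= delta, linear beyond, exactly as in huber_loss above
    abs_error = np.abs(error)
    quadratic = np.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return np.mean(0.5 * quadratic ** 2 + delta * linear)

# e.g. residuals of a predicted 7-dof pose vector against its label
print(huber_loss_np(np.array([0.1, 3.0, -5.0])))   # small errors stay quadratic

Note that get_loss_pose feeds the L2 norm of the whole 7-dof residual through this loss, so the clipping acts on the total pose error rather than per component.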
-------------------------------------------------------------------------------- /tf_ops/grouping/tf_grouping.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // memset 4 | #include // rand, RAND_MAX 5 | #include // sqrtf 6 | #include "tensorflow/core/framework/op.h" 7 | #include "tensorflow/core/framework/op_kernel.h" 8 | #include "tensorflow/core/framework/shape_inference.h" 9 | #include "tensorflow/core/framework/common_shape_fns.h" 10 | #include 11 | using namespace tensorflow; 12 | 13 | REGISTER_OP("QueryBallPoint") 14 | .Attr("radius: float") 15 | .Attr("nsample: int") 16 | .Input("xyz1: float32") 17 | .Input("xyz2: float32") 18 | .Output("idx: int32") 19 | .Output("pts_cnt: int32") 20 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 21 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoint * 3 22 | c->WithRank(c->input(1), 3, &dims2); 23 | int nsample; 24 | TF_RETURN_IF_ERROR(c->GetAttr("nsample", &nsample)); 25 | ::tensorflow::shape_inference::ShapeHandle output1 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), nsample}); 26 | c->set_output(0, output1); 27 | ::tensorflow::shape_inference::ShapeHandle output2 = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1)}); 28 | c->set_output(1, output2); 29 | return Status::OK(); 30 | }); 31 | REGISTER_OP("SelectionSort") 32 | .Attr("k: int") 33 | .Input("dist: float32") 34 | .Output("outi: int32") 35 | .Output("out: float32") 36 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 37 | c->set_output(0, c->input(0)); 38 | c->set_output(1, c->input(0)); 39 | return Status::OK(); 40 | }); 41 | REGISTER_OP("GroupPoint") 42 | .Input("points: float32") 43 | .Input("idx: int32") 44 | .Output("out: float32") 45 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 46 | ::tensorflow::shape_inference::ShapeHandle dims1; // batch_size * ndataset * channels 47 | c->WithRank(c->input(0), 3, &dims1); 48 | ::tensorflow::shape_inference::ShapeHandle dims2; // batch_size * npoints * nsample 49 | c->WithRank(c->input(1), 3, &dims2); 50 | // batch_size * npoints * nsample * channels 51 | ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims2, 0), c->Dim(dims2, 1), c->Dim(dims2, 2), c->Dim(dims1, 2)}); 52 | c->set_output(0, output); 53 | return Status::OK(); 54 | }); 55 | REGISTER_OP("GroupPointGrad") 56 | .Input("points: float32") 57 | .Input("idx: int32") 58 | .Input("grad_out: float32") 59 | .Output("grad_points: float32") 60 | .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { 61 | c->set_output(0, c->input(0)); 62 | return Status::OK(); 63 | }); 64 | 65 | 66 | void queryBallPointLauncher(int b, int n, int m, float radius, int nsample, const float *xyz1, const float *xyz2, int *idx, int *pts_cnt); 67 | class QueryBallPointGpuOp : public OpKernel { 68 | public: 69 | explicit QueryBallPointGpuOp(OpKernelConstruction* context) : OpKernel(context) { 70 | OP_REQUIRES_OK(context, context->GetAttr("radius", &radius_)); 71 | OP_REQUIRES(context, radius_ > 0, errors::InvalidArgument("QueryBallPoint expects positive radius")); 72 | 73 | OP_REQUIRES_OK(context, context->GetAttr("nsample", &nsample_)); 74 | OP_REQUIRES(context, nsample_ > 0, errors::InvalidArgument("QueryBallPoint expects positive nsample")); 75 | } 76 | 77 | void Compute(OpKernelContext* context) override { 78 | const Tensor& xyz1_tensor = context->input(0); 79 | OP_REQUIRES(context, xyz1_tensor.dims()==3 && 
xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, ndataset, 3) xyz1 shape.")); 80 | int b = xyz1_tensor.shape().dim_size(0); 81 | int n = xyz1_tensor.shape().dim_size(1); 82 | 83 | const Tensor& xyz2_tensor = context->input(1); 84 | OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("QueryBallPoint expects (batch_size, npoint, 3) xyz2 shape.")); 85 | int m = xyz2_tensor.shape().dim_size(1); 86 | 87 | Tensor *idx_tensor = nullptr; 88 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,nsample_}, &idx_tensor)); 89 | Tensor *pts_cnt_tensor = nullptr; 90 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m}, &pts_cnt_tensor)); 91 | 92 | auto xyz1_flat = xyz1_tensor.flat(); 93 | const float *xyz1 = &(xyz1_flat(0)); 94 | auto xyz2_flat = xyz2_tensor.flat(); 95 | const float *xyz2 = &(xyz2_flat(0)); 96 | auto idx_flat = idx_tensor->flat(); 97 | int *idx = &(idx_flat(0)); 98 | auto pts_cnt_flat = pts_cnt_tensor->flat(); 99 | int *pts_cnt = &(pts_cnt_flat(0)); 100 | queryBallPointLauncher(b,n,m,radius_,nsample_,xyz1,xyz2,idx,pts_cnt); 101 | } 102 | private: 103 | float radius_; 104 | int nsample_; 105 | }; 106 | REGISTER_KERNEL_BUILDER(Name("QueryBallPoint").Device(DEVICE_GPU), QueryBallPointGpuOp); 107 | 108 | void selectionSortLauncher(int b, int n, int m, int k, const float *dist, int *outi, float *out); 109 | class SelectionSortGpuOp : public OpKernel { 110 | public: 111 | explicit SelectionSortGpuOp(OpKernelConstruction* context) : OpKernel(context) { 112 | OP_REQUIRES_OK(context, context->GetAttr("k", &k_)); 113 | OP_REQUIRES(context, k_ > 0, errors::InvalidArgument("SelectionSort expects positive k")); 114 | } 115 | 116 | void Compute(OpKernelContext* context) override { 117 | const Tensor& dist_tensor = context->input(0); 118 | OP_REQUIRES(context, dist_tensor.dims()==3, errors::InvalidArgument("SelectionSort expects (b,m,n) dist shape.")); 119 | int b = dist_tensor.shape().dim_size(0); 120 | int m = dist_tensor.shape().dim_size(1); 121 | int n = dist_tensor.shape().dim_size(2); 122 | 123 | Tensor *outi_tensor = nullptr; 124 | OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,m,n}, &outi_tensor)); 125 | Tensor *out_tensor = nullptr; 126 | OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,m,n}, &out_tensor)); 127 | 128 | auto dist_flat = dist_tensor.flat(); 129 | const float *dist = &(dist_flat(0)); 130 | auto outi_flat = outi_tensor->flat(); 131 | int *outi = &(outi_flat(0)); 132 | auto out_flat = out_tensor->flat(); 133 | float *out = &(out_flat(0)); 134 | selectionSortLauncher(b,n,m,k_,dist,outi,out); 135 | } 136 | private: 137 | int k_; 138 | }; 139 | REGISTER_KERNEL_BUILDER(Name("SelectionSort").Device(DEVICE_GPU), SelectionSortGpuOp); 140 | 141 | 142 | void groupPointLauncher(int b, int n, int c, int m, int nsample, const float *points, const int *idx, float *out); 143 | class GroupPointGpuOp: public OpKernel{ 144 | public: 145 | explicit GroupPointGpuOp(OpKernelConstruction * context):OpKernel(context){} 146 | 147 | void Compute(OpKernelContext * context) override { 148 | const Tensor& points_tensor=context->input(0); 149 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPoint expects (batch_size, num_points, channel) points shape")); 150 | int b = points_tensor.shape().dim_size(0); 151 | int n = points_tensor.shape().dim_size(1); 152 | int c = points_tensor.shape().dim_size(2); 153 | 154 | 
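// GroupPoint gathers neighbourhood features, out[b,p,s,:] = points[b, idx[b,p,s], :],
// yielding a (batch_size, npoints, nsample, channel) tensor for the per-group MLPs.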
const Tensor& idx_tensor=context->input(1); 155 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPoint expects (batch_size, npoints, nsample) idx shape")); 156 | int m = idx_tensor.shape().dim_size(1); 157 | int nsample = idx_tensor.shape().dim_size(2); 158 | 159 | Tensor * out_tensor = nullptr; 160 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,nsample,c}, &out_tensor)); 161 | 162 | auto points_flat = points_tensor.flat(); 163 | const float *points = &(points_flat(0)); 164 | auto idx_flat = idx_tensor.flat(); 165 | const int *idx = &(idx_flat(0)); 166 | auto out_flat = out_tensor->flat(); 167 | float *out = &(out_flat(0)); 168 | groupPointLauncher(b,n,c,m,nsample,points,idx,out); 169 | } 170 | }; 171 | REGISTER_KERNEL_BUILDER(Name("GroupPoint").Device(DEVICE_GPU),GroupPointGpuOp); 172 | 173 | void groupPointGradLauncher(int b, int n, int c, int m, int nsample, const float *grad_out, const int *idx, float *grad_points); 174 | class GroupPointGradGpuOp: public OpKernel{ 175 | public: 176 | explicit GroupPointGradGpuOp(OpKernelConstruction * context):OpKernel(context){} 177 | 178 | void Compute(OpKernelContext * context) override { 179 | const Tensor& points_tensor=context->input(0); 180 | OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("GroupPointGrad expects (batch_size, num_points, channel) points shape")); 181 | int b = points_tensor.shape().dim_size(0); 182 | int n = points_tensor.shape().dim_size(1); 183 | int c = points_tensor.shape().dim_size(2); 184 | 185 | const Tensor& idx_tensor=context->input(1); 186 | OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample) idx shape")); 187 | int m = idx_tensor.shape().dim_size(1); 188 | int nsample = idx_tensor.shape().dim_size(2); 189 | 190 | const Tensor& grad_out_tensor=context->input(2); 191 | OP_REQUIRES(context,grad_out_tensor.dims()==4 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==m && grad_out_tensor.shape().dim_size(2)==nsample && grad_out_tensor.shape().dim_size(3)==c, errors::InvalidArgument("GroupPointGrad expects (batch_size, npoints, nsample, channel) grad_out shape")); 192 | 193 | Tensor * grad_points_tensor = nullptr; 194 | OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &grad_points_tensor)); 195 | 196 | auto points_flat = points_tensor.flat(); 197 | const float *points = &(points_flat(0)); 198 | auto idx_flat = idx_tensor.flat(); 199 | const int *idx = &(idx_flat(0)); 200 | auto grad_out_flat = grad_out_tensor.flat(); 201 | const float *grad_out = &(grad_out_flat(0)); 202 | auto grad_points_flat = grad_points_tensor->flat(); 203 | float *grad_points = &(grad_points_flat(0)); 204 | cudaMemset(grad_points, 0, sizeof(float)*b*n*c); 205 | groupPointGradLauncher(b,n,c,m,nsample,grad_out,idx,grad_points); 206 | } 207 | }; 208 | REGISTER_KERNEL_BUILDER(Name("GroupPointGrad").Device(DEVICE_GPU),GroupPointGradGpuOp); 209 | 210 | 211 | -------------------------------------------------------------------------------- /utils/provider.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import h5py 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(BASE_DIR) 7 | 8 | def shuffle_data(data, labels): 9 | """ Shuffle data and labels. 10 | Input: 11 | data: B,N,... 
numpy array 12 | label: B,... numpy array 13 | Return: 14 | shuffled data, label and shuffle indices 15 | """ 16 | idx = np.arange(len(labels)) 17 | np.random.shuffle(idx) 18 | return data[idx, ...], labels[idx], idx 19 | 20 | def shuffle_points(batch_data): 21 | """ Shuffle orders of points in each point cloud -- changes FPS behavior. 22 | Use the same shuffling idx for the entire batch. 23 | Input: 24 | BxNxC array 25 | Output: 26 | BxNxC array 27 | """ 28 | idx = np.arange(batch_data.shape[1]) 29 | np.random.shuffle(idx) 30 | return batch_data[:,idx,:] 31 | 32 | def rotate_point_cloud(batch_data): 33 | """ Randomly rotate the point clouds to augument the dataset 34 | rotation is per shape based along up direction 35 | Input: 36 | BxNx3 array, original batch of point clouds 37 | Return: 38 | BxNx3 array, rotated batch of point clouds 39 | """ 40 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 41 | for k in range(batch_data.shape[0]): 42 | rotation_angle = np.random.uniform(-1,1) * np.pi # 43 | cosval = np.cos(rotation_angle) 44 | sinval = np.sin(rotation_angle) 45 | rotation_matrix = np.array([[cosval,-sinval,0], 46 | [sinval,cosval, 0], 47 | [0, 0, 1]]) 48 | shape_pc = batch_data[k, ...] 49 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 50 | return rotated_data 51 | 52 | def rotate_point_cloud_with_label(batch_data,batch_label): 53 | """ Randomly rotate the point clouds to augument the dataset 54 | rotation is per shape based along up direction 55 | Input: 56 | BxNx3 array, original batch of point clouds 57 | Return: 58 | BxNx3 array, rotated batch of point clouds 59 | """ 60 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 61 | rotated_label = np.zeros(batch_label.shape, dtype=np.float32) 62 | k = 0 63 | for k in range(0,batch_data.shape[0]): 64 | rotation_angle = np.random.uniform(-1,1) * np.pi # 65 | cosval = np.cos(rotation_angle) 66 | sinval = np.sin(rotation_angle) 67 | rotation_matrix = np.array([[cosval,-sinval,0], 68 | [sinval,cosval, 0], 69 | [0, 0, 1]]) 70 | shape_pc = batch_data[k, ...] 71 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 72 | rotated_label[k,:] = batch_label[k,:] 73 | rotated_label[k,2] = batch_label[k,2]+rotation_angle 74 | if(rotated_label[k,2]>3.142): 75 | rotated_label[k,2] -= 6.284 76 | elif(rotated_label[k,2]<-3.142): 77 | rotated_label[k,2] -= -6.284 78 | return rotated_data,rotated_label 79 | 80 | 81 | def rotate_point_cloud_with_normal(batch_xyz_normal): 82 | ''' Randomly rotate XYZ, normal point cloud. 
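The same up-axis rotation matrix is applied to the XYZ channels and to the normals, so the normals stay consistent with the rotated geometry.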
83 | Input: 84 | batch_xyz_normal: B,N,6, first three channels are XYZ, last 3 all normal 85 | Output: 86 | B,N,6, rotated XYZ, normal point cloud 87 | ''' 88 | for k in range(batch_xyz_normal.shape[0]): 89 | rotation_angle = np.random.uniform() * 2 * np.pi 90 | cosval = np.cos(rotation_angle) 91 | sinval = np.sin(rotation_angle) 92 | rotation_matrix = np.array([[cosval, 0, sinval], 93 | [0, 1, 0], 94 | [-sinval, 0, cosval]]) 95 | shape_pc = batch_xyz_normal[k,:,0:3] 96 | shape_normal = batch_xyz_normal[k,:,3:6] 97 | batch_xyz_normal[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 98 | batch_xyz_normal[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix) 99 | return batch_xyz_normal 100 | 101 | def rotate_perturbation_point_cloud_with_normal(batch_data, angle_sigma=0.06, angle_clip=0.18): 102 | """ Randomly perturb the point clouds by small rotations 103 | Input: 104 | BxNx6 array, original batch of point clouds and point normals 105 | Return: 106 | BxNx3 array, rotated batch of point clouds 107 | """ 108 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 109 | for k in range(batch_data.shape[0]): 110 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 111 | Rx = np.array([[1,0,0], 112 | [0,np.cos(angles[0]),-np.sin(angles[0])], 113 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 114 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 115 | [0,1,0], 116 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 117 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 118 | [np.sin(angles[2]),np.cos(angles[2]),0], 119 | [0,0,1]]) 120 | R = np.dot(Rz, np.dot(Ry,Rx)) 121 | shape_pc = batch_data[k,:,0:3] 122 | shape_normal = batch_data[k,:,3:6] 123 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), R) 124 | rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), R) 125 | return rotated_data 126 | 127 | 128 | def rotate_point_cloud_by_angle(batch_data, rotation_angle): 129 | """ Rotate the point cloud along up direction with certain angle. 130 | Input: 131 | BxNx3 array, original batch of point clouds 132 | Return: 133 | BxNx3 array, rotated batch of point clouds 134 | """ 135 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 136 | for k in range(batch_data.shape[0]): 137 | #rotation_angle = np.random.uniform() * 2 * np.pi 138 | cosval = np.cos(rotation_angle) 139 | sinval = np.sin(rotation_angle) 140 | rotation_matrix = np.array([[cosval, 0, sinval], 141 | [0, 1, 0], 142 | [-sinval, 0, cosval]]) 143 | shape_pc = batch_data[k,:,0:3] 144 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 145 | return rotated_data 146 | 147 | def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle): 148 | """ Rotate the point cloud along up direction with certain angle. 149 | Input: 150 | BxNx3 array, original batch of point clouds 151 | Return: 152 | BxNx3 array, rotated batch of point clouds 153 | """ 154 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 155 | for k in range(batch_data.shape[0]): 156 | #rotation_angle = np.random.uniform() * 2 * np.pi 157 | cosval = np.cos(rotation_angle) 158 | sinval = np.sin(rotation_angle) 159 | rotation_matrix = np.array([[cosval, 0, sinval], 160 | [0, 1, 0], 161 | [-sinval, 0, cosval]]) 162 | shape_pc = batch_data[k, ...] 163 | shape_normal = batch_data[k,:,3:6] 164 | rotated_data[k, ...] 
= np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 165 | rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1,3)), rotation_matrix) 166 | return rotated_data 167 | 168 | 169 | 170 | def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18): 171 | """ Randomly perturb the point clouds by small rotations 172 | Input: 173 | BxNx3 array, original batch of point clouds 174 | Return: 175 | BxNx3 array, rotated batch of point clouds 176 | """ 177 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 178 | for k in range(batch_data.shape[0]): 179 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 180 | Rx = np.array([[1,0,0], 181 | [0,np.cos(angles[0]),-np.sin(angles[0])], 182 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 183 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 184 | [0,1,0], 185 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 186 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 187 | [np.sin(angles[2]),np.cos(angles[2]),0], 188 | [0,0,1]]) 189 | R = np.dot(Rz, np.dot(Ry,Rx)) 190 | shape_pc = batch_data[k, ...] 191 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), R) 192 | return rotated_data 193 | 194 | 195 | def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05): 196 | """ Randomly jitter points. jittering is per point. 197 | Input: 198 | BxNx3 array, original batch of point clouds 199 | Return: 200 | BxNx3 array, jittered batch of point clouds 201 | """ 202 | B, N, C = batch_data.shape 203 | assert(clip > 0) 204 | jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1*clip, clip) 205 | jittered_data += batch_data 206 | return jittered_data 207 | 208 | def shift_point_cloud(batch_data, shift_range=0.1): 209 | """ Randomly shift point cloud. Shift is per point cloud. 210 | Input: 211 | BxNx3 array, original batch of point clouds 212 | Return: 213 | BxNx3 array, shifted batch of point clouds 214 | """ 215 | B, N, C = batch_data.shape 216 | shifts = np.random.uniform(-shift_range, shift_range, (B,3)) 217 | for batch_index in range(B): 218 | batch_data[batch_index,:,:] += shifts[batch_index,:] 219 | return batch_data 220 | 221 | 222 | def random_scale_point_cloud(batch_data, batch_label,scale_low=0.5, scale_high=1.5): 223 | """ Randomly scale the point cloud. Scale is per point cloud. 
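The sampled per-cloud scale is also written into batch_label[:, 5], keeping the size entry of the pose label consistent with the scaled points.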
224 | Input: 225 | BxNx3 array, original batch of point clouds 226 | Return: 227 | BxNx3 array, scaled batch of point clouds 228 | """ 229 | B, N, C = batch_data.shape 230 | scales = np.random.uniform(scale_low, scale_high, B) 231 | for batch_index in range(B): 232 | batch_data[batch_index,:,:] *= scales[batch_index] 233 | batch_label[batch_index,5] = scales[batch_index] 234 | return batch_data,batch_label 235 | 236 | def random_point_dropout(batch_pc, max_dropout_ratio=0.875): 237 | ''' batch_pc: BxNx3 ''' 238 | for b in range(batch_pc.shape[0]): 239 | dropout_ratio = np.random.random()*max_dropout_ratio # 0~0.875 240 | drop_idx = np.where(np.random.random((batch_pc.shape[1]))<=dropout_ratio)[0] 241 | if len(drop_idx)>0: 242 | batch_pc[b,drop_idx,:] = batch_pc[b,0,:] # set to the first point 243 | return batch_pc 244 | 245 | 246 | def getDataFiles(list_filename): 247 | return [line.rstrip() for line in open(list_filename)] 248 | 249 | def load_h5(h5_filename): 250 | f = h5py.File(h5_filename) 251 | data = f['data'][:] 252 | label = f['label'][:] 253 | return (data, label) 254 | 255 | def loadDataFile(filename): 256 | return load_h5(filename) 257 | -------------------------------------------------------------------------------- /utils/provider.py~: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import h5py 5 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 6 | sys.path.append(BASE_DIR) 7 | 8 | def shuffle_data(data, labels): 9 | """ Shuffle data and labels. 10 | Input: 11 | data: B,N,... numpy array 12 | label: B,... numpy array 13 | Return: 14 | shuffled data, label and shuffle indices 15 | """ 16 | idx = np.arange(len(labels)) 17 | np.random.shuffle(idx) 18 | return data[idx, ...], labels[idx], idx 19 | 20 | def shuffle_points(batch_data): 21 | """ Shuffle orders of points in each point cloud -- changes FPS behavior. 22 | Use the same shuffling idx for the entire batch. 23 | Input: 24 | BxNxC array 25 | Output: 26 | BxNxC array 27 | """ 28 | idx = np.arange(batch_data.shape[1]) 29 | np.random.shuffle(idx) 30 | return batch_data[:,idx,:] 31 | 32 | def rotate_point_cloud(batch_data): 33 | """ Randomly rotate the point clouds to augument the dataset 34 | rotation is per shape based along up direction 35 | Input: 36 | BxNx3 array, original batch of point clouds 37 | Return: 38 | BxNx3 array, rotated batch of point clouds 39 | """ 40 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 41 | for k in range(batch_data.shape[0]): 42 | rotation_angle = np.random.uniform(-1,1) * np.pi # 43 | cosval = np.cos(rotation_angle) 44 | sinval = np.sin(rotation_angle) 45 | rotation_matrix = np.array([[cosval,-sinval,0], 46 | [sinval,cosval, 0], 47 | [0, 0, 1]]) 48 | shape_pc = batch_data[k, ...] 49 | rotated_data[k, ...] 
= np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 50 | return rotated_data 51 | 52 | def rotate_point_cloud_with_label(batch_data,batch_label): 53 | """ Randomly rotate the point clouds to augument the dataset 54 | rotation is per shape based along up direction 55 | Input: 56 | BxNx3 array, original batch of point clouds 57 | Return: 58 | BxNx3 array, rotated batch of point clouds 59 | """ 60 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 61 | rotated_label = np.zeros(batch_label.shape, dtype=np.float32) 62 | k = 0 63 | for k in range(0,batch_data.shape[0]): 64 | rotation_angle = np.random.uniform(-1,1) * np.pi # 65 | cosval = np.cos(rotation_angle) 66 | sinval = np.sin(rotation_angle) 67 | rotation_matrix = np.array([[cosval,-sinval,0], 68 | [sinval,cosval, 0], 69 | [0, 0, 1]]) 70 | shape_pc = batch_data[k, ...] 71 | rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 72 | rotated_label[k,:] = batch_label[k,:] 73 | rotated_label[k,2] = batch_label[k,2]+rotation_angle 74 | if(rotated_label[k,2]>3.142): 75 | rotated_label[k,2] -= 6.284 76 | elif(rotated_label[k,2]<-3.142): 77 | rotated_label[k,2] -= -6.284 78 | return rotated_data,rotated_label 79 | 80 | 81 | def rotate_point_cloud_with_normal(batch_xyz_normal): 82 | ''' Randomly rotate XYZ, normal point cloud. 83 | Input: 84 | batch_xyz_normal: B,N,6, first three channels are XYZ, last 3 all normal 85 | Output: 86 | B,N,6, rotated XYZ, normal point cloud 87 | ''' 88 | for k in range(batch_xyz_normal.shape[0]): 89 | rotation_angle = np.random.uniform() * 2 * np.pi 90 | cosval = np.cos(rotation_angle) 91 | sinval = np.sin(rotation_angle) 92 | rotation_matrix = np.array([[cosval, 0, sinval], 93 | [0, 1, 0], 94 | [-sinval, 0, cosval]]) 95 | shape_pc = batch_xyz_normal[k,:,0:3] 96 | shape_normal = batch_xyz_normal[k,:,3:6] 97 | batch_xyz_normal[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix) 98 | batch_xyz_normal[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix) 99 | return batch_xyz_normal 100 | 101 | def rotate_perturbation_point_cloud_with_normal(batch_data, angle_sigma=0.06, angle_clip=0.18): 102 | """ Randomly perturb the point clouds by small rotations 103 | Input: 104 | BxNx6 array, original batch of point clouds and point normals 105 | Return: 106 | BxNx3 array, rotated batch of point clouds 107 | """ 108 | rotated_data = np.zeros(batch_data.shape, dtype=np.float32) 109 | for k in range(batch_data.shape[0]): 110 | angles = np.clip(angle_sigma*np.random.randn(3), -angle_clip, angle_clip) 111 | Rx = np.array([[1,0,0], 112 | [0,np.cos(angles[0]),-np.sin(angles[0])], 113 | [0,np.sin(angles[0]),np.cos(angles[0])]]) 114 | Ry = np.array([[np.cos(angles[1]),0,np.sin(angles[1])], 115 | [0,1,0], 116 | [-np.sin(angles[1]),0,np.cos(angles[1])]]) 117 | Rz = np.array([[np.cos(angles[2]),-np.sin(angles[2]),0], 118 | [np.sin(angles[2]),np.cos(angles[2]),0], 119 | [0,0,1]]) 120 | R = np.dot(Rz, np.dot(Ry,Rx)) 121 | shape_pc = batch_data[k,:,0:3] 122 | shape_normal = batch_data[k,:,3:6] 123 | rotated_data[k,:,0:3] = np.dot(shape_pc.reshape((-1, 3)), R) 124 | rotated_data[k,:,3:6] = np.dot(shape_normal.reshape((-1, 3)), R) 125 | return rotated_data 126 | 127 | 128 | def rotate_point_cloud_by_angle(batch_data, rotation_angle): 129 | """ Rotate the point cloud along up direction with certain angle. 
def rotate_point_cloud_by_angle(batch_data, rotation_angle):
    """ Rotate the point cloud about the up direction by a given angle.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        #rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        shape_pc = batch_data[k, :, 0:3]
        rotated_data[k, :, 0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
    return rotated_data

def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle):
    """ Rotate an XYZ + normal point cloud about the up direction by a given angle.
        Input:
            BxNx6 array, original batch of point clouds with normals
        Return:
            BxNx6 array, rotated batch of point clouds with normals
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        #rotation_angle = np.random.uniform() * 2 * np.pi
        cosval = np.cos(rotation_angle)
        sinval = np.sin(rotation_angle)
        rotation_matrix = np.array([[cosval, 0, sinval],
                                    [0, 1, 0],
                                    [-sinval, 0, cosval]])
        # rotate XYZ and the normals separately; slicing channels 0:3 here
        # (rather than all six channels at once) keeps the reshape to (-1, 3) valid
        shape_pc = batch_data[k, :, 0:3]
        shape_normal = batch_data[k, :, 3:6]
        rotated_data[k, :, 0:3] = np.dot(shape_pc.reshape((-1, 3)), rotation_matrix)
        rotated_data[k, :, 3:6] = np.dot(shape_normal.reshape((-1, 3)), rotation_matrix)
    return rotated_data



def rotate_perturbation_point_cloud(batch_data, angle_sigma=0.06, angle_clip=0.18):
    """ Randomly perturb the point clouds by small rotations.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        angles = np.clip(angle_sigma * np.random.randn(3), -angle_clip, angle_clip)
        Rx = np.array([[1, 0, 0],
                       [0, np.cos(angles[0]), -np.sin(angles[0])],
                       [0, np.sin(angles[0]),  np.cos(angles[0])]])
        Ry = np.array([[np.cos(angles[1]), 0, np.sin(angles[1])],
                       [0, 1, 0],
                       [-np.sin(angles[1]), 0, np.cos(angles[1])]])
        Rz = np.array([[np.cos(angles[2]), -np.sin(angles[2]), 0],
                       [np.sin(angles[2]),  np.cos(angles[2]), 0],
                       [0, 0, 1]])
        R = np.dot(Rz, np.dot(Ry, Rx))
        shape_pc = batch_data[k, ...]
        rotated_data[k, ...] = np.dot(shape_pc.reshape((-1, 3)), R)
    return rotated_data


def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05):
    """ Randomly jitter points. Jittering is applied per point.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, jittered batch of point clouds
    """
    B, N, C = batch_data.shape
    assert(clip > 0)
    jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1 * clip, clip)
    jittered_data += batch_data
    return jittered_data
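# --- Illustrative sketch (not part of the original file): jitter_point_cloud
# --- bounds every per-coordinate displacement by `clip`, which a quick check
# --- on a hypothetical all-zeros batch confirms:
#
# batch = np.zeros((4, 1024, 3), dtype=np.float32)
# jittered = jitter_point_cloud(batch, sigma=0.01, clip=0.05)
# assert np.abs(jittered).max() <= 0.05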
def shift_point_cloud(batch_data, shift_range=0.1):
    """ Randomly shift the point cloud. The shift is per point cloud.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, shifted batch of point clouds
    """
    B, N, C = batch_data.shape
    shifts = np.random.uniform(-shift_range, shift_range, (B, 3))
    for batch_index in range(B):
        batch_data[batch_index, :, :] += shifts[batch_index, :]
    return batch_data


def random_scale_point_cloud(batch_data, batch_label, scale_low=0.5, scale_high=1.5):
    """ Randomly scale the point cloud. The scale is per point cloud.
        Input:
            BxNx3 array, original batch of point clouds
        Return:
            BxNx3 array, scaled batch of point clouds
            batch_label with the applied scale written to column 5
    """
    B, N, C = batch_data.shape
    scales = np.random.uniform(scale_low, scale_high, B)
    for batch_index in range(B):
        batch_data[batch_index, :, :] *= scales[batch_index]
        batch_label[batch_index, 5] = scales[batch_index]
    return batch_data, batch_label

def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
    ''' batch_pc: BxNx3 '''
    for b in range(batch_pc.shape[0]):
        dropout_ratio = np.random.random() * max_dropout_ratio  # 0~0.875
        drop_idx = np.where(np.random.random((batch_pc.shape[1])) <= dropout_ratio)[0]
        if len(drop_idx) > 0:
            batch_pc[b, drop_idx, :] = batch_pc[b, 0, :]  # duplicate the first point
    return batch_pc


def getDataFiles(list_filename):
    return [line.rstrip() for line in open(list_filename)]

def load_h5(h5_filename):
    f = h5py.File(h5_filename, 'r')
    data = f['data'][:]
    label = f['label'][:]
    return (data, label)

def loadDataFile(filename):
    return load_h5(filename)
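# --- Illustrative sketch (not part of the original file): a typical training-
# --- time augmentation pipeline built from the helpers above. The file name
# --- and batch shape are hypothetical.
#
# data, labels = loadDataFile('data/train0.h5')
# data, labels, _ = shuffle_data(data, labels)
# batch = data[:32, :1024, :3]
# batch = rotate_point_cloud(batch)
# batch = jitter_point_cloud(batch, sigma=0.01, clip=0.05)
# batch = shift_point_cloud(batch, shift_range=0.1)
# batch = random_point_dropout(batch)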
--------------------------------------------------------------------------------
/models/pointnet2_seg_orig.py:
--------------------------------------------------------------------------------
"""
PointNet++ model for point cloud pose detection
"""

import os
import sys
import math
BASE_DIR = os.path.dirname(__file__)
sys.path.append(BASE_DIR)
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(os.path.join(BASE_DIR, '../utils'))
sys.path.append(os.path.join(ROOT_DIR, 'tf_ops/grouping'))
from tf_grouping import query_ball_point, group_point, knn_point
import tensorflow as tf
import numpy as np
import tf_util
import modelnet40_dataset_orig
from pointnet_util import pointnet_sa_module, pointnet_fp_module
from sklearn.metrics import mean_squared_error
from eul2rot import euler2rotm
from sklearn.neighbors import NearestNeighbors


# map axes strings to/from tuples of inner axis, parity, repetition, frame
_AXES2TUPLE = {
    'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0),
    'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0),
    'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0),
    'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0),
    'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1),
    'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1),
    'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1),
    'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)}

_NEXT_AXIS = [1, 2, 0, 1]

def placeholder_inputs_class(batch_size, num_point):
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 6))
    labels_pl_class = tf.placeholder(tf.int32, shape=(batch_size, num_point))
    labels_pl_instance = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    return pointclouds_pl, labels_pl_instance, labels_pl_class

def get_model_seg(point_cloud, is_training, bn_decay=None):
    """ Semantic segmentation PointNet++: input is BxNx3 point coordinates,
        output is BxNx2 per-point logits """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = None
    end_points['l0_xyz'] = l0_xyz

    # Set abstraction (encoder) layers
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=5000, radius=0.1, nsample=32, mlp=[32,32,64], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=5000, radius=0.2, nsample=32, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=4000, radius=0.4, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=2000, radius=0.8, nsample=32, mlp=[256,256,512], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer4')

    # Feature propagation (decoder) layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer2')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer3')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, l0_points, l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, 2, 1, padding='VALID', activation_fn=None, scope='fc2')
    return net, end_points
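# --- Illustrative sketch (not part of the original file): how tensor shapes
# --- evolve through the encoder/decoder above for a hypothetical input with
# --- N=10000 points (feature widths inferred from the npoint/mlp arguments;
# --- treat this as a guide, not a guarantee).
#
#   l0: B x 10000 x 3      input coordinates
#   l1: B x 5000  x 64     after SA layer1 (radius 0.1)
#   l2: B x 5000  x 128    after SA layer2 (radius 0.2)
#   l3: B x 4000  x 256    after SA layer3 (radius 0.4)
#   l4: B x 2000  x 512    after SA layer4 (radius 0.8)
#   The FP layers interpolate features back up to l0: B x 10000 x 128,
#   and the 1x1 conv head produces B x 10000 x 2 logits.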
def get_model_instance_seg(point_cloud, is_training, bn_decay=None):
    """ Instance segmentation / regression PointNet++: input is BxNx3,
        output is BxNx2 per-point predictions """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = None
    end_points['l0_xyz'] = l0_xyz

    # Set abstraction (encoder) layers
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=5000, radius=1, nsample=32, mlp=[32,32,64], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=5000, radius=1, nsample=32, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=4000, radius=0.4, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=1000, radius=0.8, nsample=32, mlp=[256,256,512], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer4')

    # Feature propagation (decoder) layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer2')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer3')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, l0_points, l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer4')

    # FC layers (regression head)
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, 2, 1, padding='VALID', activation_fn=None, scope='fc2')

    '''
    # FC layers (classification head, currently disabled)
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1_class', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1_class')
    net_class = tf_util.conv1d(net, 11, 1, padding='VALID', activation_fn=None, scope='fc2_class')
    '''
    return net, end_points  # would also return net_class if the head above were enabled
def get_model_seg_cnn(point_cloud, img_cnn, is_training, bn_decay=None):
    """ Edge-aware semantic segmentation PointNet++: fuses point features with
        CNN edge features computed from img_cnn; input is BxNx3 plus an image
        batch, output is BxNx12 per-point logits """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = None
    end_points['l0_xyz'] = l0_xyz

    # Set abstraction (encoder) layers
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=10000, radius=0.1, nsample=32, mlp=[32,32,64], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=5000, radius=0.2, nsample=32, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=4000, radius=0.4, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=1000, radius=0.8, nsample=32, mlp=[256,256,512], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer4')

    # Feature propagation (decoder) layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer2')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer3')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, l0_points, l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer4')  # 1 x 10000 x 128

    # CNN encoder for edge detection
    conv1 = tf.layers.conv2d(inputs=img_cnn, filters=32, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
    conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5], padding="same", activation=tf.nn.relu)
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

    # CNN decoder
    conv3 = tf.layers.conv2d(pool2, filters=32, kernel_size=(3,3), strides=(1,1), name='conv3', padding='SAME', use_bias=True, activation=tf.nn.relu)
    upsample1 = tf.layers.conv2d_transpose(conv3, filters=64, kernel_size=3, padding='same', strides=2, name='upsample1')
    upsample2 = tf.layers.conv2d_transpose(upsample1, filters=64, kernel_size=3, padding='same', strides=2, name='upsample2')
    conv4 = tf.layers.conv2d(upsample2, filters=128, kernel_size=(3,3), strides=(1,1), name='upsample3', padding='SAME', use_bias=True)
    # the reshape assumes the decoded image has exactly num_point spatial locations
    net_class = tf.reshape(conv4, [batch_size, num_point, 128])  # 1 x 10000 x 128

    # Concatenate point features with CNN edge features
    net = tf.concat([l0_points, net_class], axis=-1)

    # FC layers
    net = tf_util.conv1d(net, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, 12, 1, padding='VALID', activation_fn=None, scope='fc2')
    return net, end_points

def huber_loss(error, delta):
    abs_error = tf.abs(error)
    quadratic = tf.minimum(abs_error, delta)
    linear = (abs_error - quadratic)
    losses = 0.5 * quadratic**2 + delta * linear
    return tf.reduce_mean(losses)

def get_loss_class(pred_class, label_class, end_points):
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred_class, labels=label_class)
    classify_loss = tf.reduce_mean(loss)
    tf.summary.scalar('classify loss', classify_loss)
    tf.add_to_collection('losses_class', classify_loss)
    return classify_loss

def get_loss_instance(pred_instance, label_instance, end_points):
    """ pred_instance, label_instance: per-point regression tensors of matching shape """
    # L1 norm
    #loss = tf.reduce_mean(tf.abs(label_instance-pred_instance))#tf.square
    #tf.summary.scalar('instance loss', loss)
    #tf.add_to_collection('losses_instance', loss)
    # Huber loss on the norm of the regression residual
    reg_loss = tf.norm(label_instance - pred_instance)
    loss = huber_loss(reg_loss, delta=2.0)
    tf.summary.scalar('instance loss', loss)
    tf.add_to_collection('losses_instance', loss)
    return loss

if __name__=='__main__':
    with tf.Graph().as_default():
        inputs = tf.zeros((32,1024,3))
        output, _ = get_model_seg(inputs, tf.constant(True))
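A minimal NumPy sketch (not part of the repository) of the piecewise behavior the huber_loss above implements: quadratic for residuals inside delta, linear beyond it.

import numpy as np

def huber_loss_np(error, delta):
    # mirrors the TF huber_loss above: 0.5*e^2 inside delta, linear outside
    abs_error = np.abs(error)
    quadratic = np.minimum(abs_error, delta)
    linear = abs_error - quadratic
    return np.mean(0.5 * quadratic**2 + delta * linear)

print(huber_loss_np(np.array([0.5]), delta=2.0))  # 0.125 (quadratic region)
print(huber_loss_np(np.array([5.0]), delta=2.0))  # 8.0   (0.5*4 + 2*3, linear region)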
--------------------------------------------------------------------------------
/tf_ops/3d_interpolation/tf_interpolate.cpp:
--------------------------------------------------------------------------------
#include <cstdio>
#include <ctime>
#include <cstring> // memset
#include <cstdlib> // rand, RAND_MAX
#include <cmath> // sqrtf
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/common_shape_fns.h"
using namespace tensorflow;

REGISTER_OP("ThreeNN")
    .Input("xyz1: float32")
    .Input("xyz2: float32")
    .Output("dist: float32")
    .Output("idx: int32")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
        c->set_output(0, c->input(0));
        c->set_output(1, c->input(0));
        return Status::OK();
    });
REGISTER_OP("ThreeInterpolate")
    .Input("points: float32")
    .Input("idx: int32")
    .Input("weight: float32")
    .Output("out: float32")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
        ::tensorflow::shape_inference::ShapeHandle dims1; // (b,m,c)
        c->WithRank(c->input(0), 3, &dims1);
        ::tensorflow::shape_inference::ShapeHandle dims2; // (b,n,3)
        c->WithRank(c->input(1), 3, &dims2);
        // (b,n,c)
        ::tensorflow::shape_inference::ShapeHandle output = c->MakeShape({c->Dim(dims1, 0), c->Dim(dims2, 1), c->Dim(dims1, 2)});
        c->set_output(0, output);
        return Status::OK();
    });
REGISTER_OP("ThreeInterpolateGrad")
    .Input("points: float32")
    .Input("idx: int32")
    .Input("weight: float32")
    .Input("grad_out: float32")
    .Output("grad_points: float32")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
        c->set_output(0, c->input(0));
        return Status::OK();
    });

float randomf(){
    return (rand()+0.5)/(RAND_MAX+1.0);
}
static double get_time(){
    timespec tp;
    clock_gettime(CLOCK_MONOTONIC,&tp);
    return tp.tv_sec+tp.tv_nsec*1e-9;
}

// Find the three nearest neighbors by squared distance
// input: xyz1 (b,n,3), xyz2 (b,m,3)
// output: dist (b,n,3), idx (b,n,3)
void threenn_cpu(int b, int n, int m, const float *xyz1, const float *xyz2, float *dist, int *idx) {
    for (int i=0;i<b;++i) {
        for (int j=0;j<n;++j) {
            float x1=xyz1[j*3+0];
            float y1=xyz1[j*3+1];
            float z1=xyz1[j*3+2];
            // running best-three squared distances and their indices
            double best1=1e40; double best2=1e40; double best3=1e40;
            int besti1=0; int besti2=0; int besti3=0;
            for (int k=0;k<m;++k) {
                float x2=xyz2[k*3+0];
                float y2=xyz2[k*3+1];
                float z2=xyz2[k*3+2];
                double d=(x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1);
                if (d<best1) {
                    best3=best2; besti3=besti2;
                    best2=best1; besti2=besti1;
                    best1=d; besti1=k;
                } else if (d<best2) {
                    best3=best2; besti3=besti2;
                    best2=d; besti2=k;
                } else if (d<best3) {
                    best3=d; besti3=k;
                }
            }
            dist[j*3]=best1; idx[j*3]=besti1;
            dist[j*3+1]=best2; idx[j*3+1]=besti2;
            dist[j*3+2]=best3; idx[j*3+2]=besti3;
        }
        // advance the pointers to the next batch element
        xyz1+=n*3; xyz2+=m*3; dist+=n*3; idx+=n*3;
    }
}

// Weighted interpolation from the three neighbors
// input: points (b,m,c), idx (b,n,3), weight (b,n,3)
// output: out (b,n,c)
void threeinterpolate_cpu(int b, int m, int c, int n, const float *points, const int *idx, const float *weight, float *out) {
    float w1,w2,w3;
    int i1,i2,i3;
    for (int i=0;i<b;++i) {
        for (int j=0;j<n;++j) {
            w1=weight[j*3]; w2=weight[j*3+1]; w3=weight[j*3+2];
            i1=idx[j*3]; i2=idx[j*3+1]; i3=idx[j*3+2];
            for (int l=0;l<c;++l) {
                out[j*c+l] = points[i1*c+l]*w1 + points[i2*c+l]*w2 + points[i3*c+l]*w3;
            }
        }
        points+=m*c; idx+=n*3; weight+=n*3; out+=n*c;
    }
}

// Backward pass: scatter grad_out back to the three source points
// input: grad_out (b,n,c), idx (b,n,3), weight (b,n,3)
// output: grad_points (b,m,c)
void threeinterpolate_grad_cpu(int b, int n, int c, int m, const float *grad_out, const int *idx, const float *weight, float *grad_points) {
    float w1,w2,w3;
    int i1,i2,i3;
    for (int i=0;i<b;++i) {
        for (int j=0;j<n;++j) {
            w1=weight[j*3]; w2=weight[j*3+1]; w3=weight[j*3+2];
            i1=idx[j*3]; i2=idx[j*3+1]; i3=idx[j*3+2];
            for (int l=0;l<c;++l) {
                grad_points[i1*c+l] += grad_out[j*c+l]*w1;
                grad_points[i2*c+l] += grad_out[j*c+l]*w2;
                grad_points[i3*c+l] += grad_out[j*c+l]*w3;
            }
        }
        grad_out+=n*c; idx+=n*3; weight+=n*3; grad_points+=m*c;
    }
}

class ThreeNNOp : public OpKernel {
    public:
        explicit ThreeNNOp(OpKernelConstruction* context) : OpKernel(context) {}

        void Compute(OpKernelContext* context) override {
            const Tensor& xyz1_tensor = context->input(0);
            OP_REQUIRES(context, xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,n,3) xyz1 shape."));
            int b = xyz1_tensor.shape().dim_size(0);
            int n = xyz1_tensor.shape().dim_size(1);

            const Tensor& xyz2_tensor = context->input(1);
            OP_REQUIRES(context, xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeNN expects (b,m,3) xyz2 shape."));
            int m = xyz2_tensor.shape().dim_size(1);

            Tensor *dist_tensor = nullptr;
            OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{b,n,3}, &dist_tensor));
            Tensor *idx_tensor = nullptr;
            OP_REQUIRES_OK(context, context->allocate_output(1, TensorShape{b,n,3}, &idx_tensor));

            auto xyz1_flat = xyz1_tensor.flat<float>();
            const float *xyz1 = &(xyz1_flat(0));
            auto xyz2_flat = xyz2_tensor.flat<float>();
            const float *xyz2 = &(xyz2_flat(0));
            auto dist_flat = dist_tensor->flat<float>();
            float *dist = &(dist_flat(0));
            auto idx_flat = idx_tensor->flat<int>();
            int *idx = &(idx_flat(0));
            threenn_cpu(b,n,m,xyz1,xyz2,dist,idx);
        }
};
REGISTER_KERNEL_BUILDER(Name("ThreeNN").Device(DEVICE_CPU), ThreeNNOp);



class ThreeInterpolateOp: public OpKernel{
    public:
        explicit ThreeInterpolateOp(OpKernelConstruction * context):OpKernel(context){}

        void Compute(OpKernelContext * context) override {
            const Tensor& points_tensor=context->input(0);
            OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolate expects (b,m,c) points shape"));
            int b = points_tensor.shape().dim_size(0);
            int m = points_tensor.shape().dim_size(1);
            int c = points_tensor.shape().dim_size(2);

            const Tensor& idx_tensor=context->input(1);
            OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b && idx_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) idx shape"));
            int n = idx_tensor.shape().dim_size(1);
            const Tensor& weight_tensor=context->input(2);
            OP_REQUIRES(context,weight_tensor.dims()==3 && weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolate expects (b,n,3) weight shape"));

            Tensor * out_tensor = nullptr;
            OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,n,c}, &out_tensor));

            auto points_flat = points_tensor.flat<float>();
            const float *points = &(points_flat(0));
            auto idx_flat = idx_tensor.flat<int>();
            const int *idx = &(idx_flat(0));
            auto weight_flat = weight_tensor.flat<float>();
            const float *weight = &(weight_flat(0));
            auto out_flat = out_tensor->flat<float>();
            float *out = &(out_flat(0));
            threeinterpolate_cpu(b,m,c,n,points,idx,weight,out);
        }
};
REGISTER_KERNEL_BUILDER(Name("ThreeInterpolate").Device(DEVICE_CPU),ThreeInterpolateOp);


class ThreeInterpolateGradOp: public OpKernel{
    public:
        explicit ThreeInterpolateGradOp(OpKernelConstruction * context):OpKernel(context){}

        void Compute(OpKernelContext * context) override {
            const Tensor& points_tensor=context->input(0);
            OP_REQUIRES(context, points_tensor.dims()==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,m,c) points shape"));
            int b = points_tensor.shape().dim_size(0);
            int m = points_tensor.shape().dim_size(1);
            int c = points_tensor.shape().dim_size(2);

            const Tensor& idx_tensor=context->input(1);
            OP_REQUIRES(context,idx_tensor.dims()==3 && idx_tensor.shape().dim_size(0)==b, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) idx shape"));
            int n = idx_tensor.shape().dim_size(1);
            const Tensor& weight_tensor=context->input(2);
            OP_REQUIRES(context,weight_tensor.dims()==3 && weight_tensor.shape().dim_size(0)==b && weight_tensor.shape().dim_size(1)==n && weight_tensor.shape().dim_size(2)==3, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,3) weight shape"));

            const Tensor& grad_out_tensor=context->input(3);
            OP_REQUIRES(context,grad_out_tensor.dims()==3 && grad_out_tensor.shape().dim_size(0)==b && grad_out_tensor.shape().dim_size(1)==n && grad_out_tensor.shape().dim_size(2)==c, errors::InvalidArgument("ThreeInterpolateGrad expects (b,n,c) grad_out shape"));

            Tensor * grad_points_tensor = nullptr;
            OP_REQUIRES_OK(context, context->allocate_output(0,TensorShape{b,m,c}, &grad_points_tensor));

            auto points_flat = points_tensor.flat<float>();
            const float *points = &(points_flat(0));
            auto idx_flat = idx_tensor.flat<int>();
            const int *idx = &(idx_flat(0));
            auto weight_flat = weight_tensor.flat<float>();
            const float *weight = &(weight_flat(0));
            auto grad_out_flat = grad_out_tensor.flat<float>();
            const float *grad_out = &(grad_out_flat(0));
            auto grad_points_flat = grad_points_tensor->flat<float>();
            float *grad_points = &(grad_points_flat(0));
            // zero-initialize, then accumulate gradients from every target point
            memset(grad_points, 0, sizeof(float)*b*m*c);
            threeinterpolate_grad_cpu(b,n,c,m,grad_out,idx,weight,grad_points);
        }
};
REGISTER_KERNEL_BUILDER(Name("ThreeInterpolateGrad").Device(DEVICE_CPU),ThreeInterpolateGradOp);
--------------------------------------------------------------------------------
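A NumPy sketch (not part of the repository) of the interpolation these kernels implement. The inverse-distance weighting shown here follows the way PointNet++ feature-propagation code typically normalizes the squared distances returned by ThreeNN; treat it as an illustration, not the exact pipeline.

import numpy as np

def three_interpolate_np(points, xyz1, xyz2):
    # points: (m, c) features at positions xyz2 (m, 3)
    # xyz1:   (n, 3) query positions; returns (n, c) interpolated features
    d2 = ((xyz1[:, None, :] - xyz2[None, :, :]) ** 2).sum(-1)  # (n, m) squared distances
    idx = np.argsort(d2, axis=1)[:, :3]                        # three nearest, as in ThreeNN
    dist = np.take_along_axis(d2, idx, axis=1)
    w = 1.0 / np.maximum(dist, 1e-10)                          # inverse-distance weights
    w = w / w.sum(axis=1, keepdims=True)                       # normalize to sum to 1
    return (points[idx] * w[..., None]).sum(axis=1)            # weighted sum, as in ThreeInterpolate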