├── utils ├── PyTorchEMD │ ├── __init__.py │ ├── setup.py │ ├── README.md │ ├── cuda │ │ └── emd.cpp │ ├── emd.py │ └── test_emd_loss.py ├── metrics │ ├── __init__.py │ ├── .gitignore │ └── pytorch_structural_losses │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── src │ │ ├── nndistance.cuh │ │ ├── approxmatch.cuh │ │ ├── utils.hpp │ │ ├── nndistance.cu │ │ ├── structural_loss.cpp │ │ └── approxmatch.cu │ │ ├── pybind │ │ ├── bind.cpp │ │ └── extern.hpp │ │ ├── setup.py │ │ ├── nn_distance.py │ │ ├── match_cost.py │ │ └── Makefile ├── pyTorchChamferDistance │ ├── chamfer_distance │ │ ├── __init__.py │ │ ├── chamfer_distance.py │ │ ├── chamfer_distance.cu │ │ └── chamfer_distance.cpp │ ├── README.md │ └── LICENSE.md ├── Pointnet2.PyTorch │ ├── .gitignore │ ├── tools │ │ ├── _init_path.py │ │ ├── pointnet2_msg.py │ │ ├── dataset.py │ │ ├── train_and_eval.py │ │ └── kitti_utils.py │ ├── pointnet2 │ │ ├── src │ │ │ ├── cuda_utils.h │ │ │ ├── ball_query_gpu.h │ │ │ ├── group_points_gpu.h │ │ │ ├── ball_query.cpp │ │ │ ├── sampling_gpu.h │ │ │ ├── pointnet2_api.cpp │ │ │ ├── interpolate_gpu.h │ │ │ ├── group_points.cpp │ │ │ ├── sampling.cpp │ │ │ ├── interpolate.cpp │ │ │ ├── ball_query_gpu.cu │ │ │ ├── group_points_gpu.cu │ │ │ ├── interpolate_gpu.cu │ │ │ └── sampling_gpu.cu │ │ ├── setup.py │ │ ├── pointnet2_modules.py │ │ ├── pytorch_utils.py │ │ └── pointnet2_utils.py │ ├── LICENSE │ └── README.md ├── randPartial │ ├── test │ │ ├── 1a04e3eab45ca15dd86060f189eb133.png │ │ └── visual.py │ ├── CMakeLists.txt │ ├── setup.py │ └── main.cpp ├── furthestPointSampling │ ├── setup.py │ ├── sampling_gpu.h │ ├── sampling.cpp │ ├── fps.py │ └── sampling_gpu.cu ├── sample_pytorch3d.py ├── data_loader.py └── utils.py ├── docs ├── poster.pdf └── teaser.png ├── .gitmodules ├── README.md └── models └── latent_3d_points └── autoencoder.py /utils/PyTorchEMD/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/metrics/.gitignore: -------------------------------------------------------------------------------- 1 | StructuralLosses 2 | -------------------------------------------------------------------------------- /docs/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiyizhang/AXform/HEAD/docs/poster.pdf -------------------------------------------------------------------------------- /docs/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiyizhang/AXform/HEAD/docs/teaser.png -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/.gitignore: -------------------------------------------------------------------------------- 1 | PyTorchStructuralLosses.egg-info/ 2 | -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/chamfer_distance/__init__.py: -------------------------------------------------------------------------------- 1 | from .chamfer_distance import ChamferDistance 2 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/.gitignore: 
-------------------------------------------------------------------------------- 1 | pointnet2/build/ 2 | pointnet2/dist/ 3 | pointnet2/pointnet2.egg-info/ 4 | __pycache__/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "utils/randPartial/pybind11"] 2 | path = utils/randPartial/pybind11 3 | url = https://github.com/pybind/pybind11.git 4 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/tools/_init_path.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '../')) 3 | -------------------------------------------------------------------------------- /utils/randPartial/test/1a04e3eab45ca15dd86060f189eb133.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaiyizhang/AXform/HEAD/utils/randPartial/test/1a04e3eab45ca15dd86060f189eb133.png -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/__init__.py: -------------------------------------------------------------------------------- 1 | #import torch 2 | 3 | #from MakePytorchBackend import AddGPU, Foo, ApproxMatch 4 | 5 | #from Add import add_gpu, approx_match 6 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/nndistance.cuh: -------------------------------------------------------------------------------- 1 | void nndistance(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream); 2 | void nndistancegrad(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2, cudaStream_t stream); 3 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include <cmath> 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /utils/furthestPointSampling/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='fps_cuda', 6 | ext_modules=[ 7 | CUDAExtension('fps_cuda', [ 8 | 'sampling.cpp', 9 | 'sampling_gpu.cu', 10 | ], 11 | extra_compile_args={'cxx': ['-g'], 12 | 'nvcc': ['-O2']}) 13 | ], 14 | cmdclass={'build_ext': BuildExtension} 15 | ) 16 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/pybind/bind.cpp: -------------------------------------------------------------------------------- 1 | #include <pybind11/pybind11.h> 2 | 3 | #include <torch/extension.h> 4 | 5 | // 
#include "pybind/extern.hpp" 6 | #include "extern.hpp" 7 | 8 | namespace py = pybind11; 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 11 | m.def("ApproxMatch", &ApproxMatch); 12 | m.def("MatchCost", &MatchCost); 13 | m.def("MatchCostGrad", &MatchCostGrad); 14 | m.def("NNDistance", &NNDistance); 15 | m.def("NNDistanceGrad", &NNDistanceGrad); 16 | } 17 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/pybind/extern.hpp: -------------------------------------------------------------------------------- 1 | std::vector ApproxMatch(at::Tensor in_a, at::Tensor in_b); 2 | at::Tensor MatchCost(at::Tensor set_d, at::Tensor set_q, at::Tensor match); 3 | std::vector MatchCostGrad(at::Tensor set_d, at::Tensor set_q, at::Tensor match); 4 | 5 | std::vector NNDistance(at::Tensor set_d, at::Tensor set_q); 6 | std::vector NNDistanceGrad(at::Tensor set_d, at::Tensor set_q, at::Tensor idx1, at::Tensor idx2, at::Tensor grad_dist1, at::Tensor grad_dist2); 7 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/ball_query_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _BALL_QUERY_GPU_H 2 | #define _BALL_QUERY_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 10 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); 11 | 12 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, 13 | const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/approxmatch.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | template 3 | void AddGPUKernel(Dtype *in_a, Dtype *in_b, Dtype *out_c, int N, 4 | cudaStream_t stream); 5 | */ 6 | void approxmatch(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp, cudaStream_t stream); 7 | void matchcost(int b,int n,int m,const float * xyz1,const float * xyz2,float * match, float * out, cudaStream_t stream); 8 | void matchcostgrad(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2, cudaStream_t stream); 9 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/utils.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | class Formatter { 6 | public: 7 | Formatter() {} 8 | ~Formatter() {} 9 | 10 | template Formatter &operator<<(const Type &value) { 11 | stream_ << value; 12 | return *this; 13 | } 14 | 15 | std::string str() const { return stream_.str(); } 16 | operator std::string() const { return stream_.str(); } 17 | 18 | enum ConvertToString { to_str }; 19 | 20 | std::string operator>>(ConvertToString) { return stream_.str(); } 21 | 22 | private: 23 | std::stringstream stream_; 24 | Formatter(const Formatter &); 25 | Formatter &operator=(Formatter &); 26 | }; 27 | -------------------------------------------------------------------------------- /utils/PyTorchEMD/setup.py: -------------------------------------------------------------------------------- 1 | """Setup extension 2 | 3 | Notes: 4 | If extra_compile_args is 
provided, you need to provide different instances for different extensions. 5 | Refer to https://github.com/pytorch/pytorch/issues/20169 6 | 7 | """ 8 | 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 11 | 12 | 13 | setup( 14 | name='emd_ext', 15 | ext_modules=[ 16 | CUDAExtension( 17 | name='emd_cuda', 18 | sources=[ 19 | 'cuda/emd.cpp', 20 | 'cuda/emd_kernel.cu', 21 | ], 22 | extra_compile_args={'cxx': ['-g'], 'nvcc': ['-O2']} 23 | ), 24 | ], 25 | cmdclass={ 26 | 'build_ext': BuildExtension 27 | }) 28 | -------------------------------------------------------------------------------- /utils/randPartial/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4...3.18) 2 | project(randpartial) 3 | set(CMAKE_CXX_STANDARD 11) 4 | 5 | add_subdirectory(pybind11) 6 | # find_package(pybind11 REQUIRED) 7 | include_directories(/usr/include/python3.6m) 8 | # include_directories(/opt/conda/include/python3.8) 9 | find_package(PCL REQUIRED) 10 | 11 | include_directories(SYSTEM ${PCL_INCLUDE_DIRS}) 12 | link_directories(${PCL_LIBRARY_DIRS}) 13 | add_definitions(${PCL_DEFINITIONS}) 14 | 15 | pybind11_add_module(randpartial main.cpp) 16 | # set_target_properties(randpartial PROPERTIES PREFIX "") 17 | # set_target_properties(randpartial PROPERTIES SUFFIX ".so") 18 | target_link_libraries(randpartial PRIVATE ${PCL_LIBRARIES}) 19 | target_compile_definitions(randpartial PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 10 | 'src/ball_query.cpp', 11 | 'src/ball_query_gpu.cu', 12 | 'src/group_points.cpp', 13 | 'src/group_points_gpu.cu', 14 | 'src/interpolate.cpp', 15 | 'src/interpolate_gpu.cu', 16 | 'src/sampling.cpp', 17 | 'src/sampling_gpu.cu', 18 | ], 19 | extra_compile_args={'cxx': ['-g'], 20 | 'nvcc': ['-O2']}) 21 | ], 22 | cmdclass={'build_ext': BuildExtension} 23 | ) 24 | -------------------------------------------------------------------------------- /utils/PyTorchEMD/README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Wrapper for Point-cloud Earth-Mover-Distance (EMD) 2 | 3 | ## Dependency 4 | 5 | The code has been tested on Ubuntu 16.04, PyTorch 1.1.0, CUDA 9.0. 6 | 7 | ## Usage 8 | 9 | First, compile using 10 | 11 | python setup.py install 12 | 13 | Then, copy the lib file out to the main directory: 14 | 15 | cp build/lib.linux-x86_64-3.6/emd_cuda.cpython-36m-x86_64-linux-gnu.so . 16 | 17 | Then you can use it simply via 18 | 19 | from emd import earth_mover_distance 20 | d = earth_mover_distance(p1, p2, transpose=False) # p1: B x N1 x 3, p2: B x N2 x 3 21 | 22 | Check `test_emd_loss.py` for an example. 23 | 24 | ## Author 25 | 26 | The CUDA code was originally written by Haoqiang Fan. The PyTorch wrapper was written by Kaichun Mo. Jiayuan Gu also provided help. 
27 | 28 | ## License 29 | 30 | MIT 31 | 32 | -------------------------------------------------------------------------------- /utils/PyTorchEMD/cuda/emd.cpp: -------------------------------------------------------------------------------- 1 | #ifndef _EMD 2 | #define _EMD 3 | 4 | #include <torch/extension.h> 5 | #include <vector> 6 | 7 | // CUDA declarations 8 | at::Tensor ApproxMatchForward( 9 | const at::Tensor xyz1, 10 | const at::Tensor xyz2); 11 | 12 | at::Tensor MatchCostForward( 13 | const at::Tensor xyz1, 14 | const at::Tensor xyz2, 15 | const at::Tensor match); 16 | 17 | std::vector<at::Tensor> MatchCostBackward( 18 | const at::Tensor grad_cost, 19 | const at::Tensor xyz1, 20 | const at::Tensor xyz2, 21 | const at::Tensor match); 22 | 23 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 24 | m.def("approxmatch_forward", &ApproxMatchForward, "ApproxMatch forward (CUDA)"); 25 | m.def("matchcost_forward", &MatchCostForward, "MatchCost forward (CUDA)"); 26 | m.def("matchcost_backward", &MatchCostBackward, "MatchCost backward (CUDA)"); 27 | } 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/group_points_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _GROUP_POINTS_GPU_H 2 | #define _GROUP_POINTS_GPU_H 3 | 4 | #include <torch/serialize/tensor.h> 5 | #include <cuda.h> 6 | #include <cuda_runtime_api.h> 7 | #include <vector> 8 | 9 | 10 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 11 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 12 | 13 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 14 | const float *points, const int *idx, float *out, cudaStream_t stream); 15 | 16 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/README.md: -------------------------------------------------------------------------------- 1 | # Chamfer Distance for pyTorch 2 | 3 | This is an implementation of the Chamfer Distance as a module for pyTorch. It is written as a custom C++/CUDA extension. 4 | 5 | As it is using pyTorch's [JIT compilation](https://pytorch.org/tutorials/advanced/cpp_extension.html), there are no additional prerequisite steps that have to be taken. Simply import the module as shown below; CUDA and C++ code will be compiled on the first run. 6 | 7 | ### Usage 8 | ```python 9 | from chamfer_distance import ChamferDistance 10 | chamfer_dist = ChamferDistance() 11 | 12 | #... 13 | # points and points_reconstructed are n_points x 3 matrices 14 | 15 | dist1, dist2 = chamfer_dist(points, points_reconstructed) 16 | loss = (torch.mean(dist1)) + (torch.mean(dist2)) 17 | 18 | 19 | #... 20 | ``` 21 | 22 | ### Integration 23 | This code has been integrated into the [Kaolin](https://github.com/NVIDIAGameWorks/kaolin) library for 3D Deep Learning by NVIDIAGameWorks. 
You should probably take a look at it if you are working on anything 3D :) 24 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/ball_query.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <vector> 3 | #include <THC/THC.h> 4 | #include <cuda.h> 5 | #include <cuda_runtime_api.h> 6 | #include "ball_query_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDA tensor ") 11 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 13 | 14 | int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 15 | at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) { 16 | CHECK_INPUT(new_xyz_tensor); 17 | CHECK_INPUT(xyz_tensor); 18 | const float *new_xyz = new_xyz_tensor.data<float>(); 19 | const float *xyz = xyz_tensor.data<float>(); 20 | int *idx = idx_tensor.data<int>(); 21 | 22 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 23 | ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); 24 | return 1; 25 | } -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CUDAExtension, BuildExtension 3 | 4 | # Python interface 5 | setup( 6 | name='PyTorchStructuralLosses', 7 | version='0.1.0', 8 | install_requires=['torch'], 9 | packages=['StructuralLosses'], 10 | package_dir={'StructuralLosses': './'}, 11 | ext_modules=[ 12 | CUDAExtension( 13 | name='StructuralLossesBackend', 14 | include_dirs=['./'], 15 | sources=[ 16 | 'pybind/bind.cpp', 17 | ], 18 | libraries=['make_pytorch'], 19 | library_dirs=['objs'], 20 | # extra_compile_args=['-g'] 21 | ) 22 | ], 23 | cmdclass={'build_ext': BuildExtension}, 24 | author='Christopher B. Choy', 25 | author_email='chrischoy@ai.stanford.edu', 26 | description='Tutorial for Pytorch C++ Extension with a Makefile', 27 | keywords='Pytorch C++ Extension', 28 | url='https://github.com/chrischoy/MakePytorchPlusPlus', 29 | zip_safe=False, 30 | ) 31 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Shaoshuai Shi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) [year] [fullname] 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /utils/furthestPointSampling/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #include <torch/serialize/tensor.h> 5 | #include <ATen/cuda/CUDAContext.h> 6 | #include <vector> 7 | 8 | 9 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 10 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 13 | const float *points, const int *idx, float *out, cudaStream_t stream); 14 | 15 | 16 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | 23 | int furthest_point_sampling_wrapper(int b, int n, int m, 24 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 25 | 26 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 27 | const float *dataset, float *temp, int *idxs, cudaStream_t stream); 28 | 29 | #endif -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/sampling_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _SAMPLING_GPU_H 2 | #define _SAMPLING_GPU_H 3 | 4 | #include <torch/serialize/tensor.h> 5 | #include <ATen/cuda/CUDAContext.h> 6 | #include <vector> 7 | 8 | 9 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 10 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); 11 | 12 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 13 | const float *points, const int *idx, float *out, 
cudaStream_t stream); 14 | 15 | 16 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 17 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); 18 | 19 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 20 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); 21 | 22 | 23 | int furthest_point_sampling_wrapper(int b, int n, int m, 24 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); 25 | 26 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 27 | const float *dataset, float *temp, int *idxs, cudaStream_t stream); 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/pointnet2_api.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <torch/extension.h> 3 | 4 | #include "ball_query_gpu.h" 5 | #include "group_points_gpu.h" 6 | #include "sampling_gpu.h" 7 | #include "interpolate_gpu.h" 8 | 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast"); 12 | 13 | m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast"); 14 | m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast"); 15 | 16 | m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); 17 | m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); 18 | 19 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); 20 | 21 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); 22 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); 23 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); 24 | } 25 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #include <torch/serialize/tensor.h> 5 | #include <vector> 6 | #include <cuda.h> 7 | #include <cuda_runtime_api.h> 8 | 9 | 10 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | 13 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 14 | const float *known, float *dist2, int *idx, cudaStream_t stream); 15 | 16 | 17 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, 18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 19 | 20 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); 22 | 23 | 24 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, 25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 26 | 27 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream); 29 | 30 | #endif 31 | 
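The headers above expose the raw CUDA entry points, and their tensor shapes are easiest to see in a call. Below is a minimal, hedged sketch (not part of the repository) of driving the `three_nn` / `three_interpolate` wrappers registered in pointnet2_api.cpp, assuming the extension was built as `pointnet2_cuda` via the setup.py above; the (B, n, 3) buffer shapes for the three nearest neighbors are inferred from the wrapper signatures, and in practice these calls are wrapped in autograd Functions inside pointnet2_utils.py.

```python
# Hedged sketch: call the raw wrappers declared in interpolate_gpu.h directly.
# Assumes the compiled extension is importable as `pointnet2_cuda`.
import torch
import pointnet2_cuda

B, c, n, m = 2, 16, 4096, 1024
unknown = torch.rand(B, n, 3).cuda().contiguous()   # points to interpolate onto
known = torch.rand(B, m, 3).cuda().contiguous()     # points that carry features
dist2 = torch.zeros(B, n, 3).cuda()                 # out: squared dists to 3 NNs
idx = torch.zeros(B, n, 3, dtype=torch.int).cuda()  # out: indices of 3 NNs
pointnet2_cuda.three_nn_wrapper(B, n, m, unknown, known, dist2, idx)

# Inverse-distance weights, then interpolate features (B, c, m) -> (B, c, n).
weight = 1.0 / (dist2 + 1e-8)
weight = (weight / weight.sum(dim=2, keepdim=True)).contiguous()
feats = torch.rand(B, c, m).cuda().contiguous()
out = torch.zeros(B, c, n).cuda()
pointnet2_cuda.three_interpolate_wrapper(B, c, m, n, feats, idx, weight, out)
```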
-------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/group_points.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <cuda.h> 3 | #include <cuda_runtime_api.h> 4 | #include <vector> 5 | #include <THC/THC.h> 6 | #include "group_points_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | 11 | int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, 12 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 13 | 14 | float *grad_points = grad_points_tensor.data<float>(); 15 | const int *idx = idx_tensor.data<int>(); 16 | const float *grad_out = grad_out_tensor.data<float>(); 17 | 18 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 19 | 20 | group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); 21 | return 1; 22 | } 23 | 24 | 25 | int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, 26 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { 27 | 28 | const float *points = points_tensor.data<float>(); 29 | const int *idx = idx_tensor.data<int>(); 30 | float *out = out_tensor.data<float>(); 31 | 32 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 33 | 34 | group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); 35 | return 1; 36 | } -------------------------------------------------------------------------------- /utils/randPartial/test/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import open3d as o3d 4 | import randpartial 5 | 6 | from visdom import Visdom 7 | 8 | 9 | def plot_diff_pcds(pcds, vis, title, legend, win=None): 10 | ''' 11 | :param pcds: Python list of point clouds, possibly of different sizes 12 | :param legend: legend entry for each point cloud 13 | :return: 14 | ''' 15 | device = pcds[0].device 16 | assert vis.check_connection() 17 | 18 | pcds_data = torch.Tensor().to(device) 19 | for i in range(len(pcds)): 20 | pcds_data = torch.cat((pcds_data, pcds[i]), 0) 21 | 22 | pcds_label = torch.Tensor().to(device) 23 | for i in range(1, len(pcds) + 1): 24 | pcds_label = torch.cat((pcds_label, torch.Tensor([i] * pcds[i - 1].shape[0]).to(device)), 0) 25 | 26 | vis.scatter(X=pcds_data, Y=pcds_label, 27 | opts={ 28 | 'title': title, 29 | 'markersize': 3, 30 | # 'markercolor': np.random.randint(0, 255, (len(pcds), 3)), 31 | 'webgl': True, 32 | 'legend': legend}, 33 | win=win) 34 | 35 | 36 | if __name__ == "__main__": 37 | vis = Visdom(env='randpartial') 38 | 39 | pcd1 = o3d.io.read_point_cloud('./1a04e3eab45ca15dd86060f189eb133.pcd') 40 | pcd1 = torch.from_numpy(np.asarray(pcd1.points)) 41 | 42 | pcd2 = torch.from_numpy(randpartial.gen(pcd1)) 43 | plot_diff_pcds([pcd1, pcd2], vis=vis, title='test', legend=['pcd1', 'pcd2'], win='test') -------------------------------------------------------------------------------- /utils/PyTorchEMD/emd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import emd_cuda 3 | 4 | 5 | class EarthMoverDistanceFunction(torch.autograd.Function): 6 | @staticmethod 7 | def forward(ctx, xyz1, xyz2): 8 | xyz1 = xyz1.contiguous() 9 | xyz2 = xyz2.contiguous() 10 | assert xyz1.is_cuda and xyz2.is_cuda, "Only support cuda currently." 
11 | match = emd_cuda.approxmatch_forward(xyz1, xyz2) 12 | cost = emd_cuda.matchcost_forward(xyz1, xyz2, match) 13 | ctx.save_for_backward(xyz1, xyz2, match) 14 | return cost 15 | 16 | @staticmethod 17 | def backward(ctx, grad_cost): 18 | xyz1, xyz2, match = ctx.saved_tensors 19 | grad_cost = grad_cost.contiguous() 20 | grad_xyz1, grad_xyz2 = emd_cuda.matchcost_backward(grad_cost, xyz1, xyz2, match) 21 | return grad_xyz1, grad_xyz2 22 | 23 | 24 | def earth_mover_distance(xyz1, xyz2, transpose=True): 25 | """Earth Mover Distance (Approx) 26 | 27 | Args: 28 | xyz1 (torch.Tensor): (b, 3, n1) 29 | xyz2 (torch.Tensor): (b, 3, n2) 30 | transpose (bool): whether to transpose inputs as it might be BCN format. 31 | Extensions only support BNC format. 32 | 33 | Returns: 34 | cost (torch.Tensor): (b) 35 | 36 | """ 37 | if xyz1.dim() == 2: 38 | xyz1 = xyz1.unsqueeze(0) 39 | if xyz2.dim() == 2: 40 | xyz2 = xyz2.unsqueeze(0) 41 | if transpose: 42 | xyz1 = xyz1.transpose(1, 2) 43 | xyz2 = xyz2.transpose(1, 2) 44 | cost = EarthMoverDistanceFunction.apply(xyz1, xyz2) 45 | return cost 46 | 47 | -------------------------------------------------------------------------------- /utils/PyTorchEMD/test_emd_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import time 4 | from emd import earth_mover_distance 5 | 6 | # gt 7 | # p1 = torch.from_numpy(np.array([[[1.7, -0.1, 0.1], [0.1, 1.2, 0.3]]], dtype=np.float32)).cuda() 8 | # p1 = p1.repeat(3, 1, 1) 9 | # p2 = torch.from_numpy(np.array([[[0.3, 1.8, 0.2], [1.2, -0.2, 0.3]]], dtype=np.float32)).cuda() 10 | # p2 = p2.repeat(3, 1, 1) 11 | # print(p1) 12 | # print(p2) 13 | # p1.requires_grad = True 14 | # p2.requires_grad = True 15 | 16 | # gt_dist = (((p1[0, 0] - p2[0, 1])**2).sum() + ((p1[0, 1] - p2[0, 0])**2).sum()) / 2 + \ 17 | # (((p1[1, 0] - p2[1, 1])**2).sum() + ((p1[1, 1] - p2[1, 0])**2).sum()) * 2 + \ 18 | # (((p1[2, 0] - p2[2, 1])**2).sum() + ((p1[2, 1] - p2[2, 0])**2).sum()) / 3 19 | # print('gt_dist: ', gt_dist) 20 | 21 | # gt_dist.backward() 22 | # print(p1.grad) 23 | # print(p2.grad) 24 | 25 | # emd 26 | # p1 = torch.from_numpy(np.array([[[1.7, -0.1, 0.1], [0.1, 1.2, 0.3]]], dtype=np.float32)).cuda() 27 | p1 = torch.from_numpy(np.array([[[1, 1, 1], [1, 1, 1]]], dtype=np.float32)).cuda() 28 | p1 = p1.repeat(3, 1, 1) 29 | # p2 = torch.from_numpy(np.array([[[0.3, 1.8, 0.2], [1.2, -0.2, 0.3]]], dtype=np.float32)).cuda() 30 | p2 = torch.from_numpy(np.array([[[0, 0, 0], [0, 0, 0]]], dtype=np.float32)).cuda() 31 | p2 = p2.repeat(3, 1, 1) 32 | print(p1) 33 | print(p2) 34 | p1.requires_grad = True 35 | p2.requires_grad = True 36 | 37 | d = earth_mover_distance(p1, p2, transpose=False) 38 | print(d) 39 | 40 | loss = d[0] / 2 + d[1] * 2 + d[2] / 3 41 | print(loss) 42 | 43 | # loss.backward() 44 | # print(p1.grad) 45 | # print(p2.grad) 46 | 47 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/nn_distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | # from extensions.StructuralLosses.StructuralLossesBackend import NNDistance, NNDistanceGrad 4 | from .StructuralLossesBackend import NNDistance, NNDistanceGrad 5 | 6 | # Inherit from Function 7 | class NNDistanceFunction(Function): 8 | # Note that both forward and backward are @staticmethods 9 | @staticmethod 10 | # seta and setb are the two input point sets 11 | def forward(ctx, 
seta, setb): 12 | #print("NN Distance Forward") 13 | ctx.save_for_backward(seta, setb) 14 | ''' 15 | input: 16 | seta : batch_size * #dataset_points * 3 17 | setb : batch_size * #query_points * 3 18 | returns: 19 | dist1, idx1, dist2, idx2 20 | ''' 21 | dist1, idx1, dist2, idx2 = NNDistance(seta, setb) 22 | ctx.idx1 = idx1 23 | ctx.idx2 = idx2 24 | return dist1, dist2 25 | 26 | # The forward pass returns two distance tensors, so backward receives two gradients 27 | @staticmethod 28 | def backward(ctx, grad_dist1, grad_dist2): 29 | #print("NN Distance Backward") 30 | # This is a pattern that is very convenient - at the top of backward 31 | # unpack saved_tensors and initialize all gradients w.r.t. inputs to 32 | # None. Thanks to the fact that additional trailing Nones are 33 | # ignored, the return statement is simple even when the function has 34 | # optional inputs. 35 | seta, setb = ctx.saved_tensors 36 | idx1 = ctx.idx1 37 | idx2 = ctx.idx2 38 | grada, gradb = NNDistanceGrad(seta, setb, idx1, idx2, grad_dist1, grad_dist2) 39 | return grada, gradb 40 | 41 | nn_distance = NNDistanceFunction.apply 42 | 43 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | #include <vector> 4 | #include <THC/THC.h> 5 | 6 | #include "sampling_gpu.h" 7 | 8 | extern THCState *state; 9 | 10 | 11 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 12 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ 13 | const float *points = points_tensor.data<float>(); 14 | const int *idx = idx_tensor.data<int>(); 15 | float *out = out_tensor.data<float>(); 16 | 17 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 18 | gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); 19 | return 1; 20 | } 21 | 22 | 23 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 24 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 25 | 26 | const float *grad_out = grad_out_tensor.data<float>(); 27 | const int *idx = idx_tensor.data<int>(); 28 | float *grad_points = grad_points_tensor.data<float>(); 29 | 30 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 31 | gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); 32 | return 1; 33 | } 34 | 35 | 36 | int furthest_point_sampling_wrapper(int b, int n, int m, 37 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { 38 | 39 | const float *points = points_tensor.data<float>(); 40 | float *temp = temp_tensor.data<float>(); 41 | int *idx = idx_tensor.data<int>(); 42 | 43 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 44 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 45 | return 1; 46 | } 47 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/README.md: -------------------------------------------------------------------------------- 1 | # Pointnet2.PyTorch 2 | 3 | * PyTorch implementation of [PointNet++](https://arxiv.org/abs/1706.02413) based on [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch). 4 | * Faster than the original code thanks to re-implemented CUDA operations. 
5 | 6 | ## Installation 7 | ### Requirements 8 | * Linux (tested on Ubuntu 14.04/16.04) 9 | * Python 3.6+ 10 | * PyTorch 1.0 11 | 12 | ### Install 13 | Install this library by running the following command: 14 | 15 | ```shell 16 | cd pointnet2 17 | python setup.py install 18 | cd ../ 19 | ``` 20 | 21 | ## Examples 22 | Here I provide a simple example of using this library for KITTI outdoor foreground point cloud segmentation; you can refer to the paper [PointRCNN](https://arxiv.org/abs/1812.04244) for details of the task description and foreground label generation. 23 | 24 | 1. Download the training data from the [KITTI 3D object detection](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) website and organize the downloaded files as follows: 25 | ``` 26 | Pointnet2.PyTorch 27 | ├── pointnet2 28 | ├── tools 29 | │ ├──data 30 | │ │ ├── KITTI 31 | │ │ │ ├── ImageSets 32 | │ │ │ ├── object 33 | │ │ │ │ ├──training 34 | │ │ │ │ ├──calib & velodyne & label_2 & image_2 35 | │ │ train_and_eval.py 36 | ``` 37 | 38 | 2. Run the following command to train and evaluate: 39 | ```shell 40 | cd tools 41 | python train_and_eval.py --batch_size 8 --epochs 100 --ckpt_save_interval 2 42 | ``` 43 | 44 | 45 | 46 | ## Projects using this repo 47 | * [PointRCNN](https://github.com/sshaoshuai/PointRCNN): 3D object detector from raw point cloud. 48 | 49 | ## Acknowledgement 50 | * [charlesq34/pointnet2](https://github.com/charlesq34/pointnet2): Paper author and official code repo. 51 | * [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch): Initial PyTorch implementation of PointNet++. 52 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .StructuralLossesBackend import ApproxMatch, MatchCost, MatchCostGrad 4 | 5 | # Inherit from Function 6 | class MatchCostFunction(Function): 7 | # Note that both forward and backward are @staticmethods 8 | @staticmethod 9 | # seta and setb are the two input point sets 10 | def forward(ctx, seta, setb): 11 | #print("Match Cost Forward") 12 | ctx.save_for_backward(seta, setb) 13 | ''' 14 | input: 15 | seta : batch_size * #dataset_points * 3 16 | setb : batch_size * #query_points * 3 17 | returns: 18 | match : batch_size * #query_points * #dataset_points 19 | ''' 20 | match, temp = ApproxMatch(seta, setb) 21 | ctx.match = match 22 | cost = MatchCost(seta, setb, match) 23 | return cost 24 | 25 | """ 26 | grad_1,grad_2=approxmatch_module.match_cost_grad(xyz1,xyz2,match) 27 | return [grad_1*tf.expand_dims(tf.expand_dims(grad_cost,1),2),grad_2*tf.expand_dims(tf.expand_dims(grad_cost,1),2),None] 28 | """ 29 | # This function has only a single output, so it gets only one gradient 30 | @staticmethod 31 | def backward(ctx, grad_output): 32 | #print("Match Cost Backward") 33 | # This is a pattern that is very convenient - at the top of backward 34 | # unpack saved_tensors and initialize all gradients w.r.t. inputs to 35 | # None. Thanks to the fact that additional trailing Nones are 36 | # ignored, the return statement is simple even when the function has 37 | # optional inputs. 
38 | seta, setb = ctx.saved_tensors 39 | #grad_input = grad_weight = grad_bias = None 40 | grada, gradb = MatchCostGrad(seta, setb, ctx.match) 41 | grad_output_expand = grad_output.unsqueeze(1).unsqueeze(2) 42 | return grada*grad_output_expand, gradb*grad_output_expand 43 | 44 | match_cost = MatchCostFunction.apply 45 | 46 | -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/chamfer_distance/chamfer_distance.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from torch.utils.cpp_extension import load 5 | 6 | import os 7 | path = os.path.dirname(os.path.abspath(__file__))+'/' 8 | cd = load(name="cd", 9 | sources=[path+"chamfer_distance.cpp", 10 | path+"chamfer_distance.cu"]) 11 | 12 | class ChamferDistanceFunction(torch.autograd.Function): 13 | @staticmethod 14 | def forward(ctx, xyz1, xyz2): 15 | batchsize, n, _ = xyz1.size() 16 | _, m, _ = xyz2.size() 17 | xyz1 = xyz1.contiguous() 18 | xyz2 = xyz2.contiguous() 19 | dist1 = torch.zeros(batchsize, n) 20 | dist2 = torch.zeros(batchsize, m) 21 | 22 | idx1 = torch.zeros(batchsize, n, dtype=torch.int) 23 | idx2 = torch.zeros(batchsize, m, dtype=torch.int) 24 | 25 | if not xyz1.is_cuda: 26 | cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 27 | else: 28 | dist1 = dist1.cuda() 29 | dist2 = dist2.cuda() 30 | idx1 = idx1.cuda() 31 | idx2 = idx2.cuda() 32 | cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2) 33 | 34 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 35 | 36 | return dist1, dist2 37 | 38 | @staticmethod 39 | def backward(ctx, graddist1, graddist2): 40 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 41 | 42 | graddist1 = graddist1.contiguous() 43 | graddist2 = graddist2.contiguous() 44 | 45 | gradxyz1 = torch.zeros(xyz1.size()) 46 | gradxyz2 = torch.zeros(xyz2.size()) 47 | 48 | if not graddist1.is_cuda: 49 | cd.backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 50 | else: 51 | gradxyz1 = gradxyz1.cuda() 52 | gradxyz2 = gradxyz2.cuda() 53 | cd.backward_cuda(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2) 54 | 55 | return gradxyz1, gradxyz2 56 | 57 | 58 | class ChamferDistance(torch.nn.Module): 59 | def forward(self, xyz1, xyz2): 60 | return ChamferDistanceFunction.apply(xyz1, xyz2) 61 | -------------------------------------------------------------------------------- /utils/furthestPointSampling/sampling.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | #include <vector> 4 | #include <THC/THC.h> 5 | #include <torch/extension.h> 6 | 7 | #include "sampling_gpu.h" 8 | 9 | extern THCState *state; 10 | 11 | 12 | int gather_points_wrapper_fast(int b, int c, int n, int npoints, 13 | at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ 14 | const float *points = points_tensor.data<float>(); 15 | const int *idx = idx_tensor.data<int>(); 16 | float *out = out_tensor.data<float>(); 17 | 18 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 19 | gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); 20 | return 1; 21 | } 22 | 23 | 24 | int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, 25 | at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { 26 | 27 | const float *grad_out = grad_out_tensor.data<float>(); 28 | const int *idx = idx_tensor.data<int>(); 29 | float *grad_points = grad_points_tensor.data<float>(); 30 | 31 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 32 | 
gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); 33 | return 1; 34 | } 35 | 36 | 37 | int furthest_point_sampling_wrapper(int b, int n, int m, 38 | at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { 39 | 40 | const float *points = points_tensor.data<float>(); 41 | float *temp = temp_tensor.data<float>(); 42 | int *idx = idx_tensor.data<int>(); 43 | 44 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 45 | furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); 46 | return 1; 47 | } 48 | 49 | 50 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 51 | m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); 52 | m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); 53 | 54 | m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); 55 | } 56 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/serialize/tensor.h> 2 | #include <vector> 3 | #include <THC/THC.h> 4 | #include <math.h> 5 | #include <stdio.h> 6 | #include <stdlib.h> 7 | #include <cuda.h> 8 | #include <cuda_runtime_api.h> 9 | #include "interpolate_gpu.h" 10 | 11 | extern THCState *state; 12 | 13 | 14 | void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, 15 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 16 | const float *unknown = unknown_tensor.data<float>(); 17 | const float *known = known_tensor.data<float>(); 18 | float *dist2 = dist2_tensor.data<float>(); 19 | int *idx = idx_tensor.data<int>(); 20 | 21 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 22 | three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream); 23 | } 24 | 25 | 26 | void three_interpolate_wrapper_fast(int b, int c, int m, int n, 27 | at::Tensor points_tensor, 28 | at::Tensor idx_tensor, 29 | at::Tensor weight_tensor, 30 | at::Tensor out_tensor) { 31 | 32 | const float *points = points_tensor.data<float>(); 33 | const float *weight = weight_tensor.data<float>(); 34 | float *out = out_tensor.data<float>(); 35 | const int *idx = idx_tensor.data<int>(); 36 | 37 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 38 | three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream); 39 | } 40 | 41 | void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, 42 | at::Tensor grad_out_tensor, 43 | at::Tensor idx_tensor, 44 | at::Tensor weight_tensor, 45 | at::Tensor grad_points_tensor) { 46 | 47 | const float *grad_out = grad_out_tensor.data<float>(); 48 | const float *weight = weight_tensor.data<float>(); 49 | float *grad_points = grad_points_tensor.data<float>(); 50 | const int *idx = idx_tensor.data<int>(); 51 | 52 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 53 | three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream); 54 | } -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/ball_query_gpu.cu: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | 5 | #include "ball_query_gpu.h" 6 | #include "cuda_utils.h" 7 | 8 | 9 | __global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, 10 | const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { 11 | // new_xyz: (B, M, 3) 12 | // xyz: (B, N, 3) 
13 | // output: 14 | // idx: (B, M, nsample) 15 | int bs_idx = blockIdx.y; 16 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 17 | if (bs_idx >= b || pt_idx >= m) return; 18 | 19 | new_xyz += bs_idx * m * 3 + pt_idx * 3; 20 | xyz += bs_idx * n * 3; 21 | idx += bs_idx * m * nsample + pt_idx * nsample; 22 | 23 | float radius2 = radius * radius; 24 | float new_x = new_xyz[0]; 25 | float new_y = new_xyz[1]; 26 | float new_z = new_xyz[2]; 27 | 28 | int cnt = 0; 29 | for (int k = 0; k < n; ++k) { 30 | float x = xyz[k * 3 + 0]; 31 | float y = xyz[k * 3 + 1]; 32 | float z = xyz[k * 3 + 2]; 33 | float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); 34 | if (d2 < radius2){ 35 | if (cnt == 0){ 36 | for (int l = 0; l < nsample; ++l) { 37 | idx[l] = k; 38 | } 39 | } 40 | idx[cnt] = k; 41 | ++cnt; 42 | if (cnt >= nsample) break; 43 | } 44 | } 45 | } 46 | 47 | 48 | void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ 49 | const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { 50 | // new_xyz: (B, M, 3) 51 | // xyz: (B, N, 3) 52 | // output: 53 | // idx: (B, M, nsample) 54 | 55 | cudaError_t err; 56 | 57 | dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 58 | dim3 threads(THREADS_PER_BLOCK); 59 | 60 | ball_query_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx); 61 | // cudaDeviceSynchronize(); // for using printf in kernel function 62 | err = cudaGetLastError(); 63 | if (cudaSuccess != err) { 64 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 65 | exit(-1); 66 | } 67 | } -------------------------------------------------------------------------------- /utils/furthestPointSampling/fps.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | 5 | import fps_cuda 6 | 7 | 8 | class FurthestPointSampling(Function): 9 | @staticmethod 10 | def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: 11 | """ 12 | Uses iterative furthest point sampling to select a set of npoint features that have the largest 13 | minimum distance 14 | :param ctx: 15 | :param xyz: (B, N, 3) where N > npoint 16 | :param npoint: int, number of features in the sampled set 17 | :return: 18 | output: (B, npoint) tensor containing the set 19 | """ 20 | assert xyz.is_contiguous() 21 | 22 | B, N, _ = xyz.size() 23 | output = torch.cuda.IntTensor(B, npoint) 24 | temp = torch.cuda.FloatTensor(B, N).fill_(1e10) 25 | 26 | fps_cuda.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output) 27 | return output 28 | 29 | @staticmethod 30 | def backward(ctx, a=None): 31 | return None, None 32 | 33 | 34 | furthest_point_sample = FurthestPointSampling.apply 35 | 36 | 37 | class GatherOperation(Function): 38 | 39 | @staticmethod 40 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: 41 | """ 42 | :param ctx: 43 | :param features: (B, C, N) 44 | :param idx: (B, npoint) index tensor of the features to gather 45 | :return: 46 | output: (B, C, npoint) 47 | """ 48 | assert features.is_contiguous() 49 | assert idx.is_contiguous() 50 | 51 | B, npoint = idx.size() 52 | _, C, N = features.size() 53 | output = torch.cuda.FloatTensor(B, C, npoint) 54 | 55 | fps_cuda.gather_points_wrapper(B, C, N, npoint, features, idx, output) 56 | 57 | ctx.for_backwards = (idx, C, N) 58 | return output 59 | 60 | @staticmethod 61 | def backward(ctx, 
grad_out): 62 | idx, C, N = ctx.for_backwards 63 | B, npoint = idx.size() 64 | 65 | grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) 66 | grad_out_data = grad_out.data.contiguous() 67 | fps_cuda.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data) 68 | return grad_features, None 69 | 70 | 71 | gather_operation = GatherOperation.apply 72 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/tools/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG 4 | import pointnet2.pytorch_utils as pt_utils 5 | 6 | 7 | def get_model(input_channels=0): 8 | return Pointnet2MSG(input_channels=input_channels) 9 | 10 | 11 | NPOINTS = [4096, 1024, 256, 64] 12 | RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]] 13 | NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]] 14 | MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]], 15 | [[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]] 16 | FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]] 17 | CLS_FC = [128] 18 | DP_RATIO = 0.5 19 | 20 | 21 | class Pointnet2MSG(nn.Module): 22 | def __init__(self, input_channels=6): 23 | super().__init__() 24 | 25 | self.SA_modules = nn.ModuleList() 26 | channel_in = input_channels 27 | 28 | skip_channel_list = [input_channels] 29 | for k in range(NPOINTS.__len__()): 30 | mlps = MLPS[k].copy() 31 | channel_out = 0 32 | for idx in range(mlps.__len__()): 33 | mlps[idx] = [channel_in] + mlps[idx] 34 | channel_out += mlps[idx][-1] 35 | 36 | self.SA_modules.append( 37 | PointnetSAModuleMSG( 38 | npoint=NPOINTS[k], 39 | radii=RADIUS[k], 40 | nsamples=NSAMPLE[k], 41 | mlps=mlps, 42 | use_xyz=True, 43 | bn=True 44 | ) 45 | ) 46 | skip_channel_list.append(channel_out) 47 | channel_in = channel_out 48 | 49 | self.FP_modules = nn.ModuleList() 50 | 51 | for k in range(FP_MLPS.__len__()): 52 | pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out 53 | self.FP_modules.append( 54 | PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k]) 55 | ) 56 | 57 | cls_layers = [] 58 | pre_channel = FP_MLPS[0][-1] 59 | for k in range(0, CLS_FC.__len__()): 60 | cls_layers.append(pt_utils.Conv1d(pre_channel, CLS_FC[k], bn=True)) 61 | pre_channel = CLS_FC[k] 62 | cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None)) 63 | cls_layers.insert(1, nn.Dropout(0.5)) 64 | self.cls_layer = nn.Sequential(*cls_layers) 65 | 66 | def _break_up_pc(self, pc): 67 | xyz = pc[..., 0:3].contiguous() 68 | features = ( 69 | pc[..., 3:].transpose(1, 2).contiguous() 70 | if pc.size(-1) > 3 else None 71 | ) 72 | 73 | return xyz, features 74 | 75 | def forward(self, pointcloud: torch.cuda.FloatTensor): 76 | xyz, features = self._break_up_pc(pointcloud) 77 | 78 | l_xyz, l_features = [xyz], [features] 79 | for i in range(len(self.SA_modules)): 80 | li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) 81 | l_xyz.append(li_xyz) 82 | l_features.append(li_features) 83 | 84 | for i in range(-1, -(len(self.FP_modules) + 1), -1): 85 | l_features[i - 1] = self.FP_modules[i]( 86 | l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i] 87 | ) 88 | 89 | pred_cls = self.cls_layer(l_features[0]).transpose(1, 2).contiguous() # (B, N, 1) 90 | return pred_cls 91 | 
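For orientation, here is a small, hedged smoke test of the `Pointnet2MSG` segmentation network defined above — it is not part of the original repo. It assumes the compiled `pointnet2_cuda` extension is installed and feeds at least NPOINTS[0] = 4096 points, since the first SA layer samples that many centroids.

```python
# Hypothetical smoke test for pointnet2_msg.get_model (illustration only).
import torch
from pointnet2_msg import get_model

model = get_model(input_channels=0).cuda().eval()  # xyz-only point clouds
pts = torch.rand(2, 16384, 3).cuda()               # (B, N, 3) with N >= 4096
with torch.no_grad():
    pred_cls = model(pts)                          # (B, N, 1) foreground logits
print(pred_cls.shape)                              # torch.Size([2, 16384, 1])
```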
-------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/group_points_gpu.cu: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | 4 | #include "cuda_utils.h" 5 | #include "group_points_gpu.h" 6 | 7 | 8 | __global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, 9 | const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) { 10 | // grad_out: (B, C, npoints, nsample) 11 | // idx: (B, npoints, nsample) 12 | // output: 13 | // grad_points: (B, C, N) 14 | int bs_idx = blockIdx.z; 15 | int c_idx = blockIdx.y; 16 | int index = blockIdx.x * blockDim.x + threadIdx.x; 17 | int pt_idx = index / nsample; 18 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 19 | 20 | int sample_idx = index % nsample; 21 | grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 22 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 23 | 24 | atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]); 25 | } 26 | 27 | void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 28 | const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { 29 | // grad_out: (B, C, npoints, nsample) 30 | // idx: (B, npoints, nsample) 31 | // output: 32 | // grad_points: (B, C, N) 33 | cudaError_t err; 34 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 35 | dim3 threads(THREADS_PER_BLOCK); 36 | 37 | group_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points); 38 | 39 | err = cudaGetLastError(); 40 | if (cudaSuccess != err) { 41 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 42 | exit(-1); 43 | } 44 | } 45 | 46 | 47 | __global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, 48 | const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { 49 | // points: (B, C, N) 50 | // idx: (B, npoints, nsample) 51 | // output: 52 | // out: (B, C, npoints, nsample) 53 | int bs_idx = blockIdx.z; 54 | int c_idx = blockIdx.y; 55 | int index = blockIdx.x * blockDim.x + threadIdx.x; 56 | int pt_idx = index / nsample; 57 | if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; 58 | 59 | int sample_idx = index % nsample; 60 | 61 | idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; 62 | int in_idx = bs_idx * c * n + c_idx * n + idx[0]; 63 | int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; 64 | 65 | out[out_idx] = points[in_idx]; 66 | } 67 | 68 | 69 | void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, 70 | const float *points, const int *idx, float *out, cudaStream_t stream) { 71 | // points: (B, C, N) 72 | // idx: (B, npoints, nsample) 73 | // output: 74 | // out: (B, C, npoints, nsample) 75 | cudaError_t err; 76 | dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 77 | dim3 threads(THREADS_PER_BLOCK); 78 | 79 | group_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, points, idx, out); 80 | // cudaDeviceSynchronize(); // for using printf in kernel function 81 | err = cudaGetLastError(); 82 | if (cudaSuccess != err) { 83 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 84 | 
exit(-1); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/Makefile: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Uncomment for debugging 3 | # DEBUG := 1 4 | # Pretty build 5 | # Q ?= @ 6 | 7 | CXX := g++ 8 | PYTHON := python3 9 | NVCC := /usr/local/cuda/bin/nvcc 10 | 11 | # PYTHON Header path 12 | PYTHON_HEADER_DIR := $(shell $(PYTHON) -c 'from distutils.sysconfig import get_python_inc; print(get_python_inc())') 13 | PYTHON_VERSION := $(shell $(PYTHON) -c 'import sys; print(str(sys.version_info[0])+"."+str(sys.version_info[1]))') 14 | PYTORCH_INCLUDES := $(shell $(PYTHON) -c 'from torch.utils.cpp_extension import include_paths; [print(p) for p in include_paths()]') 15 | PYTORCH_LIBRARIES := $(shell $(PYTHON) -c 'from torch.utils.cpp_extension import library_paths; [print(p) for p in library_paths()]') 16 | 17 | # CUDA ROOT DIR that contains bin/ lib64/ and include/ 18 | # CUDA_DIR := /usr/local/cuda 19 | CUDA_DIR := $(shell $(PYTHON) -c 'from torch.utils.cpp_extension import _find_cuda_home; print(_find_cuda_home())') 20 | 21 | INCLUDE_DIRS := ./ $(CUDA_DIR)/include 22 | 23 | INCLUDE_DIRS += $(PYTHON_HEADER_DIR) 24 | INCLUDE_DIRS += $(PYTORCH_INCLUDES) 25 | 26 | # Custom (MKL/ATLAS/OpenBLAS) include and lib directories. 27 | # Leave commented to accept the defaults for your choice of BLAS 28 | # (which should work)! 29 | # BLAS_INCLUDE := /path/to/your/blas 30 | # BLAS_LIB := /path/to/your/blas 31 | 32 | ############################################################################### 33 | SRC_DIR := ./src 34 | OBJ_DIR := ./objs 35 | CPP_SRCS := $(wildcard $(SRC_DIR)/*.cpp) 36 | CU_SRCS := $(wildcard $(SRC_DIR)/*.cu) 37 | OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(CPP_SRCS)) 38 | CU_OBJS := $(patsubst $(SRC_DIR)/%.cu,$(OBJ_DIR)/cuda/%.o,$(CU_SRCS)) 39 | STATIC_LIB := $(OBJ_DIR)/libmake_pytorch.a 40 | 41 | # CUDA architecture setting: going with all of them. 42 | # For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility. 43 | # For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility. 44 | CUDA_ARCH := -gencode arch=compute_61,code=sm_61 \ 45 | -gencode arch=compute_61,code=compute_61 \ 46 | -gencode arch=compute_52,code=sm_52 47 | 48 | # We will also explicitly add stdc++ to the link target. 49 | LIBRARIES += stdc++ cudart c10 caffe2 torch torch_python caffe2_gpu 50 | 51 | # Debugging 52 | ifeq ($(DEBUG), 1) 53 | COMMON_FLAGS += -DDEBUG -g -O0 54 | # https://gcoe-dresden.de/reaching-the-shore-with-a-fog-warning-my-eurohack-day-4-morning-session/ 55 | NVCCFLAGS += -g -G # -rdc true 56 | else 57 | COMMON_FLAGS += -DNDEBUG -O3 58 | endif 59 | 60 | WARNINGS := -Wall -Wno-sign-compare -Wcomment 61 | 62 | INCLUDE_DIRS += $(BLAS_INCLUDE) 63 | 64 | # Automatic dependency generation (nvcc is handled separately) 65 | CXXFLAGS += -MMD -MP 66 | 67 | # Complete build flags. 68 | COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) \ 69 | -DTORCH_API_INCLUDE_EXTENSION_H -D_GLIBCXX_USE_CXX11_ABI=0 70 | CXXFLAGS += -pthread -fPIC -fwrapv -std=c++14 $(COMMON_FLAGS) $(WARNINGS) 71 | NVCCFLAGS += -std=c++14 -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS) 72 | 73 | all: $(STATIC_LIB) 74 | $(PYTHON) setup.py build 75 | @ cp -R build/lib.linux-x86_64-$(PYTHON_VERSION)/StructuralLosses .. 
76 | @ mv build/lib.linux-x86_64-$(PYTHON_VERSION)/*.so ../StructuralLosses/ 77 | @- $(RM) -rf $(OBJ_DIR) build objs 78 | 79 | $(OBJ_DIR): 80 | @ mkdir -p $@ 81 | @ mkdir -p $@/cuda 82 | 83 | $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp | $(OBJ_DIR) 84 | @ echo CXX $< 85 | $(Q)$(CXX) $< $(CXXFLAGS) -c -o $@ 86 | 87 | $(OBJ_DIR)/cuda/%.o: $(SRC_DIR)/%.cu | $(OBJ_DIR) 88 | @ echo NVCC $< 89 | $(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} \ 90 | -odir $(@D) 91 | $(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 92 | 93 | $(STATIC_LIB): $(OBJS) $(CU_OBJS) | $(OBJ_DIR) 94 | $(RM) -f $(STATIC_LIB) 95 | $(RM) -rf build dist 96 | @ echo LD -o $@ 97 | ar rc $(STATIC_LIB) $(OBJS) $(CU_OBJS) 98 | 99 | clean: 100 | @- $(RM) -rf $(OBJ_DIR) build dist ../StructuralLosses 101 | 102 | -------------------------------------------------------------------------------- /utils/sample_pytorch3d.py: -------------------------------------------------------------------------------- 1 | """ 2 | ShapeNet dataset download link: http://shapenet.cs.stanford.edu/shapenet/obj-zip/ 3 | """ 4 | 5 | import argparse 6 | import torch 7 | import torch.utils.data as data 8 | import os 9 | import h5py 10 | import pandas as pd 11 | from tqdm import tqdm 12 | from visdom import Visdom 13 | 14 | from pytorch3d.datasets import ( 15 | ShapeNetCore, 16 | collate_batched_meshes, 17 | ) 18 | from pytorch3d.renderer import ( 19 | TexturesVertex, 20 | ) 21 | from pytorch3d.structures import ( 22 | Meshes, 23 | Pointclouds, 24 | ) 25 | from pytorch3d.ops import ( 26 | sample_points_from_meshes, 27 | ) 28 | from pytorch3d.io import ( 29 | load_obj, 30 | ) 31 | 32 | from utils import plot_diff_pcds, farthest_point_sample 33 | 34 | 35 | if torch.cuda.is_available(): 36 | device = torch.device("cuda:0") 37 | torch.cuda.set_device(device) 38 | else: 39 | device = torch.device("cpu") 40 | 41 | """ 42 | shapenet_dataset = ShapeNetCore("../data/ShapeNetCore.v2", synsets=["02691156"], version=2) 43 | 44 | # batch_size = 5 45 | # loader = data.DataLoader(shapenet_dataset, batch_size=batch_size, collate_fn=collate_batched_meshes) 46 | 47 | vis = Visdom(env='sample') 48 | df = pd.read_csv("../data/ShapeNetCore.v2/all.csv") 49 | 50 | for i in range(len(shapenet_dataset)): 51 | shapenet_model = shapenet_dataset[i] 52 | print(i) 53 | foldername = shapenet_model["synset_id"] 54 | filename = shapenet_model["model_id"] 55 | model_verts, model_faces = shapenet_model["verts"], shapenet_model["faces"] 56 | 57 | # model_textures = TexturesVertex(verts_features=torch.ones_like(model_verts, device=device)[None]) 58 | shapenet_model_mesh = Meshes( 59 | verts=[model_verts.to(device)], 60 | faces=[model_faces.to(device)], 61 | # textures=model_textures 62 | ) 63 | samples, normals = sample_points_from_meshes(shapenet_model_mesh, num_samples=16384, return_normals=True) 64 | # print(samples.shape) 65 | samples = farthest_point_sample(samples, 2048) 66 | 67 | B, N = samples.shape[:2] 68 | samples_mean = samples.mean(axis=1).reshape(B, 1, 3) 69 | scale = torch.max(torch.max(samples - samples_mean, dim=2, keepdim=True)[0], dim=1, keepdim=True)[0].repeat(B, 1, 3) 70 | samples = (samples - samples_mean) * 0.5 / scale 71 | 72 | # plot_diff_pcds([samples[0]], vis=vis, title=foldername+'_'+filename, legend=['pcd'], win=foldername+'_'+filename) 73 | samples = samples[0] 74 | 75 | _df = df.loc[df['synsetId'] == int(foldername)] 76 | split = _df.loc[_df['modelId'] == filename].iloc[0, 4] 77 | 78 | path = os.path.join('../data/ShapeNetCore.v2.PC2048', foldername, split) 79 | if not 
os.path.exists(path): 80 | os.makedirs(path) 81 | 82 | with h5py.File(os.path.join(path, filename+'.h5'), 'w') as f: 83 | f.create_dataset('data', data=samples.detach().cpu().numpy()) 84 | """ 85 | 86 | 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument('--input_path', type=str, default="../data/ShapeNetCore.v2", help="path to ShapeNetCore.v2") 89 | parser.add_argument('--output_path', type=str, default="../data/ShapeNetCore.v2.PC2048_1", help="path to the sampling results") 90 | parser.add_argument('--synsetid', type=str, default="02691156", help="") 91 | parser.add_argument('--pnum', type=int, default=2048, help="point number, no more than 16384") 92 | args = parser.parse_args() 93 | 94 | 95 | # vis = Visdom(env='sample') 96 | df = pd.read_csv(os.path.join(args.input_path, 'all.csv')) 97 | _df = df.loc[df['synsetId'] == int(args.synsetid)] 98 | 99 | for i in tqdm(range(len(_df))): 100 | filename = _df.iloc[i, 3] 101 | split =_df.iloc[i, 4] 102 | 103 | path = os.path.join(args.output_path, args.synsetid, split) 104 | if not os.path.exists(path): 105 | os.makedirs(path) 106 | save_path = os.path.join(path, filename+'.h5') 107 | if os.path.exists(save_path): 108 | continue 109 | 110 | path = os.path.join(args.input_path, args.synsetid, filename, 'models/model_normalized.obj') 111 | if not os.path.exists(path): 112 | continue 113 | verts, faces, aux = load_obj(path, load_textures=False, device=device) 114 | shapenet_model_mesh = Meshes(verts=[verts], faces=[faces.verts_idx]) 115 | 116 | samples, normals = sample_points_from_meshes(shapenet_model_mesh, num_samples=16384, return_normals=True) 117 | # print(samples.shape) 118 | samples = farthest_point_sample(samples, args.pnum) 119 | 120 | B, N = samples.shape[:2] 121 | samples_mean = samples.mean(axis=1).reshape(B, 1, 3) 122 | scale = torch.max(torch.max(torch.abs(samples - samples_mean), dim=2, keepdim=True)[0], dim=1, keepdim=True)[0].repeat(B, 1, 3) 123 | samples = (samples - samples_mean) * 0.5 / scale 124 | 125 | # plot_diff_pcds([samples[0]], vis=vis, title=foldername+'_'+filename, legend=['pcd'], win=foldername+'_'+filename) 126 | # if i == 20: 127 | # break 128 | 129 | samples = samples[0] 130 | with h5py.File(save_path, 'w') as f: 131 | f.create_dataset('data', data=samples.detach().cpu().numpy()) -------------------------------------------------------------------------------- /utils/randPartial/setup.py: -------------------------------------------------------------------------------- 1 | # from setuptools import setup 2 | # from pybind11.setup_helpers import Pybind11Extension, build_ext 3 | 4 | 5 | # setup( 6 | # name='randpartial', 7 | # ext_modules=[ 8 | # Pybind11Extension( 9 | # name='randpartial', 10 | # sources=[ 11 | # 'main.cpp', 12 | # ], 13 | # ), 14 | # ], 15 | # cmdclass={ 16 | # 'build_ext': build_ext, 17 | # }) 18 | 19 | 20 | import os 21 | import sys 22 | import subprocess 23 | 24 | from setuptools import setup, Extension, find_packages 25 | from setuptools.command.build_ext import build_ext 26 | 27 | # Convert distutils Windows platform specifiers to CMake -A arguments 28 | PLAT_TO_CMAKE = { 29 | "win32": "Win32", 30 | "win-amd64": "x64", 31 | "win-arm32": "ARM", 32 | "win-arm64": "ARM64", 33 | } 34 | 35 | 36 | # A CMakeExtension needs a sourcedir instead of a file list. 37 | # The name must be the _single_ output extension from the CMake build. 38 | # If you need multiple extensions, see scikit-build. 
39 | class CMakeExtension(Extension): 40 | def __init__(self, name, sourcedir=""): 41 | Extension.__init__(self, name, sources=[]) 42 | self.sourcedir = os.path.abspath(sourcedir) 43 | 44 | 45 | class CMakeBuild(build_ext): 46 | def build_extension(self, ext): 47 | extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) 48 | 49 | # required for auto-detection of auxiliary "native" libs 50 | if not extdir.endswith(os.path.sep): 51 | extdir += os.path.sep 52 | 53 | cfg = "Debug" if self.debug else "Release" 54 | 55 | # CMake lets you override the generator - we need to check this. 56 | # Can be set with Conda-Build, for example. 57 | cmake_generator = os.environ.get("CMAKE_GENERATOR", "") 58 | 59 | # Set Python_EXECUTABLE instead if you use PYBIND11_FINDPYTHON 60 | # EXAMPLE_VERSION_INFO shows you how to pass a value into the C++ code 61 | # from Python. 62 | cmake_args = [ 63 | "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={}".format(extdir), 64 | "-DPYTHON_EXECUTABLE={}".format(sys.executable), 65 | "-DEXAMPLE_VERSION_INFO={}".format(self.distribution.get_version()), 66 | "-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm 67 | ] 68 | build_args = [] 69 | 70 | if self.compiler.compiler_type != "msvc": 71 | # Using Ninja-build since it a) is available as a wheel and b) 72 | # multithreads automatically. MSVC would require all variables be 73 | # exported for Ninja to pick it up, which is a little tricky to do. 74 | # Users can override the generator with CMAKE_GENERATOR in CMake 75 | # 3.15+. 76 | if not cmake_generator: 77 | cmake_args += ["-GNinja"] 78 | 79 | else: 80 | 81 | # Single config generators are handled "normally" 82 | single_config = any(x in cmake_generator for x in {"NMake", "Ninja"}) 83 | 84 | # CMake allows an arch-in-generator style for backward compatibility 85 | contains_arch = any(x in cmake_generator for x in {"ARM", "Win64"}) 86 | 87 | # Specify the arch if using MSVC generator, but only if it doesn't 88 | # contain a backward-compatibility arch spec already in the 89 | # generator name. 90 | if not single_config and not contains_arch: 91 | cmake_args += ["-A", PLAT_TO_CMAKE[self.plat_name]] 92 | 93 | # Multi-config generators have a different way to specify configs 94 | if not single_config: 95 | cmake_args += [ 96 | "-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir) 97 | ] 98 | build_args += ["--config", cfg] 99 | 100 | # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level 101 | # across all generators. 102 | if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: 103 | # self.parallel is a Python 3 only way to set parallel jobs by hand 104 | # using -j in the build_ext call, not supported by pip or PyPA-build. 105 | if hasattr(self, "parallel") and self.parallel: 106 | # CMake 3.12+ only. 107 | build_args += ["-j{}".format(self.parallel)] 108 | 109 | if not os.path.exists(self.build_temp): 110 | os.makedirs(self.build_temp) 111 | 112 | subprocess.check_call( 113 | ["cmake", ext.sourcedir] + cmake_args, cwd=self.build_temp 114 | ) 115 | subprocess.check_call( 116 | ["cmake", "--build", "."] + build_args, cwd=self.build_temp 117 | ) 118 | 119 | 120 | # The information here can also be placed in setup.cfg - better separation of 121 | # logic and declaration, and simpler if you include description/version in a file. 
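# Once built and installed (python3 setup.py install, as in the top-level README),
# the extension configured below is imported as `randpartial`; utils/data_loader.py
# contains an import site.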
122 | setup( 123 | name="randpartial", 124 | packages=find_packages(), 125 | ext_modules=[CMakeExtension("randpartial")], 126 | cmdclass={"build_ext": CMakeBuild}, 127 | ) -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/nndistance.cu: -------------------------------------------------------------------------------- 1 | 2 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 3 | const int batch=512; 4 | __shared__ float buf[batch*3]; 5 | for (int i=blockIdx.x;ibest){ 117 | result[(i*n+j)]=best; 118 | result_i[(i*n+j)]=best_i; 119 | } 120 | } 121 | __syncthreads(); 122 | } 123 | } 124 | } 125 | void nndistance(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){ 126 | NmDistanceKernel<<>>(b,n,xyz,m,xyz2,result,result_i); 127 | NmDistanceKernel<<>>(b,m,xyz2,n,xyz,result2,result2_i); 128 | } 129 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 130 | for (int i=blockIdx.x;i>>(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_xyz1,grad_xyz2); 153 | NmDistanceGradKernel<<>>(b,m,xyz2,n,xyz1,grad_dist2,idx2,grad_xyz2,grad_xyz1); 154 | } 155 | 156 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Attention-based Transformation from Latent Features to Point Clouds 2 | 3 | ![](docs/teaser.png) 4 | 5 | This repository contains a PyTorch implementation of the paper: 6 | 7 | [Attention-based Transformation from Latent Features to Point Clouds](https://ojs.aaai.org/index.php/AAAI/article/view/20238) 8 |
9 | [Kaiyi Zhang](https://scholar.google.com/citations?user=2F1aJh4AAAAJ&hl=zh-CN), 10 | [Ximing Yang](https://symenyang.github.io/), 11 | Yuan Wu, 12 | [Cheng Jin](https://faculty.fudan.edu.cn/jc/zh_CN/index.htm) 13 |
14 | AAAI 2022 15 | 16 | ## Introduction 17 | 18 | In point cloud generation and completion, previous methods for transforming latent features to point clouds are generally based on fully connected layers (FC-based) or folding operations (Folding-based). However, point clouds generated by FC-based methods are usually troubled by outliers and rough surfaces. For folding-based methods, the data flow is large, the convergence speed is slow, and they struggle to generate non-smooth surfaces. In this work, we propose AXform, an attention-based method to transform latent features to point clouds. AXform first generates points in an interim space, using a fully connected layer. These interim points are then aggregated to generate the target point cloud. AXform takes both parameter sharing and data flow into account, which gives it fewer outliers, fewer network parameters, and a faster convergence speed. The points generated by AXform do not have the strong 2-manifold constraint, which improves the generation of non-smooth surfaces. When AXform is expanded to multiple branches for local generations, the centripetal constraint gives it the properties of self-clustering and space consistency, which further enables unsupervised semantic segmentation. We also adopt this scheme and design AXformNet for point cloud completion. Extensive experiments on different datasets show that our methods achieve state-of-the-art results. 19 | 20 | ## Dependencies 21 | 22 | - Python 3.6 23 | - CUDA 10.0 24 | - G++ or GCC 7.5 25 | - [PyTorch](https://pytorch.org/). Code is tested with version 1.6.0 26 | - (Optional) [Visdom](https://github.com/fossasia/visdom/) for visualization of the training process 27 | 28 | Install the following CUDA-based tools. 29 | ```bash 30 | cd utils/furthestPointSampling 31 | python3 setup.py install 32 | 33 | # https://github.com/stevenygd/PointFlow/tree/master/metrics 34 | cd utils/metrics/pytorch_structural_losses 35 | make 36 | 37 | # https://github.com/sshaoshuai/Pointnet2.PyTorch 38 | cd utils/Pointnet2.PyTorch/pointnet2 39 | python3 setup.py install 40 | 41 | # https://github.com/daerduoCarey/PyTorchEMD 42 | cd utils/PyTorchEMD 43 | python3 setup.py install 44 | 45 | # not used 46 | cd utils/randPartial 47 | python3 setup.py install 48 | ``` 49 | 50 | ## Datasets 51 | 52 | The PCN dataset ([Google Drive](https://drive.google.com/file/d/1Wd-aJPxrSXrUBrXG5Fi7it71Cqn8Xydt/view?usp=sharing)) is used for point cloud completion. 53 | 54 | ShapeNetCore.v2.PC2048 ([Google Drive](https://drive.google.com/file/d/11PZzFtWTY5jtB-2g_Z1QyWSC5CJIqzAv/view?usp=sharing)) is used for the other tasks. The point clouds are uniformly sampled from the meshes of the ShapeNetCore dataset (version 2). All the point clouds are centered and scaled to [-0.5, 0.5], and we follow the official split. The sampling code, based on [PyTorch3D](https://pytorch3d.org/), can be found in `utils/sample_pytorch3d.py`. 55 | 56 | Please download them to the `data` directory. 57 | 58 | ## Training 59 | 60 | All the arguments, e.g. `gpu_ids`, `mode`, `method`, `hparas`, `num_branch`, `class_choice`, `visual`, can be adjusted before training.
For example: 61 | 62 | ```bash 63 | # axform, airplane category, 16 branches 64 | python3 axform.py --mode train --num_branch 16 --class_choice ['airplane'] 65 | 66 | # fc-based, car category 67 | python3 models/fc_folding.py --mode train --method fc-based --class_choice ['car'] 68 | 69 | # l-gan, airplane category, without axform 70 | python3 models/latent_3d_points/l-gan.py --mode train --method original --class_choice ['airplane'] --ae_ckpt_path path_to_ckpt_autoencoder.pth 71 | 72 | # axformnet, all categories, integrated 73 | python3 axformnet.py --mode train --method integrated --class_choice None 74 | ``` 75 | 76 | ## Pre-trained models 77 | 78 | Here we provide pre-trained models ([Google Drive](https://drive.google.com/file/d/1oCjAEtFGzEAFC9j9meGg9FI3rHpdOyXM/view?usp=sharing)) for point cloud completion. The following is the suggested way to evaluate their performance. 79 | 80 | ```bash 81 | # vanilla 82 | python3 axformnet.py --mode test --method vanilla --ckpt_path path_to_ckpt_vanilla.pth 83 | 84 | # integrated 85 | python3 axformnet.py --mode test --method integrated --ckpt_path path_to_ckpt_integrated.pth 86 | ``` 87 | 88 | ## Visualization 89 | 90 | [Matplotlib](https://matplotlib.org/) is used to visualize the results in the paper. Reference code can be found in `utils/draw.py`. 91 | 92 | We recommend [Mitsuba 2](https://www.mitsuba-renderer.org/) for higher-quality renderings. Example code can be found in [Point Cloud Renderer](https://github.com/zekunhao1995/PointFlowRenderer/). 93 | 94 | ## Citation 95 | 96 | Please cite our work if you find it useful: 97 | ```latex 98 | @article{Zhang_Yang_Wu_Jin_2022, 99 | author = {Zhang, Kaiyi and Yang, Ximing and Wu, Yuan and Jin, Cheng}, 100 | journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, 101 | month = {Jun.}, 102 | number = {3}, 103 | pages = {3291-3299}, 104 | title = {Attention-Based Transformation from Latent Features to Point Clouds}, 105 | volume = {36}, 106 | year = {2022} 107 | } 108 | ``` 109 | 110 | ## License 111 | 112 | The code of this project is released under the MIT License (refer to the LICENSE file for details).
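A quick way to sanity-check a downloaded ShapeNetCore.v2.PC2048 file is to read it back the same way `utils/data_loader.py` does. A minimal sketch (the `data` key and the `(2048, 3)` shape follow `utils/sample_pytorch3d.py`; the model id in the path is a placeholder):

```python
import h5py
import numpy as np

# <model_id> is any model hash under the airplane synset 02691156
path = "data/ShapeNetCore.v2.PC2048/02691156/train/<model_id>.h5"
with h5py.File(path, "r") as f:
    pcd = np.array(f["data"])  # (2048, 3), centered and scaled to [-0.5, 0.5]
print(pcd.shape, pcd.min(), pcd.max())
```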
113 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdio.h> 3 | #include <stdlib.h> 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | 9 | __global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, 10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 11 | // unknown: (B, N, 3) 12 | // known: (B, M, 3) 13 | // output: 14 | // dist2: (B, N, 3) 15 | // idx: (B, N, 3) 16 | 17 | int bs_idx = blockIdx.y; 18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (bs_idx >= b || pt_idx >= n) return; 20 | 21 | unknown += bs_idx * n * 3 + pt_idx * 3; 22 | known += bs_idx * m * 3; 23 | dist2 += bs_idx * n * 3 + pt_idx * 3; 24 | idx += bs_idx * n * 3 + pt_idx * 3; 25 | 26 | float ux = unknown[0]; 27 | float uy = unknown[1]; 28 | float uz = unknown[2]; 29 | 30 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 31 | int besti1 = 0, besti2 = 0, besti3 = 0; 32 | for (int k = 0; k < m; ++k) { 33 | float x = known[k * 3 + 0]; 34 | float y = known[k * 3 + 1]; 35 | float z = known[k * 3 + 2]; 36 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 37 | if (d < best1) { 38 | best3 = best2; besti3 = besti2; 39 | best2 = best1; besti2 = besti1; 40 | best1 = d; besti1 = k; 41 | } 42 | else if (d < best2) { 43 | best3 = best2; besti3 = besti2; 44 | best2 = d; besti2 = k; 45 | } 46 | else if (d < best3) { 47 | best3 = d; besti3 = k; 48 | } 49 | } 50 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; 51 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; 52 | } 53 | 54 | 55 | void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, 56 | const float *known, float *dist2, int *idx, cudaStream_t stream) { 57 | // unknown: (B, N, 3) 58 | // known: (B, M, 3) 59 | // output: 60 | // dist2: (B, N, 3) 61 | // idx: (B, N, 3) 62 | 63 | cudaError_t err; 64 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) 65 | dim3 threads(THREADS_PER_BLOCK); 66 | 67 | three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known, dist2, idx); 68 | 69 | err = cudaGetLastError(); 70 | if (cudaSuccess != err) { 71 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 72 | exit(-1); 73 | } 74 | } 75 | 76 | 77 | __global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, 78 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 79 | // points: (B, C, M) 80 | // idx: (B, N, 3) 81 | // weight: (B, N, 3) 82 | // output: 83 | // out: (B, C, N) 84 | 85 | int bs_idx = blockIdx.z; 86 | int c_idx = blockIdx.y; 87 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 88 | 89 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 90 | 91 | weight += bs_idx * n * 3 + pt_idx * 3; 92 | points += bs_idx * c * m + c_idx * m; 93 | idx += bs_idx * n * 3 + pt_idx * 3; 94 | out += bs_idx * c * n + c_idx * n; 95 | 96 | out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; 97 | } 98 | 99 | void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, 100 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { 101 | // points: (B, C, M) 102 | // idx: (B, N, 3) 103 | // weight: (B, N, 3) 104 | // output: 105 | // out: (B, C, N)
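// Note on the weights consumed here: they are computed on the Python side
// (PointnetFPModule in pointnet2_modules.py) as w_i = (1/d_i) / sum_k(1/d_k)
// over the three nearest neighbors returned by three_nn, so each interpolated
// value is a normalized inverse-distance blend of three source points.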
106 | 107 | cudaError_t err; 108 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 109 | dim3 threads(THREADS_PER_BLOCK); 110 | three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, m, n, points, idx, weight, out); 111 | 112 | err = cudaGetLastError(); 113 | if (cudaSuccess != err) { 114 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 115 | exit(-1); 116 | } 117 | } 118 | 119 | 120 | __global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 121 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { 122 | // grad_out: (B, C, N) 123 | // weight: (B, N, 3) 124 | // output: 125 | // grad_points: (B, C, M) 126 | 127 | int bs_idx = blockIdx.z; 128 | int c_idx = blockIdx.y; 129 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 130 | 131 | if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; 132 | 133 | grad_out += bs_idx * c * n + c_idx * n + pt_idx; 134 | weight += bs_idx * n * 3 + pt_idx * 3; 135 | grad_points += bs_idx * c * m + c_idx * m; 136 | idx += bs_idx * n * 3 + pt_idx * 3; 137 | 138 | 139 | atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); 140 | atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); 141 | atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); 142 | } 143 | 144 | void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, 145 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { 146 | // grad_out: (B, C, N) 147 | // weight: (B, N, 3) 148 | // output: 149 | // grad_points: (B, C, M) 150 | 151 | cudaError_t err; 152 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) 153 | dim3 threads(THREADS_PER_BLOCK); 154 | three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, m, grad_out, idx, weight, grad_points); 155 | 156 | err = cudaGetLastError(); 157 | if (cudaSuccess != err) { 158 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 159 | exit(-1); 160 | } 161 | } -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/chamfer_distance/chamfer_distance.cu: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <cuda.h> 4 | #include <cuda_runtime.h> 5 | 6 | __global__ 7 | void ChamferDistanceKernel( 8 | int b, 9 | int n, 10 | const float* xyz, 11 | int m, 12 | const float* xyz2, 13 | float* result, 14 | int* result_i) 15 | { 16 | const int batch=512; 17 | __shared__ float buf[batch*3]; 18 | for (int i=blockIdx.x;i<b;i+=gridDim.x){ 129 | if (k==0 || result[(i*n+j)]>best){ 130 | result[(i*n+j)]=best; 131 | result_i[(i*n+j)]=best_i; 132 | } 133 | } 134 | __syncthreads(); 135 | } 136 | } 137 | } 138 | 139 | void ChamferDistanceKernelLauncher( 140 | const int b, const int n, 141 | const float* xyz, 142 | const int m, 143 | const float* xyz2, 144 | float* result, 145 | int* result_i, 146 | float* result2, 147 | int* result2_i) 148 | { 149 | ChamferDistanceKernel<<<dim3(32,16,1),512>>>(b, n, xyz, m, xyz2, result, result_i); 150 | ChamferDistanceKernel<<<dim3(32,16,1),512>>>(b, m, xyz2, n, xyz, result2, result2_i); 151 | 152 | cudaError_t err = cudaGetLastError(); 153 | if (err != cudaSuccess) 154 | printf("error in chamfer distance updateOutput: %s\n", cudaGetErrorString(err)); 155 | } 156 | 157 | 158 | __global__ 159 | void ChamferDistanceGradKernel( 160 | int b, int n, 161 | const float* xyz1, 162 | int m, 163 | const float* xyz2, 164 | const float* grad_dist1, 165 | const int* idx1, 166 | float* grad_xyz1,
167 | float* grad_xyz2) 168 | { 169 | for (int i = blockIdx.x; i < b; i += gridDim.x) { 170 | for (int j = threadIdx.x + blockIdx.y * blockDim.x; j < n; j += blockDim.x * gridDim.y) { 171 | const float x1 = xyz1[(i*n+j)*3+0]; 172 | const float y1 = xyz1[(i*n+j)*3+1]; 173 | const float z1 = xyz1[(i*n+j)*3+2]; 174 | 175 | const int j2 = idx1[i*n+j]; 176 | const float x2 = xyz2[(i*m+j2)*3+0]; 177 | const float y2 = xyz2[(i*m+j2)*3+1]; 178 | const float z2 = xyz2[(i*m+j2)*3+2]; 179 | const float g = grad_dist1[i*n+j]*2; 180 | 181 | atomicAdd(grad_xyz1 + (i*n+j)*3+0, g*(x1-x2)); 182 | atomicAdd(grad_xyz1 + (i*n+j)*3+1, g*(y1-y2)); 183 | atomicAdd(grad_xyz1 + (i*n+j)*3+2, g*(z1-z2)); 184 | atomicAdd(grad_xyz2 + (i*m+j2)*3+0, -(g*(x1-x2))); 185 | atomicAdd(grad_xyz2 + (i*m+j2)*3+1, -(g*(y1-y2))); 186 | atomicAdd(grad_xyz2 + (i*m+j2)*3+2, -(g*(z1-z2))); 187 | } 188 | } 189 | } 190 | 191 | void ChamferDistanceGradKernelLauncher( 192 | const int b, const int n, 193 | const float* xyz1, 194 | const int m, 195 | const float* xyz2, 196 | const float* grad_dist1, 197 | const int* idx1, 198 | const float* grad_dist2, 199 | const int* idx2, 200 | float* grad_xyz1, 201 | float* grad_xyz2) 202 | { 203 | ChamferDistanceGradKernel<<<dim3(1,16,1),256>>>(b, n, xyz1, m, xyz2, grad_dist1, idx1, grad_xyz1, grad_xyz2); 204 | ChamferDistanceGradKernel<<<dim3(1,16,1),256>>>(b, m, xyz2, n, xyz1, grad_dist2, idx2, grad_xyz2, grad_xyz1); 205 | 206 | cudaError_t err = cudaGetLastError(); 207 | if (err != cudaSuccess) 208 | printf("error in chamfer distance get grad: %s\n", cudaGetErrorString(err)); 209 | } 210 | -------------------------------------------------------------------------------- /utils/data_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import os 4 | import math 5 | import json 6 | import h5py 7 | import numpy as np 8 | import open3d as o3d 9 | import randpartial 10 | from visdom import Visdom 11 | 12 | # taken from https://github.com/optas/latent_3d_points/blob/8e8f29f8124ed5fc59439e8551ba7ef7567c9a37/src/in_out.py 13 | synsetid_to_cate = { 14 | '02691156': 'airplane', '02773838': 'bag', '02801938': 'basket', 15 | '02808440': 'bathtub', '02818832': 'bed', '02828884': 'bench', 16 | '02876657': 'bottle', '02880940': 'bowl', '02924116': 'bus', 17 | '02933112': 'cabinet', '02747177': 'can', '02942699': 'camera', 18 | '02954340': 'cap', '02958343': 'car', '03001627': 'chair', 19 | '03046257': 'clock', '03207941': 'dishwasher', '03211117': 'monitor', 20 | '04379243': 'table', '04401088': 'telephone', '02946921': 'tin_can', 21 | '04460130': 'tower', '04468005': 'train', '03085013': 'keyboard', 22 | '03261776': 'earphone', '03325088': 'faucet', '03337140': 'file', 23 | '03467517': 'guitar', '03513137': 'helmet', '03593526': 'jar', 24 | '03624134': 'knife', '03636649': 'lamp', '03642806': 'laptop', 25 | '03691459': 'speaker', '03710193': 'mailbox', '03759954': 'microphone', 26 | '03761084': 'microwave', '03790512': 'motorcycle', '03797390': 'mug', 27 | '03928116': 'piano', '03938244': 'pillow', '03948459': 'pistol', 28 | '03991062': 'pot', '04004475': 'printer', '04074963': 'remote_control', 29 | '04090263': 'rifle', '04099429': 'rocket', '04225987': 'skateboard', 30 | '04256520': 'sofa', '04330267': 'stove', '04530566': 'vessel', 31 | '04554684': 'washer', '02992529': 'cellphone', 32 | '02843684': 'birdhouse', '02871439': 'bookshelf', 33 | # '02858304': 'boat', no boat in our dataset, merged into vessels 34 | # '02834778': 'bicycle', not in our taxonomy 35 | } 36 | cate_to_synsetid = {v: k for k, v in synsetid_to_cate.items()} 37 | 38 | 39 | # ----------------------------------------------------------------------- # 40 | # PCN Dataset 41 | # ----------------------------------------------------------------------- # 42 | 43 | def resample_pcd(pcd, n): 44 | """Drop or duplicate points so that pcd has exactly n points""" 45 | idx = np.random.permutation(pcd.shape[0]) 46 | if idx.shape[0] < n: 47 | idx = np.concatenate([idx, np.random.randint(pcd.shape[0], size = n - pcd.shape[0])]) 48 | return pcd[idx[:n]] 49 | 50 | 51 | class PCNDataset(data.Dataset): 52 | def __init__(self, root=None, class_choice=None, split='train'): 53 | """ 54 | It uses all eight partials. 55 | 56 | plane 02691156 | 3795 57 | cabinet 02933112 | 1322 58 | car 02958343 | 5677 59 | chair 03001627 | 5750 60 | lamp 03636649 | 2068 61 | sofa 04256520 | 2923 62 | table 04379243 | 5750 63 | vessel 04530566 | 1689 64 | """ 65 | if split == 'train': 66 | self.partial_num = 8 67 | else: 68 | self.partial_num = 1 69 | 70 | self.cat = {} 71 | self.datapath = [] # [(02691156, 1a04e3eab45ca15dd86060f189eb133_0x, xxx.pcd, ...), ...]
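        # Expected on-disk layout (PCN dataset as unpacked from the Google Drive
        # link in the README), used to build self.datapath below:
        #   root/objectid.json                             -> model ids per split
        #   root/<split>/partial/<synset>/<model>/0<i>.pcd -> i-th partial scan
        #   root/<split>/gt/<synset>/<model>.pcd           -> complete point cloud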
72 | 73 | if class_choice is None: 74 | class_choice = ['airplane', 'cabinet', 'car', 'chair', 'lamp', 'sofa', 'table', 'vessel'] 75 | self.cat = {k: v for k, v in cate_to_synsetid.items() if k in class_choice} 76 | 77 | with open(os.path.join(root, 'objectid.json'), 'r') as f: 78 | objectid = json.load(f)[split] 79 | for item in objectid: 80 | fn = item.strip().split('/') 81 | if fn[0] in self.cat.values(): 82 | for i in range(self.partial_num): 83 | partial_path = os.path.join(root, split, 'partial', item.strip(), '0%d.pcd' % (i)) 84 | gt_path = os.path.join(root, split, 'gt', item.strip()+'.pcd') 85 | self.datapath.append((fn[0], fn[1]+'_0%d'%(i), partial_path, gt_path)) 86 | 87 | def __getitem__(self, index): 88 | dp = self.datapath[index] 89 | foldername = dp[0] 90 | filename = dp[1] 91 | pcd = o3d.io.read_point_cloud(dp[2]) 92 | partial = np.asarray(pcd.points) 93 | pcd = o3d.io.read_point_cloud(dp[3]) 94 | gt = np.asarray(pcd.points) 95 | 96 | partial = resample_pcd(partial, 2048) 97 | # gt = resample_pcd(gt, 2048) 98 | 99 | partial = torch.from_numpy(partial).float() 100 | gt = torch.from_numpy(gt).float() 101 | 102 | return foldername, filename, partial, gt 103 | 104 | def __len__(self): 105 | return len(self.datapath) 106 | 107 | 108 | # ----------------------------------------------------------------------- # 109 | # ShapeNetCore.v2.PC2048 Dataset 110 | # ----------------------------------------------------------------------- # 111 | 112 | class ShapeNetCorev2PC2048Dataset(data.Dataset): 113 | def __init__(self, root=None, class_choice=None, split='train'): 114 | """ 115 | plane 02691156 | 2832/405/808 4045 116 | car 02958343 | 2458/352/704 3514 117 | chair 03001627 | 4612/662/1317 6591 118 | """ 119 | self.cat = {} 120 | self.datapath = [] # [(02691156, 1a04e3eab45ca15dd86060f189eb133, xxx.h5), ...] 
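        # Layout: root/<synset_id>/<split>/<model_id>.h5; each file stores a single
        # (2048, 3) float array under the 'data' key (written by utils/sample_pytorch3d.py).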
121 | 122 | if class_choice is None: 123 | class_choice = ['airplane', 'car', 'chair'] 124 | self.cat = {k: v for k, v in cate_to_synsetid.items() if k in class_choice} 125 | 126 | for k, value in self.cat.items(): 127 | foldername = value 128 | path = os.path.join(root, foldername, split) 129 | for path, dir_list, file_list in os.walk(path): 130 | for file_name in file_list: 131 | self.datapath.append((foldername, file_name.split('.')[0], os.path.join(path, file_name))) 132 | 133 | def __getitem__(self, index): 134 | dp = self.datapath[index] 135 | foldername = dp[0] 136 | filename = dp[1] 137 | with h5py.File(dp[2], 'r') as f: 138 | data = torch.from_numpy(np.array(f['data'])).float() 139 | 140 | return foldername, filename, data 141 | 142 | def __len__(self): 143 | return len(self.datapath) 144 | 145 | 146 | if __name__ == "__main__": 147 | # d = PCNDataset(root='../data/PCN/ShapeNet', class_choice=None, split='train', p_index=0, p_size=1) 148 | # print(len(d)) 149 | # print(d[0][2], d[0][2].shape) 150 | # print(d[0][3], d[0][3].shape) 151 | 152 | d = ShapeNetCorev2PC2048Dataset(root='../data/ShapeNetCore.v2.PC2048', class_choice='airplane', split='train') 153 | print(len(d)) 154 | print(d[0][0]) 155 | print(d[0][1]) 156 | print(d[0][2], d[0][2].shape) -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/structural_loss.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | 4 | #include "src/approxmatch.cuh" 5 | #include "src/nndistance.cuh" 6 | 7 | #include <vector> 8 | #include <iostream> 9 | 10 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 11 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 12 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 13 | 14 | /* 15 | input: 16 | set1 : batch_size * #dataset_points * 3 17 | set2 : batch_size * #query_points * 3 18 | returns: 19 | match : batch_size * #query_points * #dataset_points 20 | */ 21 | // temp: TensorShape{b,(n+m)*2} 22 | std::vector<at::Tensor> ApproxMatch(at::Tensor set_d, at::Tensor set_q) { 23 | //std::cout << "[ApproxMatch] Called." << std::endl; 24 | int64_t batch_size = set_d.size(0); 25 | int64_t n_dataset_points = set_d.size(1); // n 26 | int64_t n_query_points = set_q.size(1); // m 27 | //std::cout << "[ApproxMatch] batch_size:" << batch_size << std::endl; 28 | at::Tensor match = torch::empty({batch_size, n_query_points, n_dataset_points}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 29 | at::Tensor temp = torch::empty({batch_size, (n_query_points+n_dataset_points)*2}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 30 | CHECK_INPUT(set_d); 31 | CHECK_INPUT(set_q); 32 | CHECK_INPUT(match); 33 | CHECK_INPUT(temp); 34 | 35 | approxmatch(batch_size,n_dataset_points,n_query_points,set_d.data<float>(),set_q.data<float>(),match.data<float>(),temp.data<float>(), at::cuda::getCurrentCUDAStream()); 36 | return {match, temp}; 37 | } 38 | 39 | at::Tensor MatchCost(at::Tensor set_d, at::Tensor set_q, at::Tensor match) { 40 | //std::cout << "[MatchCost] Called."
<< std::endl; 41 | int64_t batch_size = set_d.size(0); 42 | int64_t n_dataset_points = set_d.size(1); // n 43 | int64_t n_query_points = set_q.size(1); // m 44 | //std::cout << "[MatchCost] batch_size:" << batch_size << std::endl; 45 | at::Tensor out = torch::empty({batch_size}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 46 | CHECK_INPUT(set_d); 47 | CHECK_INPUT(set_q); 48 | CHECK_INPUT(match); 49 | CHECK_INPUT(out); 50 | matchcost(batch_size,n_dataset_points,n_query_points,set_d.data<float>(),set_q.data<float>(),match.data<float>(),out.data<float>(),at::cuda::getCurrentCUDAStream()); 51 | return out; 52 | } 53 | 54 | std::vector<at::Tensor> MatchCostGrad(at::Tensor set_d, at::Tensor set_q, at::Tensor match) { 55 | //std::cout << "[MatchCostGrad] Called." << std::endl; 56 | int64_t batch_size = set_d.size(0); 57 | int64_t n_dataset_points = set_d.size(1); // n 58 | int64_t n_query_points = set_q.size(1); // m 59 | //std::cout << "[MatchCostGrad] batch_size:" << batch_size << std::endl; 60 | at::Tensor grad1 = torch::empty({batch_size,n_dataset_points,3}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 61 | at::Tensor grad2 = torch::empty({batch_size,n_query_points,3}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 62 | CHECK_INPUT(set_d); 63 | CHECK_INPUT(set_q); 64 | CHECK_INPUT(match); 65 | CHECK_INPUT(grad1); 66 | CHECK_INPUT(grad2); 67 | matchcostgrad(batch_size,n_dataset_points,n_query_points,set_d.data<float>(),set_q.data<float>(),match.data<float>(),grad1.data<float>(),grad2.data<float>(),at::cuda::getCurrentCUDAStream()); 68 | return {grad1, grad2}; 69 | } 70 | 71 | 72 | /* 73 | input: 74 | set_d : batch_size * #dataset_points * 3 75 | set_q : batch_size * #query_points * 3 76 | returns: 77 | dist1, idx1 : batch_size * #dataset_points 78 | dist2, idx2 : batch_size * #query_points 79 | */ 80 | std::vector<at::Tensor> NNDistance(at::Tensor set_d, at::Tensor set_q) { 81 | //std::cout << "[NNDistance] Called." << std::endl; 82 | int64_t batch_size = set_d.size(0); 83 | int64_t n_dataset_points = set_d.size(1); // n 84 | int64_t n_query_points = set_q.size(1); // m 85 | //std::cout << "[NNDistance] batch_size:" << batch_size << std::endl; 86 | at::Tensor dist1 = torch::empty({batch_size, n_dataset_points}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 87 | at::Tensor idx1 = torch::empty({batch_size, n_dataset_points}, torch::TensorOptions().dtype(torch::kInt32).device(set_d.device())); 88 | at::Tensor dist2 = torch::empty({batch_size, n_query_points}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 89 | at::Tensor idx2 = torch::empty({batch_size, n_query_points}, torch::TensorOptions().dtype(torch::kInt32).device(set_d.device())); 90 | CHECK_INPUT(set_d); 91 | CHECK_INPUT(set_q); 92 | CHECK_INPUT(dist1); 93 | CHECK_INPUT(idx1); 94 | CHECK_INPUT(dist2); 95 | CHECK_INPUT(idx2); 96 | // void nndistance(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream); 97 | nndistance(batch_size,n_dataset_points,set_d.data<float>(),n_query_points,set_q.data<float>(),dist1.data<float>(),idx1.data<int>(),dist2.data<float>(),idx2.data<int>(), at::cuda::getCurrentCUDAStream()); 98 | return {dist1, idx1, dist2, idx2}; 99 | } 100 | 101 | std::vector<at::Tensor> NNDistanceGrad(at::Tensor set_d, at::Tensor set_q, at::Tensor idx1, at::Tensor idx2, at::Tensor grad_dist1, at::Tensor grad_dist2) { 102 | //std::cout << "[NNDistanceGrad] Called."
<< std::endl; 103 | int64_t batch_size = set_d.size(0); 104 | int64_t n_dataset_points = set_d.size(1); // n 105 | int64_t n_query_points = set_q.size(1); // m 106 | //std::cout << "[NNDistanceGrad] batch_size:" << batch_size << std::endl; 107 | at::Tensor grad1 = torch::empty({batch_size,n_dataset_points,3}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 108 | at::Tensor grad2 = torch::empty({batch_size,n_query_points,3}, torch::TensorOptions().dtype(torch::kFloat32).device(set_d.device())); 109 | CHECK_INPUT(set_d); 110 | CHECK_INPUT(set_q); 111 | CHECK_INPUT(idx1); 112 | CHECK_INPUT(idx2); 113 | CHECK_INPUT(grad_dist1); 114 | CHECK_INPUT(grad_dist2); 115 | CHECK_INPUT(grad1); 116 | CHECK_INPUT(grad2); 117 | //void nndistancegrad(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2, cudaStream_t stream); 118 | nndistancegrad(batch_size,n_dataset_points,set_d.data<float>(),n_query_points,set_q.data<float>(), 119 | grad_dist1.data<float>(),idx1.data<int>(), 120 | grad_dist2.data<float>(),idx2.data<int>(), 121 | grad1.data<float>(),grad2.data<float>(), 122 | at::cuda::getCurrentCUDAStream()); 123 | return {grad1, grad2}; 124 | } 125 | 126 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/pointnet2_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from . import pointnet2_utils 5 | from . import pytorch_utils as pt_utils 6 | from typing import List 7 | 8 | 9 | class _PointnetSAModuleBase(nn.Module): 10 | 11 | def __init__(self): 12 | super().__init__() 13 | self.npoint = None 14 | self.groupers = None 15 | self.mlps = None 16 | self.pool_method = 'max_pool' 17 | 18 | def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor): 19 | """ 20 | :param xyz: (B, N, 3) tensor of the xyz coordinates of the features 21 | :param features: (B, N, C) tensor of the descriptors of the features 22 | :param new_xyz: 23 | :return: 24 | new_xyz: (B, npoint, 3) tensor of the new features' xyz 25 | new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors 26 | """ 27 | new_features_list = [] 28 | xyz_flipped = xyz.transpose(1, 2).contiguous() 29 | if new_xyz is None: 30 | new_xyz = pointnet2_utils.gather_operation( 31 | xyz_flipped, 32 | pointnet2_utils.furthest_point_sample(xyz, self.npoint) 33 | ).transpose(1, 2).contiguous() if self.npoint is not None else None 34 | for i in range(len(self.groupers)): 35 | new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample) 36 | new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample) 37 | if self.pool_method == 'max_pool': 38 | new_features = F.max_pool2d( 39 | new_features, kernel_size=[1, new_features.size(3)] 40 | ) # (B, mlp[-1], npoint, 1) 41 | elif self.pool_method == 'avg_pool': 42 | new_features = F.avg_pool2d( 43 | new_features, kernel_size=[1, new_features.size(3)] 44 | ) # (B, mlp[-1], npoint, 1) 45 | else: 46 | raise NotImplementedError 47 | 48 | new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint) 49 | new_features_list.append(new_features) 50 | return new_xyz, torch.cat(new_features_list, dim=1) 51 | 52 | 53 | class PointnetSAModuleMSG(_PointnetSAModuleBase): 54 | """Pointnet set abstraction layer with multiscale grouping""" 55 | 56 |
def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True, 57 | use_xyz: bool = True, pool_method='max_pool', instance_norm=False, features: torch.Tensor = None): 58 | """ 59 | :param npoint: int 60 | :param radii: list of float, list of radii to group with 61 | :param nsamples: list of int, number of samples in each ball query 62 | :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale 63 | :param bn: whether to use batchnorm 64 | :param use_xyz: 65 | :param pool_method: max_pool / avg_pool 66 | :param instance_norm: whether to use instance_norm 67 | """ 68 | super().__init__() 69 | 70 | assert len(radii) == len(nsamples) == len(mlps) 71 | 72 | self.npoint = npoint 73 | self.groupers = nn.ModuleList() 74 | self.mlps = nn.ModuleList() 75 | for i in range(len(radii)): 76 | radius = radii[i] 77 | nsample = nsamples[i] 78 | self.groupers.append( 79 | pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz) 80 | if npoint is not None else pointnet2_utils.GroupAll(use_xyz) 81 | ) 82 | mlp_spec = mlps[i] 83 | 84 | if features is not None: 85 | if use_xyz: 86 | mlp_spec[0]+=3 87 | self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm)) 88 | self.pool_method = pool_method 89 | 90 | 91 | class PointnetSAModule(PointnetSAModuleMSG): 92 | """Pointnet set abstraction layer""" 93 | 94 | def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None, 95 | bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False): 96 | """ 97 | :param mlp: list of int, spec of the pointnet before the global max_pool 98 | :param npoint: int, number of features 99 | :param radius: float, radius of ball 100 | :param nsample: int, number of samples in the ball query 101 | :param bn: whether to use batchnorm 102 | :param use_xyz: 103 | :param pool_method: max_pool / avg_pool 104 | :param instance_norm: whether to use instance_norm 105 | """ 106 | super().__init__( 107 | mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, 108 | pool_method=pool_method, instance_norm=instance_norm 109 | ) 110 | 111 | 112 | class PointnetFPModule(nn.Module): 113 | r"""Propagates the features of one set to another""" 114 | 115 | def __init__(self, *, mlp: List[int], bn: bool = True): 116 | """ 117 | :param mlp: list of int 118 | :param bn: whether to use batchnorm 119 | """ 120 | super().__init__() 121 | self.mlp = pt_utils.SharedMLP(mlp, bn=bn) 122 | 123 | def forward( 124 | self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor 125 | ) -> torch.Tensor: 126 | """ 127 | :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features 128 | :param known: (B, m, 3) tensor of the xyz positions of the known features 129 | :param unknow_feats: (B, C1, n) tensor of the features to be propagated to 130 | :param known_feats: (B, C2, m) tensor of features to be propagated 131 | :return: 132 | new_features: (B, mlp[-1], n) tensor of the features of the unknown features 133 | """ 134 | if known is not None: 135 | dist, idx = pointnet2_utils.three_nn(unknown, known) 136 | dist_recip = 1.0 / (dist + 1e-8) 137 | norm = torch.sum(dist_recip, dim=2, keepdim=True) 138 | weight = dist_recip / norm 139 | 140 | interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight) 141 | else: 142 | interpolated_feats = known_feats.expand(*known_feats.size()[0:2],
unknown.size(1)) 143 | 144 | if unknow_feats is not None: 145 | new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n) 146 | else: 147 | new_features = interpolated_feats 148 | 149 | new_features = new_features.unsqueeze(-1) 150 | new_features = self.mlp(new_features) 151 | 152 | return new_features.squeeze(-1) 153 | 154 | 155 | if __name__ == "__main__": 156 | pass 157 | -------------------------------------------------------------------------------- /utils/pyTorchChamferDistance/chamfer_distance/chamfer_distance.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | // CUDA forward declarations 4 | int ChamferDistanceKernelLauncher( 5 | const int b, const int n, 6 | const float* xyz, 7 | const int m, 8 | const float* xyz2, 9 | float* result, 10 | int* result_i, 11 | float* result2, 12 | int* result2_i); 13 | 14 | int ChamferDistanceGradKernelLauncher( 15 | const int b, const int n, 16 | const float* xyz1, 17 | const int m, 18 | const float* xyz2, 19 | const float* grad_dist1, 20 | const int* idx1, 21 | const float* grad_dist2, 22 | const int* idx2, 23 | float* grad_xyz1, 24 | float* grad_xyz2); 25 | 26 | 27 | void chamfer_distance_forward_cuda( 28 | const at::Tensor xyz1, 29 | const at::Tensor xyz2, 30 | const at::Tensor dist1, 31 | const at::Tensor dist2, 32 | const at::Tensor idx1, 33 | const at::Tensor idx2) 34 | { 35 | ChamferDistanceKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data<float>(), 36 | xyz2.size(1), xyz2.data<float>(), 37 | dist1.data<float>(), idx1.data<int>(), 38 | dist2.data<float>(), idx2.data<int>()); 39 | } 40 | 41 | void chamfer_distance_backward_cuda( 42 | const at::Tensor xyz1, 43 | const at::Tensor xyz2, 44 | at::Tensor gradxyz1, 45 | at::Tensor gradxyz2, 46 | at::Tensor graddist1, 47 | at::Tensor graddist2, 48 | at::Tensor idx1, 49 | at::Tensor idx2) 50 | { 51 | ChamferDistanceGradKernelLauncher(xyz1.size(0), xyz1.size(1), xyz1.data<float>(), 52 | xyz2.size(1), xyz2.data<float>(), 53 | graddist1.data<float>(), idx1.data<int>(), 54 | graddist2.data<float>(), idx2.data<int>(), 55 | gradxyz1.data<float>(), gradxyz2.data<float>()); 56 | } 57 | 58 | 59 | void nnsearch( 60 | const int b, const int n, const int m, 61 | const float* xyz1, 62 | const float* xyz2, 63 | float* dist, 64 | int* idx) 65 | { 66 | for (int i = 0; i < b; i++) { 67 | for (int j = 0; j < n; j++) { 68 | const float x1 = xyz1[(i*n+j)*3+0]; 69 | const float y1 = xyz1[(i*n+j)*3+1]; 70 | const float z1 = xyz1[(i*n+j)*3+2]; 71 | double best = 0; 72 | int besti = 0; 73 | for (int k = 0; k < m; k++) { 74 | const float x2 = xyz2[(i*m+k)*3+0] - x1; 75 | const float y2 = xyz2[(i*m+k)*3+1] - y1; 76 | const float z2 = xyz2[(i*m+k)*3+2] - z1; 77 | const double d=x2*x2+y2*y2+z2*z2; 78 | if (k==0 || d < best){ 79 | best = d; 80 | besti = k; 81 | } 82 | } 83 | dist[i*n+j] = best; 84 | idx[i*n+j] = besti; 85 | } 86 | } 87 | } 88 | 89 | 90 | void chamfer_distance_forward( 91 | const at::Tensor xyz1, 92 | const at::Tensor xyz2, 93 | const at::Tensor dist1, 94 | const at::Tensor dist2, 95 | const at::Tensor idx1, 96 | const at::Tensor idx2) 97 | { 98 | const int batchsize = xyz1.size(0); 99 | const int n = xyz1.size(1); 100 | const int m = xyz2.size(1); 101 | 102 | const float* xyz1_data = xyz1.data<float>(); 103 | const float* xyz2_data = xyz2.data<float>(); 104 | float* dist1_data = dist1.data<float>(); 105 | float* dist2_data = dist2.data<float>(); 106 | int* idx1_data = idx1.data<int>(); 107 | int* idx2_data = idx2.data<int>(); 108 | 109 | nnsearch(batchsize, n, m, xyz1_data, xyz2_data, dist1_data, idx1_data); 110 | nnsearch(batchsize, m, n, xyz2_data,
xyz1_data, dist2_data, idx2_data); 111 | } 112 | 113 | 114 | void chamfer_distance_backward( 115 | const at::Tensor xyz1, 116 | const at::Tensor xyz2, 117 | at::Tensor gradxyz1, 118 | at::Tensor gradxyz2, 119 | at::Tensor graddist1, 120 | at::Tensor graddist2, 121 | at::Tensor idx1, 122 | at::Tensor idx2) 123 | { 124 | const int b = xyz1.size(0); 125 | const int n = xyz1.size(1); 126 | const int m = xyz2.size(1); 127 | 128 | const float* xyz1_data = xyz1.data<float>(); 129 | const float* xyz2_data = xyz2.data<float>(); 130 | float* gradxyz1_data = gradxyz1.data<float>(); 131 | float* gradxyz2_data = gradxyz2.data<float>(); 132 | float* graddist1_data = graddist1.data<float>(); 133 | float* graddist2_data = graddist2.data<float>(); 134 | const int* idx1_data = idx1.data<int>(); 135 | const int* idx2_data = idx2.data<int>(); 136 | 137 | for (int i = 0; i < b*n*3; i++) 138 | gradxyz1_data[i] = 0; 139 | for (int i = 0; i < b*m*3; i++) 140 | gradxyz2_data[i] = 0; 141 | for (int i = 0;i < b; i++) { 142 | for (int j = 0; j < n; j++) { 143 | const float x1 = xyz1_data[(i*n+j)*3+0]; 144 | const float y1 = xyz1_data[(i*n+j)*3+1]; 145 | const float z1 = xyz1_data[(i*n+j)*3+2]; 146 | const int j2 = idx1_data[i*n+j]; 147 | 148 | const float x2 = xyz2_data[(i*m+j2)*3+0]; 149 | const float y2 = xyz2_data[(i*m+j2)*3+1]; 150 | const float z2 = xyz2_data[(i*m+j2)*3+2]; 151 | const float g = graddist1_data[i*n+j]*2; 152 | 153 | gradxyz1_data[(i*n+j)*3+0] += g*(x1-x2); 154 | gradxyz1_data[(i*n+j)*3+1] += g*(y1-y2); 155 | gradxyz1_data[(i*n+j)*3+2] += g*(z1-z2); 156 | gradxyz2_data[(i*m+j2)*3+0] -= (g*(x1-x2)); 157 | gradxyz2_data[(i*m+j2)*3+1] -= (g*(y1-y2)); 158 | gradxyz2_data[(i*m+j2)*3+2] -= (g*(z1-z2)); 159 | } 160 | for (int j = 0; j < m; j++) { 161 | const float x1 = xyz2_data[(i*m+j)*3+0]; 162 | const float y1 = xyz2_data[(i*m+j)*3+1]; 163 | const float z1 = xyz2_data[(i*m+j)*3+2]; 164 | const int j2 = idx2_data[i*m+j]; 165 | const float x2 = xyz1_data[(i*n+j2)*3+0]; 166 | const float y2 = xyz1_data[(i*n+j2)*3+1]; 167 | const float z2 = xyz1_data[(i*n+j2)*3+2]; 168 | const float g = graddist2_data[i*m+j]*2; 169 | gradxyz2_data[(i*m+j)*3+0] += g*(x1-x2); 170 | gradxyz2_data[(i*m+j)*3+1] += g*(y1-y2); 171 | gradxyz2_data[(i*m+j)*3+2] += g*(z1-z2); 172 | gradxyz1_data[(i*n+j2)*3+0] -= (g*(x1-x2)); 173 | gradxyz1_data[(i*n+j2)*3+1] -= (g*(y1-y2)); 174 | gradxyz1_data[(i*n+j2)*3+2] -= (g*(z1-z2)); 175 | } 176 | } 177 | } 178 | 179 | 180 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 181 | m.def("forward", &chamfer_distance_forward, "ChamferDistance forward"); 182 | m.def("forward_cuda", &chamfer_distance_forward_cuda, "ChamferDistance forward (CUDA)"); 183 | m.def("backward", &chamfer_distance_backward, "ChamferDistance backward"); 184 | m.def("backward_cuda", &chamfer_distance_backward_cuda, "ChamferDistance backward (CUDA)"); 185 | } 186 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/pointnet2/pytorch_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from typing import List, Tuple 3 | 4 | 5 | class SharedMLP(nn.Sequential): 6 | 7 | def __init__( 8 | self, 9 | args: List[int], 10 | *, 11 | bn: bool = False, 12 | activation=nn.ReLU(inplace=True), 13 | preact: bool = False, 14 | first: bool = False, 15 | name: str = "", 16 | instance_norm: bool = False, 17 | ): 18 | super().__init__() 19 | 20 | for i in range(len(args) - 1): 21 | self.add_module( 22 | name + 'layer{}'.format(i), 23 | Conv2d( 24 | args[i], 25 | args[i + 1], 26 |
bn=(not first or not preact or (i != 0)) and bn, 27 | activation=activation 28 | if (not first or not preact or (i != 0)) else None, 29 | preact=preact, 30 | instance_norm=instance_norm 31 | ) 32 | ) 33 | 34 | 35 | class _ConvBase(nn.Sequential): 36 | 37 | def __init__( 38 | self, 39 | in_size, 40 | out_size, 41 | kernel_size, 42 | stride, 43 | padding, 44 | activation, 45 | bn, 46 | init, 47 | conv=None, 48 | batch_norm=None, 49 | bias=True, 50 | preact=False, 51 | name="", 52 | instance_norm=False, 53 | instance_norm_func=None 54 | ): 55 | super().__init__() 56 | 57 | bias = bias and (not bn) 58 | conv_unit = conv( 59 | in_size, 60 | out_size, 61 | kernel_size=kernel_size, 62 | stride=stride, 63 | padding=padding, 64 | bias=bias 65 | ) 66 | init(conv_unit.weight) 67 | if bias: 68 | nn.init.constant_(conv_unit.bias, 0) 69 | 70 | if bn: 71 | if not preact: 72 | bn_unit = batch_norm(out_size) 73 | else: 74 | bn_unit = batch_norm(in_size) 75 | if instance_norm: 76 | if not preact: 77 | in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False) 78 | else: 79 | in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False) 80 | 81 | if preact: 82 | if bn: 83 | self.add_module(name + 'bn', bn_unit) 84 | 85 | if activation is not None: 86 | self.add_module(name + 'activation', activation) 87 | 88 | if not bn and instance_norm: 89 | self.add_module(name + 'in', in_unit) 90 | 91 | self.add_module(name + 'conv', conv_unit) 92 | 93 | if not preact: 94 | if bn: 95 | self.add_module(name + 'bn', bn_unit) 96 | 97 | if activation is not None: 98 | self.add_module(name + 'activation', activation) 99 | 100 | if not bn and instance_norm: 101 | self.add_module(name + 'in', in_unit) 102 | 103 | 104 | class _BNBase(nn.Sequential): 105 | 106 | def __init__(self, in_size, batch_norm=None, name=""): 107 | super().__init__() 108 | self.add_module(name + "bn", batch_norm(in_size)) 109 | 110 | nn.init.constant_(self[0].weight, 1.0) 111 | nn.init.constant_(self[0].bias, 0) 112 | 113 | 114 | class BatchNorm1d(_BNBase): 115 | 116 | def __init__(self, in_size: int, *, name: str = ""): 117 | super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) 118 | 119 | 120 | class BatchNorm2d(_BNBase): 121 | 122 | def __init__(self, in_size: int, name: str = ""): 123 | super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) 124 | 125 | 126 | class Conv1d(_ConvBase): 127 | 128 | def __init__( 129 | self, 130 | in_size: int, 131 | out_size: int, 132 | *, 133 | kernel_size: int = 1, 134 | stride: int = 1, 135 | padding: int = 0, 136 | activation=nn.ReLU(inplace=True), 137 | bn: bool = False, 138 | init=nn.init.kaiming_normal_, 139 | bias: bool = True, 140 | preact: bool = False, 141 | name: str = "", 142 | instance_norm=False 143 | ): 144 | super().__init__( 145 | in_size, 146 | out_size, 147 | kernel_size, 148 | stride, 149 | padding, 150 | activation, 151 | bn, 152 | init, 153 | conv=nn.Conv1d, 154 | batch_norm=BatchNorm1d, 155 | bias=bias, 156 | preact=preact, 157 | name=name, 158 | instance_norm=instance_norm, 159 | instance_norm_func=nn.InstanceNorm1d 160 | ) 161 | 162 | 163 | class Conv2d(_ConvBase): 164 | 165 | def __init__( 166 | self, 167 | in_size: int, 168 | out_size: int, 169 | *, 170 | kernel_size: Tuple[int, int] = (1, 1), 171 | stride: Tuple[int, int] = (1, 1), 172 | padding: Tuple[int, int] = (0, 0), 173 | activation=nn.ReLU(inplace=True), 174 | bn: bool = False, 175 | init=nn.init.kaiming_normal_, 176 | bias: bool = True, 177 | preact: bool = False, 178 | name: str 
= "", 179 | instance_norm=False 180 | ): 181 | super().__init__( 182 | in_size, 183 | out_size, 184 | kernel_size, 185 | stride, 186 | padding, 187 | activation, 188 | bn, 189 | init, 190 | conv=nn.Conv2d, 191 | batch_norm=BatchNorm2d, 192 | bias=bias, 193 | preact=preact, 194 | name=name, 195 | instance_norm=instance_norm, 196 | instance_norm_func=nn.InstanceNorm2d 197 | ) 198 | 199 | 200 | class FC(nn.Sequential): 201 | 202 | def __init__( 203 | self, 204 | in_size: int, 205 | out_size: int, 206 | *, 207 | activation=nn.ReLU(inplace=True), 208 | bn: bool = False, 209 | init=None, 210 | preact: bool = False, 211 | name: str = "" 212 | ): 213 | super().__init__() 214 | 215 | fc = nn.Linear(in_size, out_size, bias=not bn) 216 | if init is not None: 217 | init(fc.weight) 218 | if not bn: 219 | nn.init.constant(fc.bias, 0) 220 | 221 | if preact: 222 | if bn: 223 | self.add_module(name + 'bn', BatchNorm1d(in_size)) 224 | 225 | if activation is not None: 226 | self.add_module(name + 'activation', activation) 227 | 228 | self.add_module(name + 'fc', fc) 229 | 230 | if not preact: 231 | if bn: 232 | self.add_module(name + 'bn', BatchNorm1d(out_size)) 233 | 234 | if activation is not None: 235 | self.add_module(name + 'activation', activation) 236 | 237 | -------------------------------------------------------------------------------- /utils/randPartial/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | Eigen::Affine3f random_pose() { 12 | srand(time(NULL)); 13 | float angle_x = rand()*1.0/(RAND_MAX*1.0) * 2.0 * M_PI; 14 | float angle_y = rand()*1.0/(RAND_MAX*1.0) * 2.0 * M_PI; 15 | float angle_z = rand()*1.0/(RAND_MAX*1.0) * 2.0 * M_PI; 16 | 17 | Eigen::Affine3f R = Eigen::Affine3f::Identity(); 18 | R.rotate(Eigen::AngleAxisf (angle_x, Eigen::Vector3f::UnitX())); 19 | R.rotate(Eigen::AngleAxisf (angle_y, Eigen::Vector3f::UnitY())); 20 | R.rotate(Eigen::AngleAxisf (angle_z, Eigen::Vector3f::UnitZ())); 21 | R.translation() << -R(0, 2), -R(1, 2), -R(2, 2); 22 | return R; 23 | 24 | // // float theta = M_PI/2; 25 | // Eigen::Affine3f R = Eigen::Affine3f::Identity(); 26 | // R.translation() << 0.0, 0.0, -1.0; 27 | // // R.rotate (Eigen::AngleAxisf (theta, Eigen::Vector3f::UnitZ())); 28 | // return R; 29 | } 30 | 31 | 32 | Eigen::MatrixXf depth2pcd(Eigen::MatrixXf ranges, Eigen::Matrix3f intrinsics, Eigen::Matrix4f sensor_pose) { 33 | Eigen::Matrix3f inv_K = intrinsics.inverse(); 34 | // Eigen::Matrix4f inv_P = sensor_pose.inverse(); 35 | 36 | /* get depth value array */ 37 | Eigen::MatrixXf depth(3, 0); 38 | for (int i = 0; i < ranges.rows(); i++) { 39 | for (int j = 0; j < ranges.cols(); j++) { 40 | if (isinf(-ranges(i, j))) 41 | continue; 42 | 43 | // float zc = ranges(i, j); 44 | float zc = intrinsics(0,0)*ranges(i, j)/sqrt(pow(intrinsics(1,2)-i, 2)+pow(intrinsics(0,2)-j, 2)+pow(intrinsics(0,0), 2)); 45 | Eigen::Vector3f pixel(j*zc, i*zc, zc); 46 | depth.conservativeResize(depth.rows(), depth.cols() + 1); 47 | depth.col(depth.cols() - 1) = pixel; 48 | } 49 | } 50 | // std::cout << depth.cols() << "\n"; 51 | 52 | /* image coordinates -> camera coordinates */ 53 | Eigen::MatrixXf points1; 54 | points1 = inv_K * depth; 55 | // std::cout << points1.cols() << "\n"; 56 | 57 | /* camera coordinates -> world coordinates */ 58 | Eigen::MatrixXf vec_one = Eigen::MatrixXf::Ones(1, depth.cols()); 59 | // std::cout << vec_one.cols() << "\n"; 60 | 
61 |     points2 << points1,
62 |                vec_one;
63 |     // std::cout << points2.rows() << " " << points2.cols() << "\n";
64 |     Eigen::MatrixXf points3 = (sensor_pose * points2).transpose().block(0, 0, depth.cols(), 3);
65 |     // std::cout << points3.rows() << "\n";
66 | 
67 |     /* resample to 2048 */
68 |     std::vector<int> idx;
69 |     for (int i = 0; i < points3.rows(); i++) {
70 |         idx.push_back(i);
71 |     }
72 |     unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
73 |     std::default_random_engine eng(seed);
74 |     std::shuffle(idx.begin(), idx.end(), eng);
75 |     if (points3.rows() < 2048) {
76 |         std::uniform_int_distribution<int> dist(0, points3.rows()-1);
77 |         for (int i = 0; i < 2048-points3.rows(); i++) {
78 |             idx.push_back(dist(eng));
79 |         }
80 |     }
81 |     else {
82 |         idx.resize(2048);
83 |     }
84 |     // Eigen::MatrixXf points4 = points3(idx, Eigen::all);
85 |     Eigen::MatrixXf points4(0, 3);
86 |     for (int i = 0; i < 2048; i++) {
87 |         Eigen::Vector3f value = points3.row(idx[i]);
88 |         points4.conservativeResize(points4.rows() + 1, points4.cols());
89 |         points4.row(points4.rows() - 1) = value;
90 |     }
91 |     // std::cout << points4.rows() << "\n";
92 |     return points4;
93 | }
94 | 
95 | 
96 | pcl::PointCloud<pcl::PointXYZ> numpy2pcd(pybind11::array_t<float> input) {
97 |     // request a buffer descriptor from Python
98 |     pybind11::buffer_info buffer_info = input.request();
99 | 
100 |     // extract data and shape of input array
101 |     float *data = static_cast<float *>(buffer_info.ptr);
102 |     std::vector<ssize_t> shape = buffer_info.shape;
103 | 
104 |     pcl::PointCloud<pcl::PointXYZ> pointCloud;
105 |     for (int i = 0; i < shape[0]; i++) {
106 |         pcl::PointXYZ point;
107 |         point.x = data[i*shape[1] + 0];
108 |         point.y = data[i*shape[1] + 1];
109 |         point.z = data[i*shape[1] + 2];
110 |         pointCloud.points.push_back(point);
111 |     }
112 |     pointCloud.width = pointCloud.size();
113 |     pointCloud.height = 1;
114 |     return pointCloud;
115 | }
116 | 
117 | 
118 | pybind11::array_t<float> eigen2numpy(Eigen::MatrixXf input) {
119 |     // map to rowmajor storage
120 |     Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> rmajor_input(input);
121 | 
122 |     std::vector<ssize_t> shape(2);
123 |     shape[0] = input.rows();
124 |     shape[1] = input.cols();
125 |     return pybind11::array_t<float>(
126 |         shape,                                     // shape
127 |         {shape[1] * sizeof(float), sizeof(float)}, // strides
128 |         rmajor_input.data());                      // data pointer
129 | }
130 | 
131 | 
132 | pybind11::array_t<float> gen(pybind11::array_t<float> input) {
133 |     pcl::PointCloud<pcl::PointXYZ> pointCloud1;
134 |     pcl::PointCloud<pcl::PointXYZ> pointCloud2;
135 | 
136 |     // pcl::io::loadPCDFile("../data/1a04e3eab45ca15dd86060f189eb133.pcd", pointCloud1);
137 |     pointCloud1 = numpy2pcd(input);
138 |     // pcl::io::savePCDFile("1a04e3eab45ca15dd86060f189eb133.pcd", pointCloud1);
139 |     // auto t1 = std::chrono::system_clock::now();
140 | 
141 |     int di_width = 160*10;
142 |     int di_height = 120*10;
143 |     float di_center_x = 80.0*10;
144 |     float di_center_y = 60.0*10;
145 |     float di_focal_length = 100.0*10;
146 | 
147 |     Eigen::Affine3f sensor_pose = random_pose();
148 |     // Eigen::Affine3f sensor_pose = (Eigen::Affine3f) Eigen::Translation3f(0.0, 0.0, 0.0);
149 |     // std::cout << sensor_pose.matrix() << "\n";
150 |     pcl::RangeImagePlanar::CoordinateFrame coordinate_frame = pcl::RangeImagePlanar::CAMERA_FRAME;
151 |     float noise_level = 0.0;
152 |     float min_range = 0.0;
153 | 
154 |     pcl::RangeImagePlanar rangeImage;
155 |     rangeImage.createFromPointCloudWithFixedSize(pointCloud1, di_width, di_height, di_center_x, di_center_y,
156 |         di_focal_length, di_focal_length, sensor_pose, coordinate_frame, noise_level, min_range);
157 | 
158 |     // std::cout << rangeImage << "\n";
159 | 
160 |     // auto t2 = std::chrono::system_clock::now();
161 |     float* ranges = rangeImage.getRangesArray();
162 |     Eigen::MatrixXf depth = Eigen::Map<Eigen::MatrixXf>(ranges, di_width, di_height).transpose();
163 |     // std::cout << depth;
164 |     unsigned char* rgb_image = pcl::visualization::FloatImageUtils::getVisualImage(ranges, rangeImage.width, rangeImage.height);
165 |     pcl::io::saveRgbPNGFile("1a04e3eab45ca15dd86060f189eb133.png", rgb_image, rangeImage.width, rangeImage.height);
166 | 
167 |     // auto t3 = std::chrono::system_clock::now();
168 |     Eigen::Matrix3f intrinsics;
169 |     intrinsics << di_focal_length, 0.0, di_center_x,
170 |                   0.0, di_focal_length, di_center_y,
171 |                   0.0, 0.0, 1.0;
172 |     Eigen::MatrixXf output = depth2pcd(depth, intrinsics, sensor_pose.matrix());
173 |     // auto t4 = std::chrono::system_clock::now();
174 |     // std::cout << std::chrono::duration<double> (t2-t1).count() << "s\n";
175 |     // std::cout << std::chrono::duration<double> (t3-t2).count() << "s\n";
176 |     // std::cout << std::chrono::duration<double> (t4-t3).count() << "s\n";
177 | 
178 |     // for (int i = 0; i < output.rows(); i++) {
179 |     //     pcl::PointXYZ point;
180 |     //     point.x = output(i, 0);
181 |     //     point.y = output(i, 1);
182 |     //     point.z = output(i, 2);
183 |     //     pointCloud2.points.push_back(point);
184 |     // }
185 |     // pointCloud2.width = pointCloud2.size();
186 |     // pointCloud2.height = 1;
187 |     // pcl::io::savePCDFile("1a04e3eab45ca15dd86060f189eb133.pcd", pointCloud2);
188 |     // return 0;
189 |     return eigen2numpy(output);
190 | }
191 | 
192 | 
193 | PYBIND11_MODULE(randpartial, m) {
194 |     m.def("gen", &gen, "A function which generates random partial from the gt.");
195 | }
--------------------------------------------------------------------------------
/utils/Pointnet2.PyTorch/tools/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import torch.utils.data as torch_data
4 | import kitti_utils
5 | import cv2
6 | from PIL import Image
7 | 
8 | 
9 | USE_INTENSITY = False
10 | 
11 | 
12 | class KittiDataset(torch_data.Dataset):
13 |     def __init__(self, root_dir, split='train', mode='TRAIN'):
14 |         self.split = split
15 |         self.mode = mode
16 |         self.classes = ['Car']
17 |         is_test = self.split == 'test'
18 |         self.imageset_dir = os.path.join(root_dir, 'KITTI', 'object', 'testing' if is_test else 'training')
19 | 
20 |         split_dir = os.path.join(root_dir, 'KITTI', 'ImageSets', split + '.txt')
21 |         self.image_idx_list = [x.strip() for x in open(split_dir).readlines()]
22 |         self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list]
23 |         self.num_sample = self.image_idx_list.__len__()
24 | 
25 |         self.npoints = 16384
26 | 
27 |         self.image_dir = os.path.join(self.imageset_dir, 'image_2')
28 |         self.lidar_dir = os.path.join(self.imageset_dir, 'velodyne')
29 |         self.calib_dir = os.path.join(self.imageset_dir, 'calib')
30 |         self.label_dir = os.path.join(self.imageset_dir, 'label_2')
31 |         self.plane_dir = os.path.join(self.imageset_dir, 'planes')
32 | 
33 |     def get_image(self, idx):
34 |         img_file = os.path.join(self.image_dir, '%06d.png' % idx)
35 |         assert os.path.exists(img_file)
36 |         return cv2.imread(img_file)  # (H, W, 3) BGR mode
37 | 
38 |     def get_image_shape(self, idx):
39 |         img_file = os.path.join(self.image_dir, '%06d.png' % idx)
40 |         assert os.path.exists(img_file)
41 |         im = Image.open(img_file)
42 |         width, height = im.size
43 |         return height, width, 3
44 | 
45 |     def get_lidar(self, idx):
46 |         lidar_file = os.path.join(self.lidar_dir, '%06d.bin' % idx)
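        # KITTI velodyne scans are raw float32 binaries storing one
        # (x, y, z, reflectance) tuple per point, hence the reshape(-1, 4) below.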
assert os.path.exists(lidar_file) 48 | return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) 49 | 50 | def get_calib(self, idx): 51 | calib_file = os.path.join(self.calib_dir, '%06d.txt' % idx) 52 | assert os.path.exists(calib_file) 53 | return kitti_utils.Calibration(calib_file) 54 | 55 | def get_label(self, idx): 56 | label_file = os.path.join(self.label_dir, '%06d.txt' % idx) 57 | assert os.path.exists(label_file) 58 | return kitti_utils.get_objects_from_label(label_file) 59 | 60 | @staticmethod 61 | def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape): 62 | val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) 63 | val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) 64 | val_flag_merge = np.logical_and(val_flag_1, val_flag_2) 65 | pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) 66 | return pts_valid_flag 67 | 68 | def filtrate_objects(self, obj_list): 69 | type_whitelist = self.classes 70 | if self.mode == 'TRAIN': 71 | type_whitelist = list(self.classes) 72 | if 'Car' in self.classes: 73 | type_whitelist.append('Van') 74 | 75 | valid_obj_list = [] 76 | for obj in obj_list: 77 | if obj.cls_type not in type_whitelist: 78 | continue 79 | 80 | valid_obj_list.append(obj) 81 | return valid_obj_list 82 | 83 | def __len__(self): 84 | return len(self.sample_id_list) 85 | 86 | def __getitem__(self, index): 87 | sample_id = int(self.sample_id_list[index]) 88 | calib = self.get_calib(sample_id) 89 | img_shape = self.get_image_shape(sample_id) 90 | pts_lidar = self.get_lidar(sample_id) 91 | 92 | # get valid point (projected points should be in image) 93 | pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3]) 94 | pts_intensity = pts_lidar[:, 3] 95 | 96 | pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) 97 | pts_valid_flag = self.get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape) 98 | 99 | pts_rect = pts_rect[pts_valid_flag][:, 0:3] 100 | pts_intensity = pts_intensity[pts_valid_flag] 101 | 102 | if self.npoints < len(pts_rect): 103 | pts_depth = pts_rect[:, 2] 104 | pts_near_flag = pts_depth < 40.0 105 | far_idxs_choice = np.where(pts_near_flag == 0)[0] 106 | near_idxs = np.where(pts_near_flag == 1)[0] 107 | near_idxs_choice = np.random.choice(near_idxs, self.npoints - len(far_idxs_choice), replace=False) 108 | 109 | choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \ 110 | if len(far_idxs_choice) > 0 else near_idxs_choice 111 | np.random.shuffle(choice) 112 | else: 113 | choice = np.arange(0, len(pts_rect), dtype=np.int32) 114 | if self.npoints > len(pts_rect): 115 | extra_choice = np.random.choice(choice, self.npoints - len(pts_rect), replace=False) 116 | choice = np.concatenate((choice, extra_choice), axis=0) 117 | np.random.shuffle(choice) 118 | 119 | ret_pts_rect = pts_rect[choice, :] 120 | ret_pts_intensity = pts_intensity[choice] - 0.5 # translate intensity to [-0.5, 0.5] 121 | 122 | pts_features = [ret_pts_intensity.reshape(-1, 1)] 123 | ret_pts_features = np.concatenate(pts_features, axis=1) if pts_features.__len__() > 1 else pts_features[0] 124 | 125 | sample_info = {'sample_id': sample_id} 126 | 127 | if self.mode == 'TEST': 128 | if USE_INTENSITY: 129 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 130 | else: 131 | pts_input = ret_pts_rect 132 | sample_info['pts_input'] = pts_input 133 | sample_info['pts_rect'] = ret_pts_rect 134 | sample_info['pts_features'] = ret_pts_features 135 | return sample_info 136 | 137 | gt_obj_list = 
self.filtrate_objects(self.get_label(sample_id)) 138 | 139 | gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list) 140 | 141 | # prepare input 142 | if USE_INTENSITY: 143 | pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C) 144 | else: 145 | pts_input = ret_pts_rect 146 | 147 | # generate training labels 148 | cls_labels = self.generate_training_labels(ret_pts_rect, gt_boxes3d) 149 | sample_info['pts_input'] = pts_input 150 | sample_info['pts_rect'] = ret_pts_rect 151 | sample_info['cls_labels'] = cls_labels 152 | return sample_info 153 | 154 | @staticmethod 155 | def generate_training_labels(pts_rect, gt_boxes3d): 156 | cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32) 157 | gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True) 158 | extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2) 159 | extend_gt_corners = kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True) 160 | for k in range(gt_boxes3d.shape[0]): 161 | box_corners = gt_corners[k] 162 | fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners) 163 | cls_label[fg_pt_flag] = 1 164 | 165 | # enlarge the bbox3d, ignore nearby points 166 | extend_box_corners = extend_gt_corners[k] 167 | fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners) 168 | ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag) 169 | cls_label[ignore_flag] = -1 170 | 171 | return cls_label 172 | 173 | def collate_batch(self, batch): 174 | batch_size = batch.__len__() 175 | ans_dict = {} 176 | 177 | for key in batch[0].keys(): 178 | if isinstance(batch[0][key], np.ndarray): 179 | ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] for k in range(batch_size)], axis=0) 180 | 181 | else: 182 | ans_dict[key] = [batch[k][key] for k in range(batch_size)] 183 | if isinstance(batch[0][key], int): 184 | ans_dict[key] = np.array(ans_dict[key], dtype=np.int32) 185 | elif isinstance(batch[0][key], float): 186 | ans_dict[key] = np.array(ans_dict[key], dtype=np.float32) 187 | 188 | return ans_dict 189 | -------------------------------------------------------------------------------- /utils/Pointnet2.PyTorch/tools/train_and_eval.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torch.optim as optim 6 | import torch.optim.lr_scheduler as lr_sched 7 | from torch.nn.utils import clip_grad_norm_ 8 | from torch.utils.data import DataLoader 9 | import tensorboard_logger as tb_log 10 | from data.dataset import KittiDataset 11 | import argparse 12 | import importlib 13 | 14 | parser = argparse.ArgumentParser(description="Arg parser") 15 | parser.add_argument("--batch_size", type=int, default=8) 16 | parser.add_argument("--epochs", type=int, default=100) 17 | parser.add_argument("--ckpt_save_interval", type=int, default=5) 18 | parser.add_argument('--workers', type=int, default=4) 19 | parser.add_argument("--mode", type=str, default='train') 20 | parser.add_argument("--ckpt", type=str, default='None') 21 | 22 | parser.add_argument("--net", type=str, default='pointnet2_msg') 23 | 24 | parser.add_argument('--lr', type=float, default=0.002) 25 | parser.add_argument('--lr_decay', type=float, default=0.2) 26 | parser.add_argument('--lr_clip', type=float, default=0.000001) 27 | parser.add_argument('--decay_step_list', type=list, default=[50, 70, 80, 90]) 28 | parser.add_argument('--weight_decay', type=float, default=0.001) 29 | 30 | 
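# NOTE: argparse with type=list (used for --decay_step_list above) splits a raw
# CLI string into single characters; the default works, but overriding it from
# the command line would need e.g. nargs='+', type=int instead.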
parser.add_argument("--output_dir", type=str, default='output') 31 | parser.add_argument("--extra_tag", type=str, default='default') 32 | 33 | args = parser.parse_args() 34 | 35 | FG_THRESH = 0.3 36 | 37 | 38 | def log_print(info, log_f=None): 39 | print(info) 40 | if log_f is not None: 41 | print(info, file=log_f) 42 | 43 | 44 | class DiceLoss(nn.Module): 45 | def __init__(self, ignore_target=-1): 46 | super().__init__() 47 | self.ignore_target = ignore_target 48 | 49 | def forward(self, input, target): 50 | """ 51 | :param input: (N), logit 52 | :param target: (N), {0, 1} 53 | :return: 54 | """ 55 | input = torch.sigmoid(input.view(-1)) 56 | target = target.float().view(-1) 57 | mask = (target != self.ignore_target).float() 58 | return 1.0 - (torch.min(input, target) * mask).sum() / torch.clamp((torch.max(input, target) * mask).sum(), min=1.0) 59 | 60 | 61 | def train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f): 62 | model.train() 63 | log_print('===============TRAIN EPOCH %d================' % epoch, log_f=log_f) 64 | loss_func = DiceLoss(ignore_target=-1) 65 | 66 | for it, batch in enumerate(train_loader): 67 | optimizer.zero_grad() 68 | 69 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 70 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 71 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 72 | 73 | pred_cls = model(pts_input) 74 | pred_cls = pred_cls.view(-1) 75 | 76 | loss = loss_func(pred_cls, cls_labels) 77 | loss.backward() 78 | clip_grad_norm_(model.parameters(), 1.0) 79 | optimizer.step() 80 | 81 | total_it += 1 82 | 83 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 84 | fg_mask = cls_labels > 0 85 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 86 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 87 | iou = correct / torch.clamp(union, min=1.0) 88 | 89 | cur_lr = lr_scheduler.get_lr()[0] 90 | tb_log.log_value('learning_rate', cur_lr, epoch) 91 | if tb_log is not None: 92 | tb_log.log_value('train_loss', loss, total_it) 93 | tb_log.log_value('train_fg_iou', iou, total_it) 94 | 95 | log_print('training epoch %d: it=%d/%d, total_it=%d, loss=%.5f, fg_iou=%.3f, lr=%f' % 96 | (epoch, it, len(train_loader), total_it, loss.item(), iou.item(), cur_lr), log_f=log_f) 97 | 98 | return total_it 99 | 100 | 101 | def eval_one_epoch(model, eval_loader, epoch, tb_log=None, log_f=None): 102 | model.train() 103 | log_print('===============EVAL EPOCH %d================' % epoch, log_f=log_f) 104 | 105 | iou_list = [] 106 | for it, batch in enumerate(eval_loader): 107 | pts_input, cls_labels = batch['pts_input'], batch['cls_labels'] 108 | pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float() 109 | cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1) 110 | 111 | pred_cls = model(pts_input) 112 | pred_cls = pred_cls.view(-1) 113 | 114 | pred_class = (torch.sigmoid(pred_cls) > FG_THRESH) 115 | fg_mask = cls_labels > 0 116 | correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum() 117 | union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct 118 | iou = correct / torch.clamp(union, min=1.0) 119 | 120 | iou_list.append(iou.item()) 121 | log_print('EVAL: it=%d/%d, iou=%.3f' % (it, len(eval_loader), iou), log_f=log_f) 122 | 123 | iou_list = np.array(iou_list) 124 | avg_iou = iou_list.mean() 125 | if tb_log is not None: 126 | tb_log.log_value('eval_fg_iou', 
avg_iou, epoch) 127 | 128 | log_print('\nEpoch %d: Average IoU (samples=%d): %.6f' % (epoch, iou_list.__len__(), avg_iou), log_f=log_f) 129 | return avg_iou 130 | 131 | 132 | def save_checkpoint(model, epoch, ckpt_name): 133 | if isinstance(model, torch.nn.DataParallel): 134 | model_state = model.module.state_dict() 135 | else: 136 | model_state = model.state_dict() 137 | 138 | state = {'epoch': epoch, 'model_state': model_state} 139 | ckpt_name = '{}.pth'.format(ckpt_name) 140 | torch.save(state, ckpt_name) 141 | 142 | 143 | def load_checkpoint(model, filename): 144 | if os.path.isfile(filename): 145 | log_print("==> Loading from checkpoint %s" % filename) 146 | checkpoint = torch.load(filename) 147 | epoch = checkpoint['epoch'] 148 | model.load_state_dict(checkpoint['model_state']) 149 | log_print("==> Done") 150 | else: 151 | raise FileNotFoundError 152 | 153 | return epoch 154 | 155 | 156 | def train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f): 157 | model.cuda() 158 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 159 | 160 | def lr_lbmd(cur_epoch): 161 | cur_decay = 1 162 | for decay_step in args.decay_step_list: 163 | if cur_epoch >= decay_step: 164 | cur_decay = cur_decay * args.lr_decay 165 | return max(cur_decay, args.lr_clip / args.lr) 166 | 167 | lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd) 168 | 169 | total_it = 0 170 | for epoch in range(1, args.epochs + 1): 171 | lr_scheduler.step(epoch) 172 | total_it = train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f) 173 | 174 | if epoch % args.ckpt_save_interval == 0: 175 | with torch.no_grad(): 176 | avg_iou = eval_one_epoch(model, eval_loader, epoch, tb_log, log_f) 177 | ckpt_name = os.path.join(ckpt_dir, 'checkpoint_epoch_%d' % epoch) 178 | save_checkpoint(model, epoch, ckpt_name) 179 | 180 | 181 | if __name__ == '__main__': 182 | MODEL = importlib.import_module(args.net) # import network module 183 | model = MODEL.get_model(input_channels=0) 184 | 185 | eval_set = KittiDataset(root_dir='data', mode='EVAL', split='val') 186 | eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, pin_memory=True, 187 | num_workers=args.workers, collate_fn=eval_set.collate_batch) 188 | 189 | if args.mode == 'train': 190 | train_set = KittiDataset(root_dir='data', mode='TRAIN', split='train') 191 | train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, pin_memory=True, 192 | num_workers=args.workers, collate_fn=train_set.collate_batch) 193 | # output dir config 194 | output_dir = os.path.join(args.output_dir, args.extra_tag) 195 | os.makedirs(output_dir, exist_ok=True) 196 | tb_log.configure(os.path.join(output_dir, 'tensorboard')) 197 | ckpt_dir = os.path.join(output_dir, 'ckpt') 198 | os.makedirs(ckpt_dir, exist_ok=True) 199 | 200 | log_file = os.path.join(output_dir, 'log.txt') 201 | log_f = open(log_file, 'w') 202 | 203 | for key, val in vars(args).items(): 204 | log_print("{:16} {}".format(key, val), log_f=log_f) 205 | 206 | # train and eval 207 | train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f) 208 | log_f.close() 209 | elif args.mode == 'eval': 210 | epoch = load_checkpoint(model, args.ckpt) 211 | model.cuda() 212 | with torch.no_grad(): 213 | avg_iou = eval_one_epoch(model, eval_loader, epoch) 214 | else: 215 | raise NotImplementedError 216 | 217 | -------------------------------------------------------------------------------- 
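The CUDA kernels below (and the near-duplicate in utils/furthestPointSampling) implement iterative farthest point sampling (FPS) plus the index-gather used to pull out the sampled points. As a reading aid, here is a minimal NumPy sketch of the greedy loop each kernel block runs per batch element (illustrative only, not part of the repository):

import numpy as np

def farthest_point_sample_ref(xyz: np.ndarray, m: int) -> np.ndarray:
    """xyz: (N, 3); returns indices of m points that greedily maximize the
    minimum distance to the already-selected set (starts at index 0, as the
    CUDA kernel does)."""
    n = xyz.shape[0]
    idxs = np.zeros(m, dtype=np.int64)   # idxs[0] = 0, matching the kernel
    temp = np.full(n, np.inf)            # running min squared distance to the set
    old = 0
    for j in range(1, m):
        d = np.sum((xyz - xyz[old]) ** 2, axis=1)  # sq. distances to newest point
        temp = np.minimum(temp, d)                 # update the 'temp' buffer
        old = int(np.argmax(temp))                 # farthest remaining point
        idxs[j] = old
    return idxs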
/utils/Pointnet2.PyTorch/pointnet2/src/sampling_gpu.cu:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | 
4 | #include "cuda_utils.h"
5 | #include "sampling_gpu.h"
6 | 
7 | 
8 | __global__ void gather_points_kernel_fast(int b, int c, int n, int m, 
9 |     const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
10 |     // points: (B, C, N)
11 |     // idx: (B, M)
12 |     // output:
13 |     //      out: (B, C, M)
14 | 
15 |     int bs_idx = blockIdx.z;
16 |     int c_idx = blockIdx.y;
17 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
18 |     if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
19 | 
20 |     out += bs_idx * c * m + c_idx * m + pt_idx;
21 |     idx += bs_idx * m + pt_idx;
22 |     points += bs_idx * c * n + c_idx * n;
23 |     out[0] = points[idx[0]];
24 | }
25 | 
26 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 
27 |     const float *points, const int *idx, float *out, cudaStream_t stream) {
28 |     // points: (B, C, N)
29 |     // idx: (B, npoints)
30 |     // output:
31 |     //      out: (B, C, npoints)
32 | 
33 |     cudaError_t err;
34 |     dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
35 |     dim3 threads(THREADS_PER_BLOCK);
36 | 
37 |     gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out);
38 | 
39 |     err = cudaGetLastError();
40 |     if (cudaSuccess != err) {
41 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
42 |         exit(-1);
43 |     }
44 | }
45 | 
46 | __global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 
47 |     const int *__restrict__ idx, float *__restrict__ grad_points) {
48 |     // grad_out: (B, C, M)
49 |     // idx: (B, M)
50 |     // output:
51 |     //      grad_points: (B, C, N)
52 | 
53 |     int bs_idx = blockIdx.z;
54 |     int c_idx = blockIdx.y;
55 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
56 |     if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
57 | 
58 |     grad_out += bs_idx * c * m + c_idx * m + pt_idx;
59 |     idx += bs_idx * m + pt_idx;
60 |     grad_points += bs_idx * c * n + c_idx * n;
61 | 
62 |     atomicAdd(grad_points + idx[0], grad_out[0]);
63 | }
64 | 
65 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 
66 |     const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {
67 |     // grad_out: (B, C, npoints)
68 |     // idx: (B, npoints)
69 |     // output:
70 |     //      grad_points: (B, C, N)
71 | 
72 |     cudaError_t err;
73 |     dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
74 |     dim3 threads(THREADS_PER_BLOCK);
75 | 
76 |     gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points);
77 | 
78 |     err = cudaGetLastError();
79 |     if (cudaSuccess != err) {
80 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
81 |         exit(-1);
82 |     }
83 | }
84 | 
85 | 
86 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
87 |     const float v1 = dists[idx1], v2 = dists[idx2];
88 |     const int i1 = dists_i[idx1], i2 = dists_i[idx2];
89 |     dists[idx1] = max(v1, v2);
90 |     dists_i[idx1] = v2 > v1 ? i2 : i1;
91 | }
92 | 
93 | template <unsigned int block_size>
94 | __global__ void furthest_point_sampling_kernel(int b, int n, int m, 
95 |     const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {
96 |     // dataset: (B, N, 3)
97 |     // tmp: (B, N)
98 |     // output:
99 |     //      idx: (B, M)
100 | 
101 |     if (m <= 0) return;
102 |     __shared__ float dists[block_size];
103 |     __shared__ int dists_i[block_size];
104 | 
105 |     int batch_index = blockIdx.x;
106 |     dataset += batch_index * n * 3;
107 |     temp += batch_index * n;
108 |     idxs += batch_index * m;
109 | 
110 |     int tid = threadIdx.x;
111 |     const int stride = block_size;
112 | 
113 |     int old = 0;
114 |     if (threadIdx.x == 0)
115 |         idxs[0] = old;
116 | 
117 |     __syncthreads();
118 |     for (int j = 1; j < m; j++) {
119 |         int besti = 0;
120 |         float best = -1;
121 |         float x1 = dataset[old * 3 + 0];
122 |         float y1 = dataset[old * 3 + 1];
123 |         float z1 = dataset[old * 3 + 2];
124 |         for (int k = tid; k < n; k += stride) {
125 |             float x2, y2, z2;
126 |             x2 = dataset[k * 3 + 0];
127 |             y2 = dataset[k * 3 + 1];
128 |             z2 = dataset[k * 3 + 2];
129 |             // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
130 |             // if (mag <= 1e-3)
131 |             //     continue;
132 | 
133 |             float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
134 |             float d2 = min(d, temp[k]);
135 |             temp[k] = d2;
136 |             besti = d2 > best ? k : besti;
137 |             best = d2 > best ? d2 : best;
138 |         }
139 |         dists[tid] = best;
140 |         dists_i[tid] = besti;
141 |         __syncthreads();
142 | 
143 |         if (block_size >= 1024) {
144 |             if (tid < 512) {
145 |                 __update(dists, dists_i, tid, tid + 512);
146 |             }
147 |             __syncthreads();
148 |         }
149 | 
150 |         if (block_size >= 512) {
151 |             if (tid < 256) {
152 |                 __update(dists, dists_i, tid, tid + 256);
153 |             }
154 |             __syncthreads();
155 |         }
156 |         if (block_size >= 256) {
157 |             if (tid < 128) {
158 |                 __update(dists, dists_i, tid, tid + 128);
159 |             }
160 |             __syncthreads();
161 |         }
162 |         if (block_size >= 128) {
163 |             if (tid < 64) {
164 |                 __update(dists, dists_i, tid, tid + 64);
165 |             }
166 |             __syncthreads();
167 |         }
168 |         if (block_size >= 64) {
169 |             if (tid < 32) {
170 |                 __update(dists, dists_i, tid, tid + 32);
171 |             }
172 |             __syncthreads();
173 |         }
174 |         if (block_size >= 32) {
175 |             if (tid < 16) {
176 |                 __update(dists, dists_i, tid, tid + 16);
177 |             }
178 |             __syncthreads();
179 |         }
180 |         if (block_size >= 16) {
181 |             if (tid < 8) {
182 |                 __update(dists, dists_i, tid, tid + 8);
183 |             }
184 |             __syncthreads();
185 |         }
186 |         if (block_size >= 8) {
187 |             if (tid < 4) {
188 |                 __update(dists, dists_i, tid, tid + 4);
189 |             }
190 |             __syncthreads();
191 |         }
192 |         if (block_size >= 4) {
193 |             if (tid < 2) {
194 |                 __update(dists, dists_i, tid, tid + 2);
195 |             }
196 |             __syncthreads();
197 |         }
198 |         if (block_size >= 2) {
199 |             if (tid < 1) {
200 |                 __update(dists, dists_i, tid, tid + 1);
201 |             }
202 |             __syncthreads();
203 |         }
204 | 
205 |         old = dists_i[0];
206 |         if (tid == 0)
207 |             idxs[j] = old;
208 |     }
209 | }
210 | 
211 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 
212 |     const float *dataset, float *temp, int *idxs, cudaStream_t stream) {
213 |     // dataset: (B, N, 3)
214 |     // tmp: (B, N)
215 |     // output:
216 |     //      idx: (B, M)
217 | 
218 |     cudaError_t err;
219 |     unsigned int n_threads = opt_n_threads(n);
220 | 
221 |     switch (n_threads) {
222 |         case 1024:
223 |             furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
224 |         case 512:
225 |             furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
226 |         case 256:
227 |             furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
228 |         case 128:
229 |             furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
230 |         case 64:
231 |             furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
232 |         case 32:
233 |             furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
234 |         case 16:
235 |             furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
236 |         case 8:
237 |             furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
238 |         case 4:
239 |             furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
240 |         case 2:
241 |             furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
242 |         case 1:
243 |             furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
244 |         default:
245 |             furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
246 |     }
247 | 
248 |     err = cudaGetLastError();
249 |     if (cudaSuccess != err) {
250 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
251 |         exit(-1);
252 |     }
253 | }
254 | 
--------------------------------------------------------------------------------
/utils/furthestPointSampling/sampling_gpu.cu:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 | 
4 | #include <stdio.h>
5 | #include <stdlib.h>
6 | #include <cmath>
7 | #include "sampling_gpu.h"
8 | 
9 | #define TOTAL_THREADS 1024
10 | #define THREADS_PER_BLOCK 256
11 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
12 | 
13 | inline int opt_n_threads(int work_size) {
14 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
15 | 
16 |     return max(min(1 << pow_2, TOTAL_THREADS), 1);
17 | }
18 | #endif
19 | 
20 | 
21 | __global__ void gather_points_kernel_fast(int b, int c, int n, int m, 
22 |     const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
23 |     // points: (B, C, N)
24 |     // idx: (B, M)
25 |     // output:
26 |     //      out: (B, C, M)
27 | 
28 |     int bs_idx = blockIdx.z;
29 |     int c_idx = blockIdx.y;
30 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
31 |     if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
32 | 
33 |     out += bs_idx * c * m + c_idx * m + pt_idx;
34 |     idx += bs_idx * m + pt_idx;
35 |     points += bs_idx * c * n + c_idx * n;
36 |     out[0] = points[idx[0]];
37 | }
38 | 
39 | void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, 
40 |     const float *points, const int *idx, float *out, cudaStream_t stream) {
41 |     // points: (B, C, N)
42 |     // idx: (B, npoints)
43 |     // output:
44 |     //      out: (B, C, npoints)
45 | 
46 |     cudaError_t err;
47 |     dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
48 |     dim3 threads(THREADS_PER_BLOCK);
49 | 
50 |     gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out);
51 | 
52 |     err = cudaGetLastError();
53 |     if (cudaSuccess != err) {
54 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
55 |         exit(-1);
56 |     }
57 | }
58 | 
59 | __global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, 
60 |     const int *__restrict__ idx, float *__restrict__ grad_points) {
61 |     // grad_out: (B, C, M)
62 |     // idx: (B, M)
63 |     // output:
64 |     //      grad_points: (B, C, N)
65 | 
66 |     int bs_idx = blockIdx.z;
67 |     int c_idx = blockIdx.y;
68 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
69 |     if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
70 | 
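    // The forward gather reads points[idx]; the backward pass must scatter
    // gradients back to those source points. The offsets below select this
    // (batch, channel, sample) element, and atomicAdd handles collisions when
    // several samples share the same source index.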
71 |     grad_out += bs_idx * c * m + c_idx * m + pt_idx;
72 |     idx += bs_idx * m + pt_idx;
73 |     grad_points += bs_idx * c * n + c_idx * n;
74 | 
75 |     atomicAdd(grad_points + idx[0], grad_out[0]);
76 | }
77 | 
78 | void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, 
79 |     const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {
80 |     // grad_out: (B, C, npoints)
81 |     // idx: (B, npoints)
82 |     // output:
83 |     //      grad_points: (B, C, N)
84 | 
85 |     cudaError_t err;
86 |     dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b);  // blockIdx.x(col), blockIdx.y(row)
87 |     dim3 threads(THREADS_PER_BLOCK);
88 | 
89 |     gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points);
90 | 
91 |     err = cudaGetLastError();
92 |     if (cudaSuccess != err) {
93 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
94 |         exit(-1);
95 |     }
96 | }
97 | 
98 | 
99 | __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
100 |     const float v1 = dists[idx1], v2 = dists[idx2];
101 |     const int i1 = dists_i[idx1], i2 = dists_i[idx2];
102 |     dists[idx1] = max(v1, v2);
103 |     dists_i[idx1] = v2 > v1 ? i2 : i1;
104 | }
105 | 
106 | template <unsigned int block_size>
107 | __global__ void furthest_point_sampling_kernel(int b, int n, int m, 
108 |     const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {
109 |     // dataset: (B, N, 3)
110 |     // tmp: (B, N)
111 |     // output:
112 |     //      idx: (B, M)
113 | 
114 |     if (m <= 0) return;
115 |     __shared__ float dists[block_size];
116 |     __shared__ int dists_i[block_size];
117 | 
118 |     int batch_index = blockIdx.x;
119 |     dataset += batch_index * n * 3;
120 |     temp += batch_index * n;
121 |     idxs += batch_index * m;
122 | 
123 |     int tid = threadIdx.x;
124 |     const int stride = block_size;
125 | 
126 |     int old = 0;
127 |     if (threadIdx.x == 0)
128 |         idxs[0] = old;
129 | 
130 |     __syncthreads();
131 |     for (int j = 1; j < m; j++) {
132 |         int besti = 0;
133 |         float best = -1;
134 |         float x1 = dataset[old * 3 + 0];
135 |         float y1 = dataset[old * 3 + 1];
136 |         float z1 = dataset[old * 3 + 2];
137 |         for (int k = tid; k < n; k += stride) {
138 |             float x2, y2, z2;
139 |             x2 = dataset[k * 3 + 0];
140 |             y2 = dataset[k * 3 + 1];
141 |             z2 = dataset[k * 3 + 2];
142 |             // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
143 |             // if (mag <= 1e-3)
144 |             //     continue;
145 | 
146 |             float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
147 |             float d2 = min(d, temp[k]);
148 |             temp[k] = d2;
149 |             besti = d2 > best ? k : besti;
150 |             best = d2 > best ? d2 : best;
151 |         }
152 |         dists[tid] = best;
153 |         dists_i[tid] = besti;
154 |         __syncthreads();
155 | 
156 |         if (block_size >= 1024) {
157 |             if (tid < 512) {
158 |                 __update(dists, dists_i, tid, tid + 512);
159 |             }
160 |             __syncthreads();
161 |         }
162 | 
163 |         if (block_size >= 512) {
164 |             if (tid < 256) {
165 |                 __update(dists, dists_i, tid, tid + 256);
166 |             }
167 |             __syncthreads();
168 |         }
169 |         if (block_size >= 256) {
170 |             if (tid < 128) {
171 |                 __update(dists, dists_i, tid, tid + 128);
172 |             }
173 |             __syncthreads();
174 |         }
175 |         if (block_size >= 128) {
176 |             if (tid < 64) {
177 |                 __update(dists, dists_i, tid, tid + 64);
178 |             }
179 |             __syncthreads();
180 |         }
181 |         if (block_size >= 64) {
182 |             if (tid < 32) {
183 |                 __update(dists, dists_i, tid, tid + 32);
184 |             }
185 |             __syncthreads();
186 |         }
187 |         if (block_size >= 32) {
188 |             if (tid < 16) {
189 |                 __update(dists, dists_i, tid, tid + 16);
190 |             }
191 |             __syncthreads();
192 |         }
193 |         if (block_size >= 16) {
194 |             if (tid < 8) {
195 |                 __update(dists, dists_i, tid, tid + 8);
196 |             }
197 |             __syncthreads();
198 |         }
199 |         if (block_size >= 8) {
200 |             if (tid < 4) {
201 |                 __update(dists, dists_i, tid, tid + 4);
202 |             }
203 |             __syncthreads();
204 |         }
205 |         if (block_size >= 4) {
206 |             if (tid < 2) {
207 |                 __update(dists, dists_i, tid, tid + 2);
208 |             }
209 |             __syncthreads();
210 |         }
211 |         if (block_size >= 2) {
212 |             if (tid < 1) {
213 |                 __update(dists, dists_i, tid, tid + 1);
214 |             }
215 |             __syncthreads();
216 |         }
217 | 
218 |         old = dists_i[0];
219 |         if (tid == 0)
220 |             idxs[j] = old;
221 |     }
222 | }
223 | 
224 | void furthest_point_sampling_kernel_launcher(int b, int n, int m, 
225 |     const float *dataset, float *temp, int *idxs, cudaStream_t stream) {
226 |     // dataset: (B, N, 3)
227 |     // tmp: (B, N)
228 |     // output:
229 |     //      idx: (B, M)
230 | 
231 |     cudaError_t err;
232 |     unsigned int n_threads = opt_n_threads(n);
233 | 
234 |     switch (n_threads) {
235 |         case 1024:
236 |             furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
237 |         case 512:
238 |             furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
239 |         case 256:
240 |             furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
241 |         case 128:
242 |             furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
243 |         case 64:
244 |             furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
245 |         case 32:
246 |             furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
247 |         case 16:
248 |             furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
249 |         case 8:
250 |             furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
251 |         case 4:
252 |             furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
253 |         case 2:
254 |             furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
255 |         case 1:
256 |             furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
257 |         default:
258 |             furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
259 |     }
260 | 
261 |     err = cudaGetLastError();
262 |     if (cudaSuccess != err) {
263 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
264 |         exit(-1);
265 |     }
266 | }
--------------------------------------------------------------------------------
/utils/Pointnet2.PyTorch/tools/kitti_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.spatial import Delaunay
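# Delaunay is used by in_hull() below: a point lies inside the convex hull of a
# box's corners iff Delaunay.find_simplex() returns a non-negative simplex id.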
3 | import scipy 4 | 5 | 6 | def cls_type_to_id(cls_type): 7 | type_to_id = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4} 8 | if cls_type not in type_to_id.keys(): 9 | return -1 10 | return type_to_id[cls_type] 11 | 12 | 13 | class Object3d(object): 14 | def __init__(self, line): 15 | label = line.strip().split(' ') 16 | self.src = line 17 | self.cls_type = label[0] 18 | self.cls_id = cls_type_to_id(self.cls_type) 19 | self.trucation = float(label[1]) 20 | self.occlusion = float(label[2]) # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown 21 | self.alpha = float(label[3]) 22 | self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32) 23 | self.h = float(label[8]) 24 | self.w = float(label[9]) 25 | self.l = float(label[10]) 26 | self.pos = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32) 27 | self.dis_to_cam = np.linalg.norm(self.pos) 28 | self.ry = float(label[14]) 29 | self.score = float(label[15]) if label.__len__() == 16 else -1.0 30 | self.level_str = None 31 | self.level = self.get_obj_level() 32 | 33 | def get_obj_level(self): 34 | height = float(self.box2d[3]) - float(self.box2d[1]) + 1 35 | 36 | if height >= 40 and self.trucation <= 0.15 and self.occlusion <= 0: 37 | self.level_str = 'Easy' 38 | return 1 # Easy 39 | elif height >= 25 and self.trucation <= 0.3 and self.occlusion <= 1: 40 | self.level_str = 'Moderate' 41 | return 2 # Moderate 42 | elif height >= 25 and self.trucation <= 0.5 and self.occlusion <= 2: 43 | self.level_str = 'Hard' 44 | return 3 # Hard 45 | else: 46 | self.level_str = 'UnKnown' 47 | return 4 48 | 49 | def generate_corners3d(self): 50 | """ 51 | generate corners3d representation for this object 52 | :return corners_3d: (8, 3) corners of box3d in camera coord 53 | """ 54 | l, h, w = self.l, self.h, self.w 55 | x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] 56 | y_corners = [0, 0, 0, 0, -h, -h, -h, -h] 57 | z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] 58 | 59 | R = np.array([[np.cos(self.ry), 0, np.sin(self.ry)], 60 | [0, 1, 0], 61 | [-np.sin(self.ry), 0, np.cos(self.ry)]]) 62 | corners3d = np.vstack([x_corners, y_corners, z_corners]) # (3, 8) 63 | corners3d = np.dot(R, corners3d).T 64 | corners3d = corners3d + self.pos 65 | return corners3d 66 | 67 | def to_str(self): 68 | print_str = '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' \ 69 | % (self.cls_type, self.trucation, self.occlusion, self.alpha, self.box2d, self.h, self.w, self.l, 70 | self.pos, self.ry) 71 | return print_str 72 | 73 | def to_kitti_format(self): 74 | kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \ 75 | % (self.cls_type, self.trucation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1], 76 | self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.pos[0], self.pos[1], self.pos[2], 77 | self.ry) 78 | return kitti_str 79 | 80 | 81 | def get_calib_from_file(calib_file): 82 | with open(calib_file) as f: 83 | lines = f.readlines() 84 | 85 | obj = lines[2].strip().split(' ')[1:] 86 | P2 = np.array(obj, dtype=np.float32) 87 | obj = lines[3].strip().split(' ')[1:] 88 | P3 = np.array(obj, dtype=np.float32) 89 | obj = lines[4].strip().split(' ')[1:] 90 | R0 = np.array(obj, dtype=np.float32) 91 | obj = lines[5].strip().split(' ')[1:] 92 | Tr_velo_to_cam = np.array(obj, dtype=np.float32) 93 | 94 | return {'P2': P2.reshape(3, 4), 95 | 'P3': P3.reshape(3, 4), 96 | 'R0': 
R0.reshape(3, 3), 97 | 'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)} 98 | 99 | 100 | class Calibration(object): 101 | def __init__(self, calib_file): 102 | if isinstance(calib_file, str): 103 | calib = get_calib_from_file(calib_file) 104 | else: 105 | calib = calib_file 106 | 107 | self.P2 = calib['P2'] # 3 x 4 108 | self.R0 = calib['R0'] # 3 x 3 109 | self.V2C = calib['Tr_velo2cam'] # 3 x 4 110 | 111 | def cart_to_hom(self, pts): 112 | """ 113 | :param pts: (N, 3 or 2) 114 | :return pts_hom: (N, 4 or 3) 115 | """ 116 | pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32))) 117 | return pts_hom 118 | 119 | def lidar_to_rect(self, pts_lidar): 120 | """ 121 | :param pts_lidar: (N, 3) 122 | :return pts_rect: (N, 3) 123 | """ 124 | pts_lidar_hom = self.cart_to_hom(pts_lidar) 125 | pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T)) 126 | return pts_rect 127 | 128 | def rect_to_img(self, pts_rect): 129 | """ 130 | :param pts_rect: (N, 3) 131 | :return pts_img: (N, 2) 132 | """ 133 | pts_rect_hom = self.cart_to_hom(pts_rect) 134 | pts_2d_hom = np.dot(pts_rect_hom, self.P2.T) 135 | pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2) 136 | pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord 137 | return pts_img, pts_rect_depth 138 | 139 | def lidar_to_img(self, pts_lidar): 140 | """ 141 | :param pts_lidar: (N, 3) 142 | :return pts_img: (N, 2) 143 | """ 144 | pts_rect = self.lidar_to_rect(pts_lidar) 145 | pts_img, pts_depth = self.rect_to_img(pts_rect) 146 | return pts_img, pts_depth 147 | 148 | 149 | def get_objects_from_label(label_file): 150 | with open(label_file, 'r') as f: 151 | lines = f.readlines() 152 | objects = [Object3d(line) for line in lines] 153 | return objects 154 | 155 | 156 | def objs_to_boxes3d(obj_list): 157 | boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32) 158 | for k, obj in enumerate(obj_list): 159 | boxes3d[k, 0:3], boxes3d[k, 3], boxes3d[k, 4], boxes3d[k, 5], boxes3d[k, 6] \ 160 | = obj.pos, obj.h, obj.w, obj.l, obj.ry 161 | return boxes3d 162 | 163 | 164 | def boxes3d_to_corners3d(boxes3d, rotate=True): 165 | """ 166 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 167 | :param rotate: 168 | :return: corners3d: (N, 8, 3) 169 | """ 170 | boxes_num = boxes3d.shape[0] 171 | h, w, l = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5] 172 | x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dtype=np.float32).T # (N, 8) 173 | z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T # (N, 8) 174 | 175 | y_corners = np.zeros((boxes_num, 8), dtype=np.float32) 176 | y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8) 177 | 178 | if rotate: 179 | ry = boxes3d[:, 6] 180 | zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32) 181 | rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)], 182 | [zeros, ones, zeros], 183 | [np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N) 184 | R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3) 185 | 186 | temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1), 187 | z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3) 188 | rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3) 189 | x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2] 190 | 191 | x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2] 192 | 193 | x = 
x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
194 |     y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
195 |     z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
196 | 
197 |     corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2)
198 | 
199 |     return corners.astype(np.float32)
200 | 
201 | 
202 | def enlarge_box3d(boxes3d, extra_width):
203 |     """
204 |     :param boxes3d: (N, 7) [x, y, z, h, w, l, ry]
205 |     """
206 |     if isinstance(boxes3d, np.ndarray):
207 |         large_boxes3d = boxes3d.copy()
208 |     else:
209 |         large_boxes3d = boxes3d.clone()
210 |     large_boxes3d[:, 3:6] += extra_width * 2
211 |     large_boxes3d[:, 1] += extra_width
212 |     return large_boxes3d
213 | 
214 | 
215 | def in_hull(p, hull):
216 |     """
217 |     :param p: (N, K) test points
218 |     :param hull: (M, K) M corners of a box
219 |     :return (N) bool
220 |     """
221 |     try:
222 |         if not isinstance(hull, Delaunay):
223 |             hull = Delaunay(hull)
224 |         flag = hull.find_simplex(p) >= 0
225 |     except scipy.spatial.qhull.QhullError:
226 |         print('Warning: not a hull %s' % str(hull))
227 |         flag = np.zeros(p.shape[0], dtype=bool)
228 | 
229 |     return flag
230 | 
--------------------------------------------------------------------------------
/utils/Pointnet2.PyTorch/pointnet2/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.autograd import Function
4 | import torch.nn as nn
5 | from typing import Tuple
6 | 
7 | import pointnet2_cuda as pointnet2
8 | 
9 | 
10 | class FurthestPointSampling(Function):
11 |     @staticmethod
12 |     def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
13 |         """
14 |         Uses iterative furthest point sampling to select a set of npoint features that have the largest
15 |         minimum distance
16 |         :param ctx:
17 |         :param xyz: (B, N, 3) where N > npoint
18 |         :param npoint: int, number of features in the sampled set
19 |         :return:
20 |              output: (B, npoint) tensor containing the set
21 |         """
22 |         assert xyz.is_contiguous()
23 | 
24 |         B, N, _ = xyz.size()
25 |         output = torch.cuda.IntTensor(B, npoint)
26 |         temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
27 | 
28 |         pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)
29 |         return output
30 | 
31 |     @staticmethod
32 |     def backward(xyz, a=None):
33 |         return None, None
34 | 
35 | 
36 | furthest_point_sample = FurthestPointSampling.apply
37 | 
38 | 
39 | class GatherOperation(Function):
40 | 
41 |     @staticmethod
42 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
43 |         """
44 |         :param ctx:
45 |         :param features: (B, C, N)
46 |         :param idx: (B, npoint) index tensor of the features to gather
47 |         :return:
48 |             output: (B, C, npoint)
49 |         """
50 |         assert features.is_contiguous()
51 |         assert idx.is_contiguous()
52 | 
53 |         B, npoint = idx.size()
54 |         _, C, N = features.size()
55 |         output = torch.cuda.FloatTensor(B, C, npoint)
56 | 
57 |         pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)
58 | 
59 |         ctx.for_backwards = (idx, C, N)
60 |         return output
61 | 
62 |     @staticmethod
63 |     def backward(ctx, grad_out):
64 |         idx, C, N = ctx.for_backwards
65 |         B, npoint = idx.size()
66 | 
67 |         grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
68 |         grad_out_data = grad_out.data.contiguous()
69 |         pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data)
70 |         return grad_features, None
71 | 
72 | 
73 | gather_operation = GatherOperation.apply
74 | 
75 | 
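# A minimal usage sketch of the two ops above (illustrative only, with a
# hypothetical helper name; assumes the compiled pointnet2_cuda extension
# and CUDA tensors):
def _fps_downsample_demo(xyz: torch.Tensor, npoint: int = 1024) -> torch.Tensor:
    # xyz: (B, N, 3) float CUDA tensor -> (B, npoint, 3) FPS-selected subset
    idx = furthest_point_sample(xyz, npoint)                           # (B, npoint) int32
    new_xyz = gather_operation(xyz.transpose(1, 2).contiguous(), idx)  # (B, 3, npoint)
    return new_xyz.transpose(1, 2).contiguous()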
76 | class ThreeNN(Function):
77 | 
78 |     @staticmethod
79 |     def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
80 |         """
81 |         Find the three nearest neighbors of unknown in known
82 |         :param ctx:
83 |         :param unknown: (B, N, 3)
84 |         :param known: (B, M, 3)
85 |         :return:
86 |             dist: (B, N, 3) l2 distance to the three nearest neighbors
87 |             idx: (B, N, 3) index of 3 nearest neighbors
88 |         """
89 |         assert unknown.is_contiguous()
90 |         assert known.is_contiguous()
91 | 
92 |         B, N, _ = unknown.size()
93 |         m = known.size(1)
94 |         dist2 = torch.cuda.FloatTensor(B, N, 3)
95 |         idx = torch.cuda.IntTensor(B, N, 3)
96 | 
97 |         pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
98 |         return torch.sqrt(dist2), idx
99 | 
100 |     @staticmethod
101 |     def backward(ctx, a=None, b=None):
102 |         return None, None
103 | 
104 | 
105 | three_nn = ThreeNN.apply
106 | 
107 | 
108 | class ThreeInterpolate(Function):
109 | 
110 |     @staticmethod
111 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
112 |         """
113 |         Performs weighted linear interpolation on 3 features
114 |         :param ctx:
115 |         :param features: (B, C, M) Features descriptors to be interpolated from
116 |         :param idx: (B, n, 3) three nearest neighbors of the target features in features
117 |         :param weight: (B, n, 3) weights
118 |         :return:
119 |             output: (B, C, N) tensor of the interpolated features
120 |         """
121 |         assert features.is_contiguous()
122 |         assert idx.is_contiguous()
123 |         assert weight.is_contiguous()
124 | 
125 |         B, c, m = features.size()
126 |         n = idx.size(1)
127 |         ctx.three_interpolate_for_backward = (idx, weight, m)
128 |         output = torch.cuda.FloatTensor(B, c, n)
129 | 
130 |         pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)
131 |         return output
132 | 
133 |     @staticmethod
134 |     def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
135 |         """
136 |         :param ctx:
137 |         :param grad_out: (B, C, N) tensor with gradients of outputs
138 |         :return:
139 |             grad_features: (B, C, M) tensor with gradients of features
140 |             None:
141 |             None:
142 |         """
143 |         idx, weight, m = ctx.three_interpolate_for_backward
144 |         B, c, n = grad_out.size()
145 | 
146 |         grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
147 |         grad_out_data = grad_out.data.contiguous()
148 | 
149 |         pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data)
150 |         return grad_features, None, None
151 | 
152 | 
153 | three_interpolate = ThreeInterpolate.apply
154 | 
155 | 
156 | class GroupingOperation(Function):
157 | 
158 |     @staticmethod
159 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
160 |         """
161 |         :param ctx:
162 |         :param features: (B, C, N) tensor of features to group
163 |         :param idx: (B, npoint, nsample) tensor containing the indices of features to group with
164 |         :return:
165 |             output: (B, C, npoint, nsample) tensor
166 |         """
167 |         assert features.is_contiguous()
168 |         assert idx.is_contiguous()
169 | 
170 |         B, nfeatures, nsample = idx.size()
171 |         _, C, N = features.size()
172 |         output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
173 | 
174 |         pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)
175 | 
176 |         ctx.for_backwards = (idx, N)
177 |         return output
178 | 
179 |     @staticmethod
180 |     def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
181 |         """
182 |         :param ctx:
183 |         :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
184 |         :return:
185 |             grad_features: (B, C, N) gradient of the features
186 |         """
187 |         idx, N = ctx.for_backwards
188 | 
189 |         B, C, npoint, nsample = grad_out.size()
190 |         grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
191 | 
192 |         grad_out_data = grad_out.data.contiguous()
193 |         pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data)
194 |         return grad_features, None
195 | 
196 | 
197 | grouping_operation = GroupingOperation.apply
198 | 
199 | 
200 | class BallQuery(Function):
201 | 
202 |     @staticmethod
203 |     def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
204 |         """
205 |         :param ctx:
206 |         :param radius: float, radius of the balls
207 |         :param nsample: int, maximum number of features in the balls
208 |         :param xyz: (B, N, 3) xyz coordinates of the features
209 |         :param new_xyz: (B, npoint, 3) centers of the ball query
210 |         :return:
211 |             idx: (B, npoint, nsample) tensor with the indices of the features that form the query balls
212 |         """
213 |         assert new_xyz.is_contiguous()
214 |         assert xyz.is_contiguous()
215 | 
216 |         B, N, _ = xyz.size()
217 |         npoint = new_xyz.size(1)
218 |         idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
219 | 
220 |         pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)
221 |         return idx
222 | 
223 |     @staticmethod
224 |     def backward(ctx, a=None):
225 |         return None, None, None, None
226 | 
227 | 
228 | ball_query = BallQuery.apply
229 | 
230 | 
231 | class QueryAndGroup(nn.Module):
232 |     def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
233 |         """
234 |         :param radius: float, radius of ball
235 |         :param nsample: int, maximum number of features to gather in the ball
236 |         :param use_xyz:
237 |         """
238 |         super().__init__()
239 |         self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
240 | 
241 |     def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]:
242 |         """
243 |         :param xyz: (B, N, 3) xyz coordinates of the features
244 |         :param new_xyz: (B, npoint, 3) centroids
245 |         :param features: (B, C, N) descriptors of the features
246 |         :return:
247 |             new_features: (B, 3 + C, npoint, nsample)
248 |         """
249 |         idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
250 |         xyz_trans = xyz.transpose(1, 2).contiguous()
251 |         grouped_xyz = grouping_operation(xyz_trans, idx)  # (B, 3, npoint, nsample)
252 |         grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
253 | 
254 |         if features is not None:
255 |             grouped_features = grouping_operation(features, idx)
256 |             if self.use_xyz:
257 |                 new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, C + 3, npoint, nsample)
258 |             else:
259 |                 new_features = grouped_features
260 |         else:
261 |             assert self.use_xyz, "Cannot have features=None and use_xyz=False; there would be nothing to group!"
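            # with no input features, the grouped (and centered) coordinates
            # themselves serve as the features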
class GroupAll(nn.Module):
    def __init__(self, use_xyz: bool = True):
        super().__init__()
        self.use_xyz = use_xyz

    def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None):
        """
        :param xyz: (B, N, 3) xyz coordinates of the features
        :param new_xyz: ignored
        :param features: (B, C, N) descriptors of the features
        :return:
            new_features: (B, C + 3, 1, N)
        """
        grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
        if features is not None:
            grouped_features = features.unsqueeze(2)
            if self.use_xyz:
                new_features = torch.cat([grouped_xyz, grouped_features], dim=1)  # (B, 3 + C, 1, N)
            else:
                new_features = grouped_features
        else:
            new_features = grouped_xyz

        return new_features
--------------------------------------------------------------------------------
/models/latent_3d_points/autoencoder.py:
--------------------------------------------------------------------------------
import argparse
import torch
import torch.utils.data
import torch.nn as nn
import torch.nn.functional as F
import os
import time
from datetime import datetime, timedelta
from visdom import Visdom

from utils.utils import *
from utils.data_loader import ShapeNetCorev2PC2048Dataset


class Encoder(nn.Module):
    def __init__(self, bneck_size):
        super(Encoder, self).__init__()
        self.conv1 = nn.Conv1d(3, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 128, 1)
        self.conv4 = nn.Conv1d(128, 256, 1)
        self.conv5 = nn.Conv1d(256, bneck_size, 1)

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(128)
        self.bn4 = nn.BatchNorm1d(256)
        self.bn5 = nn.BatchNorm1d(bneck_size)

    def forward(self, x):
        x = x.transpose(1, 2).contiguous()
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.bn5(self.conv5(x))
        x, _ = torch.max(x, dim=2)
        return x


class Decoder(nn.Module):
    def __init__(self, bneck_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(bneck_size, 256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 2048*3)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = x.view(-1, 2048, 3)
        return x


class AXform(nn.Module):
    def __init__(self, K1, K2, N):
        super(AXform, self).__init__()
        self.K1 = K1
        self.K2 = K2
        self.N = N  # N >= K2

        self.fc1 = nn.Linear(K1, N*K2)

        self.conv1 = nn.Conv1d(K2, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 2048, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.softmax = nn.Softmax(dim=2)

        self.conv4 = nn.Conv1d(K2, 3, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = x.view(-1, self.N, self.K2)

        x_base = x
        x = x.transpose(1, 2).contiguous()
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.conv3(x)
        x_weights = self.softmax(x)
        x = torch.bmm(x_weights, x_base)

        x = x.transpose(1, 2).contiguous()
        x = self.conv4(x)
        x = x.transpose(1, 2).contiguous()
        return x
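# A shape walk-through of AXform.forward for the configuration used below
# (K1=128, K2=32, N=128) and batch size B:
#
#   input x:             (B, 128)        latent code
#   fc1 + view        -> x_base:    (B, 128, 32)    N basis points in a K2-dim feature space
#   conv1-3 + softmax -> x_weights: (B, 2048, 128)  one attention row per output point (rows sum to 1)
#   bmm               -> (B, 2048, 32)   each output point is a convex combination of the basis points
#   conv4             -> (B, 2048, 3)    project the K2-dim space down to xyz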
class Network(nn.Module):
    def __init__(self, opt):
        super(Network, self).__init__()
        self.encoder = Encoder(bneck_size=128)

        if opt.method == 'original':
            self.decoder = Decoder(bneck_size=128)
        elif opt.method == 'axform':
            self.decoder = AXform(K1=128, K2=32, N=128)

    def forward(self, x):
        x_latent = self.encoder(x)
        x = self.decoder(x_latent)
        return [x]


class Runner:
    def __init__(self, opt):
        super(Runner, self).__init__()
        self.opt = opt
        self.network = torch.nn.DataParallel(Network(opt).to(opt.device), device_ids=opt.gpu_ids)
        self.display_id = opt.display_id
        self.vis = Visdom(env='%s' % self.display_id+'_'+str(opt.gpu_ids[0]))

        self.loss_cd = L2_ChamferLoss()
        self.eval_cd = L2_ChamferEval()
        # self.loss_emd = EMDLoss()
        # self.eval_emd = EMDEval()
        self.optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, self.network.parameters()), lr=opt.lr, betas=(0.9, 0.999))

        self.best_cd = 0
        self.cd = {'02691156': {'num': 0, 'value': 0},
                   '02958343': {'num': 0, 'value': 0},
                   '03001627': {'num': 0, 'value': 0},
                   'overall' : {'num': 0, 'value': 0}}
        self.id2cat = {'02691156': 'airplane',
                       '02958343': 'car',
                       '03001627': 'chair',
                       'overall' : 'overall'}
        self.cat2id = {v: k for k, v in self.id2cat.items()}

    def train_model(self, data, args):
        start = time.time()

        foldername = data[0]
        filename = data[1]
        input = data[2].to(self.opt.device)

        self.network.train()
        output = self.network(input)
        self.loss = self.loss_cd(output[-1], input)

        self.optimizer.zero_grad()
        self.loss.backward()
        self.optimizer.step()

        end = time.time()
        if args[1] % 10 == 0:
            print('[%d/%d][%d/%d]' % (args[0], self.opt.n_epochs, args[1], args[2]), end=' ')
            print('Loss: %.6f Time: %.6f' % (self.loss, end - start))
            with open(os.path.join(self.opt.save_path, 'runlog.txt'), 'a') as f:
                f.write('[%d/%d][%d/%d] ' % (args[0], self.opt.n_epochs, args[1], args[2]))
                f.write('Loss: %.6f Time: %.6f\n' % (self.loss, end - start))

        if self.opt.visual:
            plot_diff_pcds([input[0]]+[output[i][0] for i in range(len(output))],
                           vis=self.vis,
                           title='train for epoch %d' % args[0],
                           legend=['input']+['output'+str(i) for i in range(len(output))],
                           win='train_vis')

    def val_model(self, data, args):
        with torch.no_grad():
            foldername_val = data[0]
            filename_val = data[1]
            input_val = torch.unsqueeze(data[2], 0).to(self.opt.device)

            self.network.eval()
            output_val = self.network(input_val)

            value = self.eval_cd(output_val[-1], input_val)
            self.cd[foldername_val]['num'] += 1
            self.cd['overall']['num'] += 1
            self.cd[foldername_val]['value'] += value
            self.cd['overall']['value'] += value

            if self.opt.visual:
                if args[1] % 200 == 0:
                    plot_diff_pcds([input_val[0]]+[output_val[i][0] for i in range(len(output_val))],
                                   vis=self.vis,
                                   title=foldername_val+'_'+filename_val,
                                   legend=['input']+['output'+str(i) for i in range(len(output_val))],
                                   win='val_vis'+foldername_val)
    def after_one_epoch(self, args):
        self.epoch = args[0]

        print('val result:')
        with open(os.path.join(self.opt.save_path, 'runlog.txt'), 'a') as f:
            f.write('val result:\n')
        for key in self.cd:
            self.cd[key]['value'] /= max(self.cd[key]['num'], 1)
            print(self.id2cat[key]+': CD: %.6f' % (self.cd[key]['value']))
            with open(os.path.join(self.opt.save_path, 'runlog.txt'), 'a') as f:
                f.write(self.id2cat[key]+': CD: %.6f\n' % (self.cd[key]['value']))

        losses = {'loss': self.loss.item()}
        if self.opt.class_choice is None:
            self.opt.class_choice = ['airplane', 'car', 'chair']
        # guard against appending 'overall' once per epoch
        if 'overall' not in self.opt.class_choice:
            self.opt.class_choice.append('overall')
        for i in range(len(self.opt.class_choice)):
            cat_name = self.opt.class_choice[i]
            losses['val_'+cat_name] = self.cd[self.cat2id[cat_name]]['value'].item()
        plot_loss_curves(self, losses, vis=self.vis, win='loss_curves')

        save_ckpt(self, step=50)
        self.cd = {'02691156': {'num': 0, 'value': 0},
                   '02958343': {'num': 0, 'value': 0},
                   '03001627': {'num': 0, 'value': 0},
                   'overall' : {'num': 0, 'value': 0}}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # parse "0,1"-style input into a list of ints (plain `type=list` would
    # split the argument string into single characters)
    parser.add_argument('--gpu_ids', type=lambda s: [int(i) for i in str(s).split(',')], default=[0], help='gpu ids separated by comma')
    parser.add_argument('--mode', type=str, default='train', help='train...')
    parser.add_argument('--method', type=str, default='original', help='original | axform')
    parser.add_argument('--class_choice', default=['airplane'], help='category names | None')
    parser.add_argument('--visual', type=bool, default=True, help='visualization during training')

    parser.add_argument('--batch_size', type=int, default=50, help='input batch size')
    parser.add_argument('--n_epochs', type=int, default=500, help='number of epochs to train the model')
    parser.add_argument('--lr', type=float, default=0.0005, help='learning rate')
    parser.add_argument('--nThreads', default=8, type=int, help='# threads for loading data')
    parser.add_argument('--dataroot', default='./data/ShapeNetCore.v2.PC2048', help='path to point clouds')
    parser.add_argument('--display_winsize', type=int, default=256, help='display window size')
    parser.add_argument('--display_id', type=str, default='gpu', help='window id of the web display')

    opt = parser.parse_args()
    torch.cuda.set_device('cuda:'+str(opt.gpu_ids[0]))
    opt.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = ShapeNetCorev2PC2048Dataset(root=opt.dataroot, class_choice=opt.class_choice, split='train')
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=opt.nThreads)

    val_dataset = ShapeNetCorev2PC2048Dataset(root=opt.dataroot, class_choice=opt.class_choice, split='test')

    if opt.mode == "train":
        now = (datetime.utcnow()+timedelta(hours=8)).isoformat()
        opt.save_path = os.path.join('./log', 'latent_3d_points/autoencoder', now)
        if not os.path.exists(opt.save_path):
            os.makedirs(opt.save_path)
        os.system('cp ./models/latent_3d_points/autoencoder.py %s' % opt.save_path)

        set_seed(42)

        print('------------ Options -------------')
        for k, v in sorted(vars(opt).items()):
            print('%s: %s' % (str(k), str(v)))
        print('-------------- End ----------------')
        with open(os.path.join(opt.save_path, 'runlog.txt'), 'a') as f:
            f.write('------------ Options -------------\n')
            for k, v in
sorted(vars(opt).items()): 258 | f.write('%s: %s\n' % (str(k), str(v))) 259 | f.write('-------------- End ----------------\n') 260 | 261 | runner = Runner(opt) 262 | for epoch in range(1, opt.n_epochs+1): 263 | for i, data in enumerate(dataloader): 264 | runner.train_model(data=data, args=[epoch, i+1, len(dataloader)]) 265 | for i in range(len(val_dataset)): 266 | runner.val_model(data=val_dataset[i], args=[epoch, i+1]) 267 | runner.after_one_epoch(args=[epoch]) -------------------------------------------------------------------------------- /utils/metrics/pytorch_structural_losses/src/approxmatch.cu: -------------------------------------------------------------------------------- 1 | #include "utils.hpp" 2 | 3 | __global__ void approxmatchkernel(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,float * __restrict__ match,float * temp){ 4 | float * remainL=temp+blockIdx.x*(n+m)*2, * remainR=temp+blockIdx.x*(n+m)*2+n,*ratioL=temp+blockIdx.x*(n+m)*2+n+m,*ratioR=temp+blockIdx.x*(n+m)*2+n+m+n; 5 | float multiL,multiR; 6 | if (n>=m){ 7 | multiL=1; 8 | multiR=n/m; 9 | }else{ 10 | multiL=m/n; 11 | multiR=1; 12 | } 13 | const int Block=1024; 14 | __shared__ float buf[Block*4]; 15 | for (int i=blockIdx.x;i=-2;j--){ 24 | for (int j=7;j>-2;j--){ 25 | float level=-powf(4.0f,j); 26 | if (j==-2){ 27 | level=0; 28 | } 29 | for (int k0=0;k0>>(b,n,m,xyz1,xyz2,match,out); 227 | //} 228 | 229 | __global__ void matchcostgrad2kernel(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ grad2){ 230 | __shared__ float sum_grad[256*3]; 231 | for (int i=blockIdx.x;i>>(b,n,m,xyz1,xyz2,match,grad2); 294 | //} 295 | 296 | /*void AddGPUKernel(Dtype *in_a, Dtype *in_b, Dtype *out_c, int N, 297 | cudaStream_t stream)*/ 298 | // temp: TensorShape{b,(n+m)*2} 299 | void approxmatch(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp, cudaStream_t stream){ 300 | approxmatchkernel 301 | <<<32, 512, 0, stream>>>(b,n,m,xyz1,xyz2,match,temp); 302 | 303 | cudaError_t err = cudaGetLastError(); 304 | if (cudaSuccess != err) 305 | throw std::runtime_error(Formatter() 306 | << "CUDA kernel failed : " << std::to_string(err)); 307 | } 308 | 309 | void matchcost(int b,int n,int m,const float * xyz1,const float * xyz2,float * match, float * out, cudaStream_t stream){ 310 | matchcostkernel<<<32,512,0,stream>>>(b,n,m,xyz1,xyz2,match,out); 311 | 312 | cudaError_t err = cudaGetLastError(); 313 | if (cudaSuccess != err) 314 | throw std::runtime_error(Formatter() 315 | << "CUDA kernel failed : " << std::to_string(err)); 316 | } 317 | 318 | void matchcostgrad(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2, cudaStream_t stream){ 319 | matchcostgrad1kernel<<<32,512,0,stream>>>(b,n,m,xyz1,xyz2,match,grad1); 320 | matchcostgrad2kernel<<>>(b,n,m,xyz1,xyz2,match,grad2); 321 | 322 | cudaError_t err = cudaGetLastError(); 323 | if (cudaSuccess != err) 324 | throw std::runtime_error(Formatter() 325 | << "CUDA kernel failed : " << std::to_string(err)); 326 | } 327 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import matplotlib 3 | from matplotlib import cm 4 | from mpl_toolkits.mplot3d import Axes3D 5 | 6 | import torch 7 | import torch.nn as nn 8 | import h5py 9 | import random 
import time
import os
import math
import numpy as np
from torch.autograd import grad

path = os.path.dirname(os.path.abspath(__file__))+'/'
import sys
sys.path.append(path+'furthestPointSampling/')
import fps
sys.path.append(path+'pyTorchChamferDistance/chamfer_distance/')
from chamfer_distance import ChamferDistance
sys.path.append(path+'PyTorchEMD/')
from emd import earth_mover_distance


# ----------------------------------------------------------------------- #
# pytorch or cuda utils
# ----------------------------------------------------------------------- #

def set_seed(seed=42):
    if seed is not None:
        random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            # some cudnn methods can be random even after fixing the seed
            # unless you tell it to be deterministic
            torch.backends.cudnn.deterministic = True


def save_ckpt(runner, step):
    save_path = os.path.join(runner.opt.save_path, 'checkpoints')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if runner.epoch % step == 0:
        torch.save({'epoch': runner.epoch, 'state_dict': runner.network.state_dict()}, os.path.join(save_path, 'epoch_%s.pth' % (runner.epoch)))
    if runner.epoch == 1 or runner.cd['overall']['value'] <= runner.best_cd:
        runner.best_cd = runner.cd['overall']['value']
        torch.save({'epoch': runner.epoch, 'state_dict': runner.network.state_dict()}, os.path.join(save_path, 'epoch_best.pth'))


def preprocess_partial(partial):
    """
    remove the duplicated points in partial
    """
    partial = np.array(list(set([tuple(t) for t in partial])))
    return partial


def preprocess_gt(gt):
    """
    remove the duplicated points in gt
    """
    gt = np.array(list(set([tuple(t) for t in gt])))
    return gt


def resample_pcd(pcd, n):
    """Drop or duplicate points so that pcd has exactly n points"""
    idx = np.random.permutation(pcd.shape[0])
    if idx.shape[0] < n:
        idx = np.concatenate([idx, np.random.randint(pcd.shape[0], size=n-pcd.shape[0])])
    return pcd[idx[:n]]


def farthest_point_sample(xyz, npoints):
    idx = fps.furthest_point_sample(xyz, npoints)
    new_points = fps.gather_operation(xyz.transpose(1, 2).contiguous(), idx).transpose(1, 2).contiguous()
    return new_points


def plot_diff_pcds(pcds, vis, title, legend, win=None):
    '''
    :param pcds: python list of point clouds, possibly with different sizes
    :param legend: legend entry for each point cloud in pcds
    :return:
    '''
    device = pcds[0].device
    assert vis.check_connection()

    pcds_data = torch.Tensor().to(device)
    for i in range(len(pcds)):
        pcds_data = torch.cat((pcds_data, pcds[i]), 0)

    pcds_label = torch.Tensor().to(device)
    for i in range(1, len(pcds) + 1):
        pcds_label = torch.cat((pcds_label, torch.Tensor([i] * pcds[i - 1].shape[0]).to(device)), 0)

    vis.scatter(X=pcds_data, Y=pcds_label,
                opts={
                    'title': title,
                    'markersize': 3,
                    # 'markercolor': np.random.randint(0, 255, (len(pcds), 3)),
                    'webgl': True,
                    'legend': legend},
                win=win)
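# A minimal usage sketch (hypothetical helper, not part of the original file),
# assuming a Visdom server is running (`python -m visdom.server`) so that
# vis.check_connection() passes; 'demo' is an arbitrary environment name.
def _example_plot_diff_pcds():
    from visdom import Visdom
    vis = Visdom(env='demo')
    a = torch.rand(2048, 3).cuda()
    b = torch.rand(1024, 3).cuda()
    plot_diff_pcds([a, b], vis=vis, title='two clouds', legend=['a', 'b'], win='demo_win')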
""" 116 | t = np.linspace(-510, 510, color_num) 117 | rgb = np.round(np.clip(np.stack([-t, 510-np.abs(t), t], axis=1), 0, 255)).astype(np.uint8) 118 | # print(rgb, rgb.shape) 119 | return rgb 120 | 121 | 122 | def generate_cmap(color_num, cmap='viridis'): 123 | """ 124 | https://matplotlib.org/stable/tutorials/colors/colormap-manipulation.html 125 | """ 126 | viridis = cm.get_cmap(cmap) 127 | rgb = np.trunc(viridis(np.linspace(0, 1, color_num))[:,0:3]*255) 128 | return rgb # color_num * 3 129 | 130 | 131 | def plot_diff_regions(pcd, vis, title, region_num, win=None): 132 | ''' 133 | :param pcd: one pcd, diff color for diff region 134 | :return: 135 | ''' 136 | device = pcd.device 137 | assert vis.check_connection() 138 | 139 | pcd_data = pcd 140 | pcd_label = torch.linspace(1, region_num, steps=region_num, dtype=torch.int32).to(device) 141 | pcd_label = pcd_label.reshape(region_num, 1).repeat(1, int(pcd.shape[0]/region_num)).reshape(pcd.shape[0]) 142 | 143 | vis.scatter(X=pcd_data, Y=pcd_label, 144 | # update='append', 145 | opts={ 146 | 'title': title, 147 | 'markersize': 3, 148 | 'markercolor': generate_cmap(region_num), 149 | 'webgl': True, 150 | 'legend': ['region'+str(i) for i in range(region_num)]}, 151 | win=win) 152 | 153 | 154 | def plot_loss_curves(runner, losses, vis, win=None): 155 | if not hasattr(runner, 'curve_data'): 156 | runner.curve_data = {'X':[], 'Y':[], 'legend':list(losses.keys())} 157 | runner.curve_data['X'].append(runner.epoch) 158 | runner.curve_data['Y'].append([losses[k] for k in runner.curve_data['legend']]) 159 | 160 | vis.line( 161 | X=np.array(runner.curve_data['X']), 162 | Y=np.array(runner.curve_data['Y']), 163 | opts={ 164 | 'title': 'runing loss over time', 165 | 'legend': runner.curve_data['legend'], 166 | 'xlabel': 'epoch', 167 | 'ylabel': 'loss'}, 168 | win=win) 169 | 170 | 171 | # ----------------------------------------------------------------------- # 172 | # losses and metrics 173 | # ----------------------------------------------------------------------- # 174 | 175 | class L2_ChamferLoss(nn.Module): 176 | def __init__(self): 177 | super(L2_ChamferLoss, self).__init__() 178 | self.chamfer_dist = ChamferDistance() 179 | 180 | def forward(self, array1, array2): 181 | dist1, dist2 = self.chamfer_dist(array1, array2) 182 | dist = torch.mean(dist1) + torch.mean(dist2) 183 | return dist 184 | 185 | 186 | class L2_ChamferEval(nn.Module): 187 | def __init__(self): 188 | super(L2_ChamferEval, self).__init__() 189 | self.chamfer_dist = ChamferDistance() 190 | 191 | def forward(self, array1, array2): 192 | dist1, dist2 = self.chamfer_dist(array1, array2) 193 | dist = torch.mean(dist1) + torch.mean(dist2) 194 | return dist * 10000 195 | 196 | 197 | class L1_ChamferLoss(nn.Module): 198 | def __init__(self): 199 | super(L1_ChamferLoss, self).__init__() 200 | self.chamfer_dist = ChamferDistance() 201 | 202 | def forward(self, array1, array2): 203 | dist1, dist2 = self.chamfer_dist(array1, array2) 204 | # print(dist1, dist1.shape) [B, N] 205 | dist = torch.mean(torch.sqrt(dist1)) + torch.mean(torch.sqrt(dist2)) 206 | return dist / 2 207 | 208 | 209 | class L1_ChamferEval(nn.Module): 210 | def __init__(self): 211 | super(L1_ChamferEval, self).__init__() 212 | self.chamfer_dist = ChamferDistance() 213 | 214 | def forward(self, array1, array2): 215 | dist1, dist2 = self.chamfer_dist(array1, array2) 216 | dist = torch.mean(torch.sqrt(dist1)) + torch.mean(torch.sqrt(dist2)) 217 | return dist / 2 * 1000 218 | 219 | 220 | class F1Score(nn.Module): 221 | def 
class F1Score(nn.Module):
    def __init__(self):
        super(F1Score, self).__init__()
        self.chamfer_dist = ChamferDistance()

    def forward(self, array1, array2, threshold=0.0001):
        dist1, dist2 = self.chamfer_dist(array1, array2)
        precision_1 = torch.mean((dist1 < threshold).float(), dim=1)
        precision_2 = torch.mean((dist2 < threshold).float(), dim=1)
        fscore = 2 * precision_1 * precision_2 / (precision_1 + precision_2)
        fscore[torch.isnan(fscore)] = 0
        return fscore, precision_1, precision_2


class EMDLoss(nn.Module):
    def __init__(self):
        super(EMDLoss, self).__init__()

    def forward(self, array1, array2):
        dist = earth_mover_distance(array1, array2, transpose=False)
        # print(dist.shape)
        dist = torch.mean(dist) / array1.shape[1]
        return dist


class EMDEval(nn.Module):
    def __init__(self):
        super(EMDEval, self).__init__()

    def forward(self, array1, array2):
        dist = earth_mover_distance(array1, array2, transpose=False)
        dist = torch.mean(dist) / array1.shape[1]
        return dist * 100


# taken from https://github.com/SymenYang/CPCGAN/blob/main/Model/Gradient_penalty.py
class GradientPenalty:
    """Computes the gradient penalty as defined in "Improved Training of Wasserstein GANs"
    (https://arxiv.org/abs/1704.00028)
    Args:
        lambdaGP (float): coefficient of the gradient penalty as defined in the article
        gamma (float): regularization term of the gradient penalty, augment to minimize "ghosts"
        vertex_num (int): number of points per cloud
        device (torch.device): device on which the interpolation noise is created
    """

    def __init__(self, lambdaGP, gamma=1, vertex_num=2500, device=torch.device('cpu')):
        self.lambdaGP = lambdaGP
        self.gamma = gamma
        self.vertex_num = vertex_num
        self.device = device

    def __call__(self, netD, real_data, fake_data):
        batch_size = real_data.size(0)

        fake_data = fake_data[:batch_size]

        alpha = torch.rand(batch_size, 1, 1, requires_grad=True).to(self.device)
        # randomly mix real and fake data
        interpolates = real_data + alpha * (fake_data - real_data)
        # compute output of D for interpolated input
        disc_interpolates = netD(interpolates)
        # compute gradients w.r.t the interpolated outputs
        gradients = grad(outputs=disc_interpolates, inputs=interpolates,
                         grad_outputs=torch.ones(disc_interpolates.size()).to(self.device),
                         create_graph=True, retain_graph=True, only_inputs=True)[0].contiguous().view(batch_size,-1)

        gradient_penalty = (((gradients.norm(2, dim=1) - self.gamma) / self.gamma) ** 2).mean() * self.lambdaGP

        return gradient_penalty
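# A minimal usage sketch of the penalty inside a WGAN-GP critic step; netD is
# a hypothetical discriminator mapping (B, N, 3) clouds to scalar scores, and
# fake stands in for detached generator output.
def _example_gradient_penalty(netD):
    gp = GradientPenalty(lambdaGP=10, gamma=1, device=torch.device('cuda'))
    real = torch.rand(8, 2048, 3).cuda()
    fake = torch.rand(8, 2048, 3).cuda()    # e.g. netG(z).detach()
    d_loss = netD(fake).mean() - netD(real).mean() + gp(netD, real, fake)
    return d_loss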
if __name__ == '__main__':
    from scipy.spatial import cKDTree
    from knn_cuda import KNN

    ref = torch.rand(32, 2048, 3).cuda()
    query = torch.rand(32, 2048, 3).cuda()

    begin = time.time()
    knn = KNN(k=5, transpose_mode=True)
    dist, indx = knn(ref, query)  # B*query*k
    end = time.time()
    shape = indx.shape
    indx = torch.reshape(indx, (shape[0], shape[1]*shape[2]))
    print(dist[0], indx[0])  # distances are sorted from small to large
    print("knn pytorch time: %.4f" % (end-begin))

    np_ref = ref.detach().cpu().numpy()
    np_query = query.detach().cpu().numpy()

    begin = time.time()
    for b in range(np_ref.shape[0]):
        tree = cKDTree(np_ref[b])
        dist, indx = tree.query(np_query[b], k=32)  # (2048, 32)
    end = time.time()
    print("scipy kd-tree time: %.4f" % (end-begin))
--------------------------------------------------------------------------------