├── .gitignore
├── LICENSE
├── README.md
├── chamfer2D
│   ├── chamfer2D.cu
│   ├── chamfer_cuda.cpp
│   ├── dist_chamfer_2D.py
│   └── setup.py
├── chamfer3D
│   ├── chamfer3D.cu
│   ├── chamfer_cuda.cpp
│   ├── dist_chamfer_3D.py
│   └── setup.py
├── chamfer5D
│   ├── chamfer5D.cu
│   ├── chamfer_cuda.cpp
│   ├── dist_chamfer_5D.py
│   └── setup.py
├── chamfer6D
│   ├── chamfer6D.cu
│   ├── chamfer_cuda.cpp
│   ├── dist_chamfer_6D.py
│   └── setup.py
├── chamfer_python.py
├── fscore.py
└── unit_test.py

/.gitignore:
--------------------------------------------------------------------------------
*__pycache__*
/tmp
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 ThibaultGROUEIX

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Requirements: `pip install torch ninja`

# PyTorch Chamfer Distance

Includes a **CUDA** version and a **PYTHON** version with standard PyTorch operations.
NB: in this repo, `dist1` and `dist2` are squared point-cloud Euclidean distances, so adapt your thresholds accordingly (see the short example after the feature list below).

- [x] F-score



### CUDA VERSION

- [x] JIT compilation
- [x] Supports multi-GPU
- [x] 2D point clouds.
- [x] 3D point clouds.
- [x] 5D point clouds.
- [x] Contiguous() safe.
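Because the returned distances are squared, a Euclidean tolerance `t` corresponds to a threshold of `t**2` on `dist1`/`dist2`. A minimal sketch of this (the tolerance value is illustrative only; the API is the one shown in the Usage section below):

```python
import torch, chamfer3D.dist_chamfer_3D, fscore

chamLoss = chamfer3D.dist_chamfer_3D.chamfer_3DDist()
a, b = torch.rand(1, 100, 3).cuda(), torch.rand(1, 100, 3).cuda()
dist1, dist2, idx1, idx2 = chamLoss(a, b)  # squared nearest-neighbour distances
# a Euclidean tolerance of 0.01 becomes 0.01 ** 2 = 1e-4 on squared distances
f_score, precision, recall = fscore.fscore(dist1, dist2, threshold=0.01 ** 2)
```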

### Python Version

- [x] Supports any dimension



### Usage

```python
import torch, chamfer3D.dist_chamfer_3D, fscore
chamLoss = chamfer3D.dist_chamfer_3D.chamfer_3DDist()
points1 = torch.rand(32, 1000, 3).cuda()
points2 = torch.rand(32, 2000, 3, requires_grad=True).cuda()
dist1, dist2, idx1, idx2 = chamLoss(points1, points2)
f_score, precision, recall = fscore.fscore(dist1, dist2)
```



### Add it to your project as a submodule

```shell
git submodule add https://github.com/ThibaultGROUEIX/ChamferDistancePytorch
```



### Benchmark: [forward + backward] pass

- [x] CUDA 10.1, NVIDIA driver 435, PyTorch 1.4
- [x] p1 : 32 x 2000 x dim
- [x] p2 : 32 x 1000 x dim

| *Timing (ms)* | 2D | 3D | 5D |
| ---------- | -------- | ------- | ------- |
| **Cuda Compiled** | **1.2** | 1.4 | 1.8 |
| **Cuda JIT** | 1.3 | **1.4** | **1.5** |
| **Python** | 37 | 37 | 37 |


| *Memory (MB)* | 2D | 3D | 5D |
| ---------- | -------- | ------- | ------- |
| **Cuda Compiled** | 529 | 529 | 549 |
| **Cuda JIT** | **520** | **529** | **549** |
| **Python** | 2495 | 2495 | 2495 |



### What is the chamfer distance?

[Stanford course](http://graphics.stanford.edu/courses/cs468-17-spring/LectureSlides/L14%20-%203d%20deep%20learning%20on%20point%20cloud%20representation%20(analysis).pdf) on 3D deep learning



### Acknowledgment

Original backbone from [Fei Xia](https://github.com/fxia22/pointGAN/blob/master/nndistance/src/nnd_cuda.cu).

JIT cool trick from [Christian Diller](https://github.com/chrdiller)

### Troubleshooting

- `Undefined symbol: Zxxxxxxxxxxxxxxxxx`:

--> Fix: Make sure to `import torch` before you `import chamfer`.
87 | --> Use pytorch.version >= 1.1.0 88 | 89 | - [RuntimeError: Ninja is required to load C++ extension](https://github.com/zhanghang1989/PyTorch-Encoding/issues/167) 90 | 91 | ```shell 92 | wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip 93 | sudo unzip ninja-linux.zip -d /usr/local/bin/ 94 | sudo update-alternatives --install /usr/bin/ninja ninja /usr/local/bin/ninja 1 --force 95 | ``` 96 | 97 | 98 | 99 | 100 | 101 | #### TODO: 102 | 103 | * Discuss behaviour of torch.min() and tensor.min() which causes issues in some pytorch versions 104 | -------------------------------------------------------------------------------- /chamfer2D/chamfer2D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | 11 | 12 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 13 | const int batch=512; 14 | __shared__ float buf[batch*2]; 15 | for (int i=blockIdx.x;ibest){ 117 | result[(i*n+j)]=best; 118 | result_i[(i*n+j)]=best_i; 119 | } 120 | } 121 | __syncthreads(); 122 | } 123 | } 124 | } 125 | // int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){ 126 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){ 127 | 128 | const auto batch_size = xyz1.size(0); 129 | const auto n = xyz1.size(1); //num_points point cloud A 130 | const auto m = xyz2.size(1); //num_points point cloud B 131 | 132 | NmDistanceKernel<<>>(batch_size, n, xyz1.data(), m, xyz2.data(), dist1.data(), idx1.data()); 133 | NmDistanceKernel<<>>(batch_size, m, xyz2.data(), n, xyz1.data(), dist2.data(), idx2.data()); 134 | 135 | cudaError_t err = cudaGetLastError(); 136 | if (err != cudaSuccess) { 137 | printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err)); 138 | //THError("aborting"); 139 | return 0; 140 | } 141 | return 1; 142 | 143 | 144 | } 145 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 146 | for (int i=blockIdx.x;i>>(batch_size,n,xyz1.data(),m,xyz2.data(),graddist1.data(),idx1.data(),gradxyz1.data(),gradxyz2.data()); 171 | NmDistanceGradKernel<<>>(batch_size,m,xyz2.data(),n,xyz1.data(),graddist2.data(),idx2.data(),gradxyz2.data(),gradxyz1.data()); 172 | 173 | cudaError_t err = cudaGetLastError(); 174 | if (err != cudaSuccess) { 175 | printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); 176 | //THError("aborting"); 177 | return 0; 178 | } 179 | return 1; 180 | 181 | } 182 | 183 | -------------------------------------------------------------------------------- /chamfer2D/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ///TMP 5 | //#include "common.h" 6 | /// NOT TMP 7 | 8 | 9 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); 10 | 11 | 12 | int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); 13 | 14 | 15 | 16 | 17 | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor 
dist2, at::Tensor idx1, at::Tensor idx2) { 18 | return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); 19 | } 20 | 21 | 22 | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, 23 | at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { 24 | 25 | return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); 26 | } 27 | 28 | 29 | 30 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 31 | m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); 32 | m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); 33 | } -------------------------------------------------------------------------------- /chamfer2D/dist_chamfer_2D.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | import torch 4 | import importlib 5 | import os 6 | chamfer_found = importlib.find_loader("chamfer_2D") is not None 7 | if not chamfer_found: 8 | ## Cool trick from https://github.com/chrdiller 9 | print("Jitting Chamfer 2D") 10 | cur_path = os.path.dirname(os.path.abspath(__file__)) 11 | build_path = cur_path.replace('chamfer2D', 'tmp') 12 | os.makedirs(build_path, exist_ok=True) 13 | 14 | from torch.utils.cpp_extension import load 15 | chamfer_2D = load(name="chamfer_2D", 16 | sources=[ 17 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]), 18 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer2D.cu"]), 19 | ], build_directory=build_path) 20 | print("Loaded JIT 2D CUDA chamfer distance") 21 | 22 | else: 23 | import chamfer_2D 24 | print("Loaded compiled 2D CUDA chamfer distance") 25 | 26 | # Chamfer's distance module @thibaultgroueix 27 | # GPU tensors only 28 | class chamfer_2DFunction(Function): 29 | @staticmethod 30 | def forward(ctx, xyz1, xyz2): 31 | batchsize, n, dim = xyz1.size() 32 | assert dim==2, "Wrong last dimension for the chamfer distance 's input! Check with .size()" 33 | _, m, dim = xyz2.size() 34 | assert dim==2, "Wrong last dimension for the chamfer distance 's input! 
Check with .size()" 35 | device = xyz1.device 36 | 37 | device = xyz1.device 38 | 39 | dist1 = torch.zeros(batchsize, n) 40 | dist2 = torch.zeros(batchsize, m) 41 | 42 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 43 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 44 | 45 | dist1 = dist1.to(device) 46 | dist2 = dist2.to(device) 47 | idx1 = idx1.to(device) 48 | idx2 = idx2.to(device) 49 | torch.cuda.set_device(device) 50 | 51 | chamfer_2D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 52 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 53 | return dist1, dist2, idx1, idx2 54 | 55 | @staticmethod 56 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 57 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 58 | graddist1 = graddist1.contiguous() 59 | graddist2 = graddist2.contiguous() 60 | device = graddist1.device 61 | 62 | gradxyz1 = torch.zeros(xyz1.size()) 63 | gradxyz2 = torch.zeros(xyz2.size()) 64 | 65 | gradxyz1 = gradxyz1.to(device) 66 | gradxyz2 = gradxyz2.to(device) 67 | chamfer_2D.backward( 68 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 69 | ) 70 | return gradxyz1, gradxyz2 71 | 72 | 73 | class chamfer_2DDist(nn.Module): 74 | def __init__(self): 75 | super(chamfer_2DDist, self).__init__() 76 | 77 | def forward(self, input1, input2): 78 | input1 = input1.contiguous() 79 | input2 = input2.contiguous() 80 | return chamfer_2DFunction.apply(input1, input2) 81 | -------------------------------------------------------------------------------- /chamfer2D/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='chamfer_2D', 6 | ext_modules=[ 7 | CUDAExtension('chamfer_2D', [ 8 | "/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']), 9 | "/".join(__file__.split('/')[:-1] + ['chamfer2D.cu']), 10 | ]), 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) -------------------------------------------------------------------------------- /chamfer3D/chamfer3D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | 11 | 12 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 13 | const int batch=512; 14 | __shared__ float buf[batch*3]; 15 | for (int i=blockIdx.x;ibest){ 127 | result[(i*n+j)]=best; 128 | result_i[(i*n+j)]=best_i; 129 | } 130 | } 131 | __syncthreads(); 132 | } 133 | } 134 | } 135 | // int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){ 136 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){ 137 | 138 | const auto batch_size = xyz1.size(0); 139 | const auto n = xyz1.size(1); //num_points point cloud A 140 | const auto m = xyz2.size(1); //num_points point cloud B 141 | 142 | NmDistanceKernel<<>>(batch_size, n, xyz1.data(), m, xyz2.data(), dist1.data(), idx1.data()); 143 | NmDistanceKernel<<>>(batch_size, m, xyz2.data(), n, xyz1.data(), dist2.data(), idx2.data()); 144 | 145 | cudaError_t err = cudaGetLastError(); 146 | if (err != cudaSuccess) { 147 | printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err)); 148 | //THError("aborting"); 149 | return 0; 150 | } 151 | return 1; 152 | 153 | 
154 | } 155 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 156 | for (int i=blockIdx.x;i>>(batch_size,n,xyz1.data(),m,xyz2.data(),graddist1.data(),idx1.data(),gradxyz1.data(),gradxyz2.data()); 185 | NmDistanceGradKernel<<>>(batch_size,m,xyz2.data(),n,xyz1.data(),graddist2.data(),idx2.data(),gradxyz2.data(),gradxyz1.data()); 186 | 187 | cudaError_t err = cudaGetLastError(); 188 | if (err != cudaSuccess) { 189 | printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); 190 | //THError("aborting"); 191 | return 0; 192 | } 193 | return 1; 194 | 195 | } 196 | 197 | -------------------------------------------------------------------------------- /chamfer3D/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ///TMP 5 | //#include "common.h" 6 | /// NOT TMP 7 | 8 | 9 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); 10 | 11 | 12 | int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); 13 | 14 | 15 | 16 | 17 | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { 18 | return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); 19 | } 20 | 21 | 22 | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, 23 | at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { 24 | 25 | return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); 26 | } 27 | 28 | 29 | 30 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 31 | m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); 32 | m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); 33 | } -------------------------------------------------------------------------------- /chamfer3D/dist_chamfer_3D.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | import torch 4 | import importlib 5 | import os 6 | chamfer_found = importlib.find_loader("chamfer_3D") is not None 7 | if not chamfer_found: 8 | ## Cool trick from https://github.com/chrdiller 9 | print("Jitting Chamfer 3D") 10 | cur_path = os.path.dirname(os.path.abspath(__file__)) 11 | build_path = cur_path.replace('chamfer3D', 'tmp') 12 | os.makedirs(build_path, exist_ok=True) 13 | 14 | from torch.utils.cpp_extension import load 15 | chamfer_3D = load(name="chamfer_3D", 16 | sources=[ 17 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]), 18 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer3D.cu"]), 19 | ], build_directory=build_path) 20 | print("Loaded JIT 3D CUDA chamfer distance") 21 | 22 | else: 23 | import chamfer_3D 24 | print("Loaded compiled 3D CUDA chamfer distance") 25 | 26 | 27 | # Chamfer's distance module @thibaultgroueix 28 | # GPU tensors only 29 | class chamfer_3DFunction(Function): 30 | @staticmethod 31 | def forward(ctx, xyz1, xyz2): 32 | batchsize, n, dim = xyz1.size() 33 | assert dim==3, "Wrong last dimension for the chamfer distance 's input! 
Check with .size()" 34 | _, m, dim = xyz2.size() 35 | assert dim==3, "Wrong last dimension for the chamfer distance 's input! Check with .size()" 36 | device = xyz1.device 37 | 38 | device = xyz1.device 39 | 40 | dist1 = torch.zeros(batchsize, n) 41 | dist2 = torch.zeros(batchsize, m) 42 | 43 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 44 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 45 | 46 | dist1 = dist1.to(device) 47 | dist2 = dist2.to(device) 48 | idx1 = idx1.to(device) 49 | idx2 = idx2.to(device) 50 | torch.cuda.set_device(device) 51 | 52 | chamfer_3D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 53 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 54 | return dist1, dist2, idx1, idx2 55 | 56 | @staticmethod 57 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 58 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 59 | graddist1 = graddist1.contiguous() 60 | graddist2 = graddist2.contiguous() 61 | device = graddist1.device 62 | 63 | gradxyz1 = torch.zeros(xyz1.size()) 64 | gradxyz2 = torch.zeros(xyz2.size()) 65 | 66 | gradxyz1 = gradxyz1.to(device) 67 | gradxyz2 = gradxyz2.to(device) 68 | chamfer_3D.backward( 69 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 70 | ) 71 | return gradxyz1, gradxyz2 72 | 73 | 74 | class chamfer_3DDist(nn.Module): 75 | def __init__(self): 76 | super(chamfer_3DDist, self).__init__() 77 | 78 | def forward(self, input1, input2): 79 | input1 = input1.contiguous() 80 | input2 = input2.contiguous() 81 | return chamfer_3DFunction.apply(input1, input2) 82 | -------------------------------------------------------------------------------- /chamfer3D/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='chamfer_3D', 6 | ext_modules=[ 7 | CUDAExtension('chamfer_3D', [ 8 | "/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']), 9 | "/".join(__file__.split('/')[:-1] + ['chamfer3D.cu']), 10 | ]), 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) -------------------------------------------------------------------------------- /chamfer5D/chamfer5D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | 11 | 12 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 13 | const int batch=2048; 14 | __shared__ float buf[batch*5]; 15 | for (int i=blockIdx.x;ibest){ 147 | result[(i*n+j)]=best; 148 | result_i[(i*n+j)]=best_i; 149 | } 150 | } 151 | __syncthreads(); 152 | } 153 | } 154 | } 155 | // int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){ 156 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){ 157 | 158 | const auto batch_size = xyz1.size(0); 159 | const auto n = xyz1.size(1); //num_points point cloud A 160 | const auto m = xyz2.size(1); //num_points point cloud B 161 | 162 | NmDistanceKernel<<>>(batch_size, n, xyz1.data(), m, xyz2.data(), dist1.data(), idx1.data()); 163 | NmDistanceKernel<<>>(batch_size, m, xyz2.data(), n, xyz1.data(), dist2.data(), idx2.data()); 164 | 165 | cudaError_t err = cudaGetLastError(); 166 | if (err != cudaSuccess) { 167 | printf("error in nnd 
updateOutput: %s\n", cudaGetErrorString(err)); 168 | //THError("aborting"); 169 | return 0; 170 | } 171 | return 1; 172 | 173 | 174 | } 175 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 176 | for (int i=blockIdx.x;i>>(batch_size,n,xyz1.data(),m,xyz2.data(),graddist1.data(),idx1.data(),gradxyz1.data(),gradxyz2.data()); 213 | NmDistanceGradKernel<<>>(batch_size,m,xyz2.data(),n,xyz1.data(),graddist2.data(),idx2.data(),gradxyz2.data(),gradxyz1.data()); 214 | 215 | cudaError_t err = cudaGetLastError(); 216 | if (err != cudaSuccess) { 217 | printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); 218 | //THError("aborting"); 219 | return 0; 220 | } 221 | return 1; 222 | 223 | } 224 | -------------------------------------------------------------------------------- /chamfer5D/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ///TMP 5 | //#include "common.h" 6 | /// NOT TMP 7 | 8 | 9 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); 10 | 11 | 12 | int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); 13 | 14 | 15 | 16 | 17 | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { 18 | return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); 19 | } 20 | 21 | 22 | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, 23 | at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { 24 | 25 | return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); 26 | } 27 | 28 | 29 | 30 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 31 | m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); 32 | m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); 33 | } -------------------------------------------------------------------------------- /chamfer5D/dist_chamfer_5D.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | import torch 4 | import importlib 5 | import os 6 | 7 | chamfer_found = importlib.find_loader("chamfer_5D") is not None 8 | if not chamfer_found: 9 | ## Cool trick from https://github.com/chrdiller 10 | print("Jitting Chamfer 5D") 11 | cur_path = os.path.dirname(os.path.abspath(__file__)) 12 | build_path = cur_path.replace('chamfer5D', 'tmp') 13 | os.makedirs(build_path, exist_ok=True) 14 | 15 | from torch.utils.cpp_extension import load 16 | chamfer_5D = load(name="chamfer_5D", 17 | sources=[ 18 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]), 19 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer5D.cu"]), 20 | ], build_directory=build_path) 21 | print("Loaded JIT 5D CUDA chamfer distance") 22 | 23 | else: 24 | import chamfer_5D 25 | print("Loaded compiled 5D CUDA chamfer distance") 26 | 27 | 28 | # Chamfer's distance module @thibaultgroueix 29 | # GPU tensors only 30 | class chamfer_5DFunction(Function): 31 | @staticmethod 32 | def forward(ctx, xyz1, xyz2): 33 | batchsize, n, dim = xyz1.size() 34 | assert dim==5, "Wrong last dimension for the chamfer 
distance 's input! Check with .size()" 35 | _, m, dim = xyz2.size() 36 | assert dim==5, "Wrong last dimension for the chamfer distance 's input! Check with .size()" 37 | device = xyz1.device 38 | 39 | device = xyz1.device 40 | 41 | dist1 = torch.zeros(batchsize, n) 42 | dist2 = torch.zeros(batchsize, m) 43 | 44 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 45 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 46 | 47 | dist1 = dist1.to(device) 48 | dist2 = dist2.to(device) 49 | idx1 = idx1.to(device) 50 | idx2 = idx2.to(device) 51 | torch.cuda.set_device(device) 52 | 53 | chamfer_5D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 54 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 55 | return dist1, dist2, idx1, idx2 56 | 57 | @staticmethod 58 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 59 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 60 | graddist1 = graddist1.contiguous() 61 | graddist2 = graddist2.contiguous() 62 | device = graddist1.device 63 | 64 | gradxyz1 = torch.zeros(xyz1.size()) 65 | gradxyz2 = torch.zeros(xyz2.size()) 66 | 67 | gradxyz1 = gradxyz1.to(device) 68 | gradxyz2 = gradxyz2.to(device) 69 | chamfer_5D.backward( 70 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 71 | ) 72 | return gradxyz1, gradxyz2 73 | 74 | 75 | class chamfer_5DDist(nn.Module): 76 | def __init__(self): 77 | super(chamfer_5DDist, self).__init__() 78 | 79 | def forward(self, input1, input2): 80 | input1 = input1.contiguous() 81 | input2 = input2.contiguous() 82 | return chamfer_5DFunction.apply(input1, input2) 83 | -------------------------------------------------------------------------------- /chamfer5D/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='chamfer_5D', 6 | ext_modules=[ 7 | CUDAExtension('chamfer_5D', [ 8 | "/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']), 9 | "/".join(__file__.split('/')[:-1] + ['chamfer5D.cu']), 10 | ]), 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) -------------------------------------------------------------------------------- /chamfer6D/chamfer6D.cu: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | 10 | 11 | 12 | __global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ 13 | const int batch=2048; 14 | __shared__ float buf[batch*6]; 15 | for (int i=blockIdx.x;ibest){ 157 | result[(i*n+j)]=best; 158 | result_i[(i*n+j)]=best_i; 159 | } 160 | } 161 | __syncthreads(); 162 | } 163 | } 164 | } 165 | // int chamfer_cuda_forward(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i, cudaStream_t stream){ 166 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2){ 167 | 168 | const auto batch_size = xyz1.size(0); 169 | const auto n = xyz1.size(1); //num_points point cloud A 170 | const auto m = xyz2.size(1); //num_points point cloud B 171 | 172 | NmDistanceKernel<<>>(batch_size, n, xyz1.data(), m, xyz2.data(), dist1.data(), idx1.data()); 173 | NmDistanceKernel<<>>(batch_size, m, xyz2.data(), n, xyz1.data(), dist2.data(), idx2.data()); 174 | 175 | cudaError_t err = cudaGetLastError(); 176 | if (err != cudaSuccess) { 177 | 
printf("error in nnd updateOutput: %s\n", cudaGetErrorString(err)); 178 | //THError("aborting"); 179 | return 0; 180 | } 181 | return 1; 182 | 183 | 184 | } 185 | __global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ 186 | for (int i=blockIdx.x;i>>(batch_size,n,xyz1.data(),m,xyz2.data(),graddist1.data(),idx1.data(),gradxyz1.data(),gradxyz2.data()); 227 | NmDistanceGradKernel<<>>(batch_size,m,xyz2.data(),n,xyz1.data(),graddist2.data(),idx2.data(),gradxyz2.data(),gradxyz1.data()); 228 | 229 | cudaError_t err = cudaGetLastError(); 230 | if (err != cudaSuccess) { 231 | printf("error in nnd get grad: %s\n", cudaGetErrorString(err)); 232 | //THError("aborting"); 233 | return 0; 234 | } 235 | return 1; 236 | 237 | } 238 | -------------------------------------------------------------------------------- /chamfer6D/chamfer_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | ///TMP 5 | //#include "common.h" 6 | /// NOT TMP 7 | 8 | 9 | int chamfer_cuda_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2); 10 | 11 | 12 | int chamfer_cuda_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2); 13 | 14 | 15 | 16 | 17 | int chamfer_forward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor dist1, at::Tensor dist2, at::Tensor idx1, at::Tensor idx2) { 18 | return chamfer_cuda_forward(xyz1, xyz2, dist1, dist2, idx1, idx2); 19 | } 20 | 21 | 22 | int chamfer_backward(at::Tensor xyz1, at::Tensor xyz2, at::Tensor gradxyz1, at::Tensor gradxyz2, at::Tensor graddist1, 23 | at::Tensor graddist2, at::Tensor idx1, at::Tensor idx2) { 24 | 25 | return chamfer_cuda_backward(xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2); 26 | } 27 | 28 | 29 | 30 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 31 | m.def("forward", &chamfer_forward, "chamfer forward (CUDA)"); 32 | m.def("backward", &chamfer_backward, "chamfer backward (CUDA)"); 33 | } -------------------------------------------------------------------------------- /chamfer6D/dist_chamfer_6D.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | import torch 4 | import importlib 5 | import os 6 | 7 | chamfer_found = importlib.find_loader("chamfer_6D") is not None 8 | if not chamfer_found: 9 | ## Cool trick from https://github.com/chrdiller 10 | print("Jitting Chamfer 6D") 11 | cur_path = os.path.dirname(os.path.abspath(__file__)) 12 | build_path = cur_path.replace('chamfer6D', 'tmp') 13 | os.makedirs(build_path, exist_ok=True) 14 | 15 | from torch.utils.cpp_extension import load 16 | chamfer_6D = load(name="chamfer_6D", 17 | sources=[ 18 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer_cuda.cpp"]), 19 | "/".join(os.path.abspath(__file__).split('/')[:-1] + ["chamfer6D.cu"]), 20 | ], build_directory=build_path) 21 | print("Loaded JIT 6D CUDA chamfer distance") 22 | 23 | else: 24 | import chamfer_6D 25 | print("Loaded compiled 6D CUDA chamfer distance") 26 | 27 | 28 | # Chamfer's distance module @thibaultgroueix 29 | # GPU tensors only 30 | class chamfer_6DFunction(Function): 31 | @staticmethod 32 | def forward(ctx, xyz1, xyz2): 33 | batchsize, n, dim = xyz1.size() 34 | assert dim==6, "Wrong last 
dimension for the chamfer distance 's input! Check with .size()" 35 | _, m, dim = xyz2.size() 36 | assert dim==6, "Wrong last dimension for the chamfer distance 's input! Check with .size()" 37 | device = xyz1.device 38 | 39 | device = xyz1.device 40 | 41 | dist1 = torch.zeros(batchsize, n) 42 | dist2 = torch.zeros(batchsize, m) 43 | 44 | idx1 = torch.zeros(batchsize, n).type(torch.IntTensor) 45 | idx2 = torch.zeros(batchsize, m).type(torch.IntTensor) 46 | 47 | dist1 = dist1.to(device) 48 | dist2 = dist2.to(device) 49 | idx1 = idx1.to(device) 50 | idx2 = idx2.to(device) 51 | torch.cuda.set_device(device) 52 | 53 | chamfer_6D.forward(xyz1, xyz2, dist1, dist2, idx1, idx2) 54 | ctx.save_for_backward(xyz1, xyz2, idx1, idx2) 55 | return dist1, dist2, idx1, idx2 56 | 57 | @staticmethod 58 | def backward(ctx, graddist1, graddist2, gradidx1, gradidx2): 59 | xyz1, xyz2, idx1, idx2 = ctx.saved_tensors 60 | graddist1 = graddist1.contiguous() 61 | graddist2 = graddist2.contiguous() 62 | device = graddist1.device 63 | 64 | gradxyz1 = torch.zeros(xyz1.size()) 65 | gradxyz2 = torch.zeros(xyz2.size()) 66 | 67 | gradxyz1 = gradxyz1.to(device) 68 | gradxyz2 = gradxyz2.to(device) 69 | chamfer_6D.backward( 70 | xyz1, xyz2, gradxyz1, gradxyz2, graddist1, graddist2, idx1, idx2 71 | ) 72 | return gradxyz1, gradxyz2 73 | 74 | 75 | class chamfer_6DDist(nn.Module): 76 | def __init__(self): 77 | super(chamfer_6DDist, self).__init__() 78 | 79 | def forward(self, input1, input2): 80 | input1 = input1.contiguous() 81 | input2 = input2.contiguous() 82 | return chamfer_6DFunction.apply(input1, input2) 83 | -------------------------------------------------------------------------------- /chamfer6D/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='chamfer_6D', 6 | ext_modules=[ 7 | CUDAExtension('chamfer_6D', [ 8 | "/".join(__file__.split('/')[:-1] + ['chamfer_cuda.cpp']), 9 | "/".join(__file__.split('/')[:-1] + ['chamfer6D.cu']), 10 | ]), 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) -------------------------------------------------------------------------------- /chamfer_python.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def pairwise_dist(x, y): 5 | xx, yy, zz = torch.mm(x, x.t()), torch.mm(y, y.t()), torch.mm(x, y.t()) 6 | rx = xx.diag().unsqueeze(0).expand_as(xx) 7 | ry = yy.diag().unsqueeze(0).expand_as(yy) 8 | P = rx.t() + ry - 2 * zz 9 | return P 10 | 11 | 12 | def NN_loss(x, y, dim=0): 13 | dist = pairwise_dist(x, y) 14 | values, indices = dist.min(dim=dim) 15 | return values.mean() 16 | 17 | 18 | def batched_pairwise_dist(a, b): 19 | x, y = a.double(), b.double() 20 | bs, num_points_x, points_dim = x.size() 21 | bs, num_points_y, points_dim = y.size() 22 | 23 | xx = torch.pow(x, 2).sum(2) 24 | yy = torch.pow(y, 2).sum(2) 25 | zz = torch.bmm(x, y.transpose(2, 1)) 26 | rx = xx.unsqueeze(1).expand(bs, num_points_y, num_points_x) # Diagonal elements xx 27 | ry = yy.unsqueeze(1).expand(bs, num_points_x, num_points_y) # Diagonal elements yy 28 | P = rx.transpose(2, 1) + ry - 2 * zz 29 | return P 30 | 31 | def distChamfer(a, b): 32 | """ 33 | :param a: Pointclouds Batch x nul_points x dim 34 | :param b: Pointclouds Batch x nul_points x dim 35 | :return: 36 | -closest point on b of points from a 37 | -closest point on a of points from b 38 | -idx of closest point on 
b of points from a
        -idx of closest point on a of points from b
    Works for point clouds of any dimension
    """
    P = batched_pairwise_dist(a, b)
    return torch.min(P, 2)[0].float(), torch.min(P, 1)[0].float(), torch.min(P, 2)[1].int(), torch.min(P, 1)[1].int()

--------------------------------------------------------------------------------
/fscore.py:
--------------------------------------------------------------------------------
import torch

def fscore(dist1, dist2, threshold=0.001):
    """
    Calculates the F-score between two point clouds for a given threshold value.
    :param dist1: Batch, N-Points
    :param dist2: Batch, N-Points
    :param threshold: float
    :return: fscore, precision, recall
    """
    # NB: in this repo, dist1 and dist2 are squared point-cloud Euclidean distances, so adapt the threshold accordingly.
    precision_1 = torch.mean((dist1 < threshold).float(), dim=1)
    precision_2 = torch.mean((dist2 < threshold).float(), dim=1)
    fscore = 2 * precision_1 * precision_2 / (precision_1 + precision_2)
    fscore[torch.isnan(fscore)] = 0
    return fscore, precision_1, precision_2

--------------------------------------------------------------------------------
/unit_test.py:
--------------------------------------------------------------------------------
import torch, time
import chamfer2D.dist_chamfer_2D
import chamfer3D.dist_chamfer_3D
import chamfer5D.dist_chamfer_5D
import chamfer_python

cham2D = chamfer2D.dist_chamfer_2D.chamfer_2DDist()
cham3D = chamfer3D.dist_chamfer_3D.chamfer_3DDist()
cham5D = chamfer5D.dist_chamfer_5D.chamfer_5DDist()

from torch.autograd import Variable
from fscore import fscore


def test_chamfer(distChamfer, dim):
    points1 = torch.rand(4, 100, dim).cuda()
    points2 = torch.rand(4, 200, dim, requires_grad=True).cuda()
    dist1, dist2, idx1, idx2 = distChamfer(points1, points2)

    loss = torch.sum(dist1)
    loss.backward()

    mydist1, mydist2, myidx1, myidx2 = chamfer_python.distChamfer(points1, points2)
    d1 = (dist1 - mydist1) ** 2
    d2 = (dist2 - mydist2) ** 2
    assert (
        torch.mean(d1) + torch.mean(d2) < 0.00000001
    ), "chamfer cuda and chamfer normal are not giving the same results"

    xd1 = idx1 - myidx1
    xd2 = idx2 - myidx2
    assert (
        torch.norm(xd1.float()) + torch.norm(xd2.float()) == 0
    ), "chamfer cuda and chamfer normal are not giving the same results"
    print("fscore:", fscore(dist1, dist2))
    print("Unit test passed")


def timings(distChamfer, dim):
    p1 = torch.rand(32, 2000, dim).cuda()
    p2 = torch.rand(32, 1000, dim).cuda()
    print("Timings : Start CUDA version")
    start = time.time()
    num_it = 100
    for i in range(num_it):
        points1 = Variable(p1, requires_grad=True)
        points2 = Variable(p2)
        mydist1, mydist2, idx1, idx2 = distChamfer(points1, points2)
        loss = torch.sum(mydist1)
        loss.backward()
    print(f"Elapsed time forward backward is {(time.time() - start)/num_it} seconds.")

    print("Timings : Start Pythonic version")
    start = time.time()
    for i in range(num_it):
        points1 = Variable(p1, requires_grad=True)
        points2 = Variable(p2)
        mydist1, mydist2, idx1, idx2 = chamfer_python.distChamfer(points1, points2)
        loss = torch.sum(mydist1)
        loss.backward()
    print(f"Elapsed time forward backward is {(time.time() - start)/num_it} seconds.")


dims = [2, 3, 5]
for i, cham in enumerate([cham2D, cham3D, cham5D]):
    print(f"testing Chamfer {dims[i]}D")
    test_chamfer(cham, dims[i])
    timings(cham, dims[i])

--------------------------------------------------------------------------------
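For reference, a minimal CPU-only sketch of the pure-Python fallback (`chamfer_python.distChamfer` and `fscore.fscore`); the batch size, point counts, 4-D dimensionality and threshold below are arbitrary illustration values:

```python
import torch
import chamfer_python
from fscore import fscore

# Two small batched point clouds; the pure-Python version supports any
# dimension and runs on CPU, unlike the CUDA extensions above.
a = torch.rand(2, 50, 4)
b = torch.rand(2, 80, 4)

dist1, dist2, idx1, idx2 = chamfer_python.distChamfer(a, b)   # squared distances + nearest-neighbour indices
chamfer_loss = torch.mean(dist1) + torch.mean(dist2)          # symmetric (squared) chamfer distance
f, precision, recall = fscore(dist1, dist2, threshold=0.1 ** 2)
print(chamfer_loss.item(), f.mean().item())
```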