├── GraphReg
    ├── input.png
    ├── result.png
    ├── input_clean.png
    ├── result_clean.png
    ├── TransMat.m
    ├── AxisAngle.m
    ├── readme.txt
    ├── computeVariationAndFeaturePoint.m
    ├── cuda
    │   ├── MatchingMethods.cuh
    │   ├── KernelMatching.cu
    │   ├── KernelMatching.ptx
    │   └── CudaMath.cuh
    ├── SalientFeature.m
    ├── GraphReg.m
    ├── AdaptSimulatedAnnealing.m
    ├── AdaptiveSimulatedAnnealingOptimization.m
    ├── findPointNormals.m
    └── IterateOnGPU5.m
└── README.md


/GraphReg/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input.png


--------------------------------------------------------------------------------
/GraphReg/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result.png


--------------------------------------------------------------------------------
/GraphReg/input_clean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input_clean.png


--------------------------------------------------------------------------------
/GraphReg/result_clean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result_clean.png


--------------------------------------------------------------------------------
/GraphReg/TransMat.m:
--------------------------------------------------------------------------------
 1 | function [ M ] = TransMat( vector )
 2 | %TRANSMAT Build the homogeneous 4x4 matrix of a 3-D translation.
 3 | %   M = TRANSMAT(VECTOR) returns the 4x4 identity with the first three
 4 | %   elements of VECTOR written into rows 1-3 of the fourth column.
 5 | %
 6 | %input:
 7 | %   vector  the translation vector (x,y,z); only elements 1..3 are read,
 8 | %           row and column vectors are both accepted
 9 | %
10 | %return:
11 | %   M   the 4x4 translation matrix
12 | 
13 | % Take the three offsets and force them into a column.
14 | offset = vector(1:3);
15 | 
16 | % Identity rotation block next to the offset column, then the fixed
17 | % homogeneous bottom row.
18 | M = [eye(3), offset(:); ...
19 |      0 0 0 1];
20 | end
21 | 
22 | 


--------------------------------------------------------------------------------
/GraphReg/AxisAngle.m:
--------------------------------------------------------------------------------
 1 | function [ T ] = AxisAngle( axis, angle )
 2 | %AXISANGLE Turn an axis-angle rotation into a 4x4 homogeneous matrix.
 3 | %   T = AXISANGLE(AXIS, ANGLE) scales AXIS to unit length and builds the
 4 | %   rotation by ANGLE about it. ANGLE is handed to cos/sin directly, i.e. it
 5 | %   is interpreted in radians. The upper-left 3x3 block of T holds the
 6 | %   rotation, the fourth column is [0;0;0;1] and the last row is [0 0 0 1].
 7 | %
 8 | %   A zero-length AXIS is divided by zero during normalisation, which
 9 | %   produces NaN entries in the rotation block.
10 | 
11 | % unit rotation axis and its components
12 | u = axis / norm(axis);
13 | ux = u(1);
14 | uy = u(2);
15 | uz = u(3);
16 | 
17 | % trigonometric terms shared by all entries
18 | cosA = cos(angle);
19 | sinA = sin(angle);
20 | versA = 1.0 - cosA;   % 1 - cos(angle)
21 | 
22 | % 3x3 rotation block, one matrix row per line
23 | R = [(ux*ux*versA + cosA),    (ux*uy*versA - uz*sinA), (ux*uz*versA + uy*sinA); ...
24 |      (uy*ux*versA + uz*sinA), (uy*uy*versA + cosA),    (uy*uz*versA - ux*sinA); ...
25 |      (uz*ux*versA - uy*sinA), (uz*uy*versA + ux*sinA), (uz*uz*versA + cosA)];
26 | 
27 | % embed into a homogeneous transform without translation
28 | T = [R, zeros(3,1); ...
29 |      0, 0, 0, 1];
30 | end
31 | 
32 | 


--------------------------------------------------------------------------------
/GraphReg/readme.txt:
--------------------------------------------------------------------------------
 1 | This work aims to improve the robustness and convergence of dynamical point cloud registration methods. 
 2 | 
 3 | The designed method is a local registration framework and is an effective alternative to ICP.
 4 | 
 5 | We use graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and obtain features that are invariant to rigid transformations.
 6 | 
 7 | Run "GraphReg.m" to see demo examples.
 8 | 
 9 | Several parameters, such as "cool_down", can be adjusted for better registration results or faster convergence.
10 | 
11 | 
12 | If you have any questions, please do not hesitate to contact me 
13 | myzhao@baai.ac.cn
14 | 
15 | 
16 | If you find our work useful, please cite 
17 | M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022.


--------------------------------------------------------------------------------
/GraphReg/computeVariationAndFeaturePoint.m:
--------------------------------------------------------------------------------
 1 | function [pointScore,featScore] = computeVariationAndFeaturePoint(coords, geoFeat,K)
 2 | %==========================
 3 | % Compute the point response intensity and the graph gradient w.r.t. normals and curvatures.
 4 | %
 5 | % Every point is linked to its nearest neighbours with Gaussian weights that
 6 | % are normalised to sum to one per point.
 7 | %
 8 | %Input:
 9 | % coords: Nx3 array point coordinate
10 | % geoFeat: Nx4  point normal (columns 1:3) and curvature (column 4)
11 | % K: size of the nearest-neighbour query. The closest hit (normally the
12 | %    point itself) is discarded, so K-1 neighbours are used. Values larger
13 | %    than N are clamped to N.
14 | %
15 | % Output:
16 | % pointScore: Nx1  the response of each data point with respect to local
17 | %             variation: squared distance to the weighted neighbour mean
18 | % featScore: Nx2  [weighted difference to the adjacent normals, curvature]
19 | %==========================
20 | 
21 | z = single( coords );
22 | N = size(z, 1);
23 | 
24 | % nothing to score for an empty cloud
25 | if N == 0
26 |     pointScore = zeros(0,1,'single');
27 |     featScore = zeros(0,2);
28 |     return;
29 | end
30 | 
31 | % Clamp K so that column K of the distance matrices below always exists,
32 | % even for clouds with fewer than K points.
33 | K = min(K, N);
34 | 
35 | %----------determine the radius and build the adjacency matrix
36 | 
37 | % radius = largest K-th neighbour distance among 20 evenly spaced samples
38 | sampleInx = round( linspace(1, N, 20) );
39 | [~, dist] = knnsearch( coords, coords(sampleInx, :), 'k', K,  'distance', 'euclidean');
40 | radius = max( dist(:, K) );
41 | 
42 | % find the K nearest points in z of each point in z and drop the first
43 | % column (the closest hit, normally the query point itself)
44 | [idx, dist] = knnsearch( z, z, 'k', K,  'distance', 'euclidean');
45 | dist(:,1)=[];
46 | idx(:,1)=[];
47 | 
48 | % Gaussian edge weights, normalised per row. A row whose weights are all
49 | % zero turns into NaN here (0/0) and yields a NaN pointScore.
50 | tmp=exp( -(dist./(radius*0.5) ).^2);
51 | weight=tmp./sum(tmp,2);
52 | 
53 | weightPointSum=zeros(N,3);
54 | normalDiff=zeros(N,1);   % one value per point
55 | 
56 | %--------------------compute the featScore
57 | for i=1:N
58 |     % neighbours of the current point and their weights as a column
59 |     nearInx=idx(i,:);
60 |     nearWeight=weight(i,:)';
61 |     
62 |     % weighted mean position of the neighbourhood
63 |     weightPointSum(i,:)=sum(z(nearInx,:).*nearWeight,1);
64 |     
65 |     % squared normal difference to each neighbour, accumulated with the
66 |     % square roots of the weights
67 |     normalDiffSquare=sum((geoFeat(i,1:3)-geoFeat(nearInx,1:3)).^2,2);
68 |     normalDiff(i)=sqrt(nearWeight')*normalDiffSquare;
69 | end
70 | 
71 | %record scores of point response intensity and local geometry variation
72 | pointScore=sum((z-weightPointSum).^2,2);
73 | featScore=[normalDiff geoFeat(:,4)];


--------------------------------------------------------------------------------
/GraphReg/cuda/MatchingMethods.cuh:
--------------------------------------------------------------------------------
 1 | #include "CudaMath.cuh"
 2 | 
 3 | __device__ void Gravity(const float* p1, const float* p2, float* force) 
 4 | {
 5 |     // Writes force = (p1 - p2) / max(|p1 - p2|^3, 1e-7) into force[0..2].
 6 |     // The clamp on the cubed distance avoids a division by zero when the
 7 |     // two points (nearly) coincide.
 8 |     const float dx = p1[0] - p2[0];
 9 |     const float dy = p1[1] - p2[1];
10 |     const float dz = p1[2] - p2[2];
11 | 
12 |     const float len = sqrt(dx*dx + dy*dy + dz*dz);
13 | 
14 |     float denom = len*len*len;
15 |     if(denom < 0.0000001)
16 |         denom = 0.0000001;
17 | 
18 |     force[0] = dx / denom;
19 |     force[1] = dy / denom;
20 |     force[2] = dz / denom;
21 | 
22 | }
23 | 
24 | __device__ void Coulomb(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force) 
25 | {
26 |     // Gravity-like force scaled by a feature similarity c = 1 - |cDist|,
27 |     // where cDist is the sum of squared feature differences between point
28 |     // idx1 of f1 and point idx2 of f2, divided by sqrt(numFeat).
29 |     // Features are read as numFeat consecutive floats per point.
30 |     const float dx = p1[0] - p2[0];
31 |     const float dy = p1[1] - p2[1];
32 |     const float dz = p1[2] - p2[2];
33 | 
34 |     // accumulate the squared feature differences
35 |     const float* a = f1 + idx1*numFeat;
36 |     const float* b = f2 + idx2*numFeat;
37 |     float cDist = 0.f;
38 |     for(int k = 0; k < numFeat; ++k)
39 |     {
40 |         const float diff = abs(a[k] - b[k]);
41 |         cDist += abs(diff*diff);
42 |     }
43 |     cDist = cDist / sqrt((float)numFeat);
44 | 
45 |     const float c = 1-abs(cDist);
46 | 
47 |     // cubed distance, clamped away from zero before dividing
48 |     const float len = sqrt(dx*dx + dy*dy + dz*dz);
49 |     float denom = len*len*len;
50 |     if(denom < 0.0000001)
51 |         denom = 0.0000001;
52 | 
53 |     force[0] = c * dx / denom;
54 |     force[1] = c * dy / denom;
55 |     force[2] = c * dz / denom;
56 | }
57 | 
58 | __device__ void CoulombPM(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force) 
59 | {
60 |     // Variant of Coulomb whose similarity factor is c = 2*(1 - cDist) - 1.
61 |     // cDist is the sum of squared feature differences between point idx1 of
62 |     // f1 and point idx2 of f2 divided by sqrt(numFeat); c drops below zero
63 |     // once cDist exceeds 0.5, which reverses the direction of the force.
64 |     const float dx = p1[0] - p2[0];
65 |     const float dy = p1[1] - p2[1];
66 |     const float dz = p1[2] - p2[2];
67 | 
68 |     // feature rows of the two points (numFeat consecutive floats each)
69 |     const float* a = f1 + idx1*numFeat;
70 |     const float* b = f2 + idx2*numFeat;
71 |     float cDist = 0.f;
72 |     for(int k = 0; k < numFeat; ++k)
73 |     {
74 |         const float diff = abs(a[k] - b[k]);
75 |         cDist += abs(diff*diff);
76 |     }
77 |     cDist = cDist / sqrt((float)numFeat);
78 |     const float c = 2 * (1 - cDist) - 1;
79 | 
80 |     // cubed distance, clamped away from zero before dividing
81 |     const float len = sqrt(dx*dx + dy*dy + dz*dz);
82 |     float denom = len*len*len;
83 |     if(denom < 0.0000001)
84 |         denom = 0.0000001;
85 | 
86 |     force[0] = c * dx / denom;
87 |     force[1] = c * dy / denom;
88 |     force[2] = c * dz / denom;
89 | }


--------------------------------------------------------------------------------
/GraphReg/SalientFeature.m:
--------------------------------------------------------------------------------
 1 | function [pt,feature_score,struct_tensor]=SalientFeature(pc,num,removeOut)
 2 | %========================================
 3 | % Compute the point response intensity and the local geometry variation
 4 | % Input:
 5 | %       pc   the point cloud (its Location property is read as Nx3)
 6 | %       num  the downsampling rate of the salient points. Currently not
 7 | %            used: no down-sampling is performed on either path; the
 8 | %            argument is kept so existing callers keep working
 9 | %       removeOut  if removing outliers according to the x84 statistics
10 | % Output:
11 | %       pt   point cloud holding the retained points
12 | %       feature_score  the point response intensity of the retained points
13 | %       struct_tensor  the local geometry variation of the retained points
14 | %
15 | % Points whose response intensity is NaN are dropped on both paths, so row
16 | % k of feature_score and struct_tensor always belongs to point k of pt.
17 | %========================================
18 | 
19 | coordinate=pc.Location;
20 | 
21 | %------------compute normal and curvature for each data point----------
22 | [Nor,Cur]=findPointNormals(coordinate,10,[0,0,10],true); 
23 | 
24 | geoFeat=[Nor Cur];
25 | 
26 | %-----------point response intensity (pointScore) and local feature variation (featScore)-----------------
27 | [pointScore,featScore] = computeVariationAndFeaturePoint(coordinate, geoFeat,10);
28 | 
29 | % keep only the points with a defined response
30 | validInx=~isnan(pointScore);
31 | featScore=featScore(validInx,:);
32 | pointScore=pointScore(validInx,:);
33 | coordinate=coordinate(validInx,:);
34 | 
35 | 
36 | if removeOut
37 |     % use robust statistics x84 rule to remove scattered outliers:
38 |     % the threshold is 5.2 times the median absolute deviation of the scores
39 |     x84_rule=median(abs(pointScore-median(pointScore)));
40 |     M=find(pointScore<=5.2*x84_rule);%5.2 in default or smaller values to remove more outliers 
41 |     
42 |     %----------retained points are stored as point cloud---------------
43 |     pt=pointCloud(coordinate(M,:));
44 |     feature_score=pointScore(M,:);
45 |     struct_tensor=featScore(M,:);
46 | else
47 |     %---------no sampling: return every point with a defined response.
48 |     % The input cloud is handed back untouched when nothing was dropped;
49 |     % otherwise the same rows are removed from it so that pt stays aligned
50 |     % with the score arrays.
51 |     if all(validInx)
52 |         pt=pc;
53 |     else
54 |         pt=select(pc,find(validInx));
55 |     end
56 |     feature_score=pointScore;
57 |     struct_tensor=featScore;
58 | end
59 | 


--------------------------------------------------------------------------------
/GraphReg/GraphReg.m:
--------------------------------------------------------------------------------
 1 | %===========================
 2 | % GraphReg: Dynamical point cloud registration with geometry-aware graph signal
 3 | % processing
 4 | % GraphReg is a local registration framework aiming to produce robust and
 5 | % accurate results through the graph signal processing theory and the adaptive simulated annealing mechanism. 
 6 | % M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, 
 7 | %"GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," 
 8 | % IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022.
 9 | %==========================
10 | 
11 | clear;
12 | clc;
13 | close all;
14 | 
15 | %------read the input point clouds
16 | % Paths are assembled with fullfile so that the demo also runs on systems
17 | % where the backslash is not a path separator (Linux, macOS).
18 | dataDir=fullfile('.','data');
19 | 
20 | % clean scans
21 | % file1=fullfile(dataDir,'bun000.ply');
22 | % file2=fullfile(dataDir,'bun045.ply');
23 | 
24 | % file1=fullfile(dataDir,'dragonStandRight_0.ply');
25 | % file2=fullfile(dataDir,'dragonStandRight_24.ply');
26 | 
27 | % add outliers (active selection)
28 | file1=fullfile(dataDir,'bun000_outlier_0.1.ply');
29 | file2=fullfile(dataDir,'bun045_outlier_0.1.ply');
30 | 
31 | % file1=fullfile(dataDir,'bun000_outlier_0.5.ply');
32 | % file2=fullfile(dataDir,'bun045_outlier_0.5.ply');
33 | 
34 | 
35 | tgt=pcread(file1);
36 | src=pcread(file2);
37 | 
38 | %------downsample and show the point clouds 
39 | tgt=pcdownsample(tgt,'gridAverage',0.001); 
40 | src=pcdownsample(src,'gridAverage',0.001);
41 | 
42 | figure;
43 | pcshowpair(src,tgt);
44 | title("Input point clouds");
45 | 
46 | 
47 | %------compute the point intensity (scoreP,scoreQ) and the local geometric feature (featP, featQ)
48 | [tgt2,scoreP,featP]=SalientFeature(tgt,10,true);% 10 in general
49 | [src2,scoreQ,featQ]=SalientFeature(src,10,true);
50 | 
51 | 
52 | % transpose so that points, features and scores are stored column-wise
53 | tgtPt=tgt2.Location';
54 | srcPt=src2.Location';
55 | 
56 | 
57 | %-----------start registration----------------
58 | % add features and score to a struct 
59 | feat = struct('p', featP', 'q', featQ');
60 | score=struct('pScore',scoreP','qScore',scoreQ');
61 | 
62 | 
63 | addpath('cuda');
64 | 
65 | 
66 | cool_down=0.9;%0.9 in default, adjust cool_down for better results or faster convergence process such as 0.8:0.02:0.98;
67 | 
68 | 
69 | [T] = AdaptiveSimulatedAnnealingOptimization(tgtPt, srcPt, feat,score,cool_down);
70 | 
71 | 
72 | % apply the estimated transform to the full-resolution source cloud
73 | tform=affine3d(T');
74 | 
75 | src2tgt=pctransform(src,tform);
76 | 
77 | 
78 | figure;
79 | pcshowpair(src2tgt,tgt);
80 | title("Registration results");
81 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GraphReg (TIP 2022)
 2 | <table>
 3 |     <tr>
 4 |         <td ><center><img src="https://github.com/zikai1/GraphReg/blob/main/GraphReg/input_clean.png" width = "200" height = "200"> </center></td>
 5 |         <td ><center><img src="https://github.com/zikai1/GraphReg/blob/main/GraphReg/result_clean.png" width = "200" height = "200"> </center></td>
 6 |         <td ><center><img src="https://github.com/zikai1/GraphReg/blob/main/GraphReg/input.png" width = "200" height = "200"> </center></td>
 7 |         <td ><center><img src="https://github.com/zikai1/GraphReg/blob/main/GraphReg/result.png" width = "200" height = "200"> </center></td>
 8 |     </tr>
 9 | </table>
10 | 
11 | ## 1. Motivation
12 | This work aims to improve the robustness and convergence of current dynamical point cloud registration methods. 
13 | 
14 | The designed method is a local registration framework and is an effective alternative to ICP.
15 | 
16 | We use graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and obtain features that are invariant to rigid transformations.
17 | 
18 | ## 2. Usage
19 | 
20 | `Run "GraphReg.m" to see demo examples.`
21 | 
22 | Several parameters can be adjusted for better registration results or faster convergence:
23 | + **cool_down**: Smaller values typically result in faster convergence but with fluctuations, whereas larger ones ensure more accurate registrations. Empirically, values in the interval [0.8, 0.98] give good results. 
24 | + **$\alpha$**: Although $\alpha=5.2$ removes outliers well in most test settings, a smaller $\alpha$ further improves the robustness when there is a large percentage of outliers. 
25 | 
26 | ## 3. Contact
27 | If you have any questions, please do not hesitate to contact myzhao@baai.ac.cn
28 | 
29 | ## 4. Citation
30 | If you find this implementation useful for your research, please cite:
31 | + M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022, doi: 10.1109/TIP.2022.3223793.
32 | + P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard and F. Ernst, "Efficient Registration of High-Resolution Feature Enhanced Point Clouds," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 41, no. 5, pp. 1102-1115, 1 May 2019, doi: 10.1109/TPAMI.2018.2831670.
33 | 


--------------------------------------------------------------------------------
/GraphReg/AdaptSimulatedAnnealing.m:
--------------------------------------------------------------------------------
 1 | function [ temperature,AcceptRate,transition, Ekin ] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature, Ekin, Etrans, Erot,cool_down)
 2 | %ADAPTSIMULATEDANNEALING One update of the adaptive annealing schedule.
 3 | %   Appends log10(Etrans + Erot) to the energy history Ekin. Once the
 4 | %   history holds more than three entries, the latest energy change goes
 5 | %   through a Metropolis-style acceptance test, the running acceptance rate
 6 | %   is updated and the temperature is adapted towards a target rate that
 7 | %   depends on the progress iter/EvalsMax.
 8 | %
 9 | %   Inputs:
10 | %     iter, EvalsMax   current iteration and iteration budget
11 | %     AcceptRate       running acceptance rate from the previous call
12 | %     temperature      current annealing temperature
13 | %     Ekin             history of log10 energies (extended horizontally)
14 | %     Etrans, Erot     the two energy terms that are summed into the current
15 | %                      kinetic energy (presumably translation and rotation)
16 | %     cool_down        divisor applied to the temperature when the
17 | %                      acceptance rate is not above the target rate
18 | %
19 | %   Outputs:
20 | %     temperature, AcceptRate   updated values (unchanged while the history
21 | %                      has at most three entries)
22 | %     transition       1 if the latest step was rejected, otherwise 0
23 | %     Ekin             history with the new entry appended
24 | 
25 | % append the current kinetic energy on a log10 scale
26 | Ekin = [Ekin, log(Etrans + Erot)/log(10)];
27 | transition = 0;
28 | 
29 | % the schedule only starts adapting once some history is available
30 | if length(Ekin) <= 3
31 |     return;
32 | end
33 | 
34 | %% acceptance test on the change between the two latest energies
35 | dE = Ekin(end) - Ekin(end - 1);
36 | 
37 | % a decrease is always accepted; otherwise accept with probability
38 | % exp(-dE/temperature). rand is only drawn when there is no decrease.
39 | accepted = (dE < 0) || (rand < min(1, exp(-dE / temperature)));
40 | 
41 | % running acceptance rate over a window of 500 updates
42 | if accepted
43 |     AcceptRate = 1/500*(499*AcceptRate + 1);
44 | else
45 |     transition = 1;
46 |     AcceptRate = 1/500*(499*AcceptRate);
47 | end
48 | 
49 | %% target acceptance rate as a function of the progress
50 | progress = iter/EvalsMax;
51 | if progress < 0.15
52 |     LamRate = 0.44 + 0.56 * 560^(-progress/0.15);
53 | elseif progress < 0.65
54 |     LamRate = 0.44;
55 | else
56 |     LamRate = 0.44 * 440^(-((progress - 0.65)/0.35));
57 | end
58 | 
59 | %% adaptive temperature update
60 | % NOTE(review): the lowering factor is the literal 0.9 while the raising
61 | % step divides by cool_down; both coincide only for cool_down = 0.9.
62 | % Confirm whether the literal is intentional.
63 | if AcceptRate > LamRate
64 |     temperature = 0.9*temperature;
65 | else
66 |     temperature = temperature /cool_down; % cool_down can be adjusted for better results
67 | end
68 | end
69 | 


--------------------------------------------------------------------------------
/GraphReg/cuda/KernelMatching.cu:
--------------------------------------------------------------------------------
  1 | #include "MatchingMethods.cuh"
  2 | 
  3 | __device__ void GetForce(int matchingMethod, const float* p1, const float* p2, const int numFeat, const float* f1, int id1, const float* f2, int id2, float* force)
  4 | {
  5 | 	// Selects the pairwise force model and lets it write the result to
  6 | 	// force[0..2]:
  7 | 	//
  8 | 	//   matchingMethod == 1 : Coulomb
  9 | 	//   matchingMethod == 2 : CoulombPM
 10 | 	//   anything else       : Gravity (0 is the id listed for gravitation,
 11 | 	//                         unknown ids fall back to the same model)
 12 | 	//
 13 | 	// The feature arguments (numFeat, f1, id1, f2, id2) are only forwarded
 14 | 	// to the two Coulomb variants; Gravity receives the positions alone.
 15 | 	if (matchingMethod == 1)
 16 | 	{
 17 | 		Coulomb(p1, p2, numFeat, f1, id1, f2, id2, force);
 18 | 		return;
 19 | 	}
 20 | 
 21 | 	if (matchingMethod == 2)
 22 | 	{
 23 | 		CoulombPM(p1, p2, numFeat, f1, id1, f2, id2, force);
 24 | 		return;
 25 | 	}
 26 | 
 27 | 	// matchingMethod == 0 and every unrecognised id
 28 | 	Gravity(p1, p2, force);
 29 | 	
 30 | }
 31 | 
 32 | __global__ void MatchingStep(   const float* cloud1, const float* cloud2,
 33 |                                 const int* index1, const int* index2,
 34 |                                 const float* feat1, const float* feat2,
 35 |                                 const int numFeat, const float* centroid2,
 36 |                                 const float* M,
 37 |                                 float* force, float* torque, float* momentInt,
 38 |                                 const int matchingMethod,
 39 |                                 const int numPoints, const int numSteps)
 40 | {
 41 | 	// One thread per sampled point of cloud2. The thread transforms its
 42 | 	// point with M, sums the pairwise forces exerted by numSteps sampled
 43 | 	// points of cloud1 and stores the force, the torque about centroid2
 44 | 	// and the distance to centroid2.
 45 | 	//
 46 | 	//   cloud1, cloud2   packed xyz triples (point i at [3*i .. 3*i+2])
 47 | 	//   index1           point indices into cloud1, entries 0..numSteps-1 are read
 48 | 	//   index2           point indices into cloud2, one entry per thread
 49 | 	//   feat1, feat2     feature arrays, forwarded to GetForce
 50 | 	//   numFeat          number of features per point
 51 | 	//   centroid2        xyz of the reference point for the torque; it is
 52 | 	//                    used as given and is not transformed here
 53 | 	//   M                12 floats read as a row-major 3x4 matrix [R | t]
 54 | 	//   force, torque    outputs, 3 floats per thread
 55 | 	//   momentInt        output, 1 float per thread
 56 | 	//   matchingMethod   force model id, see GetForce
 57 | 	//   numPoints        number of threads that produce output
 58 | 	//   numSteps         number of cloud1 samples each thread visits
 59 | 
 60 | 	// Global thread index. For a single-block launch this equals
 61 | 	// threadIdx.x; with several blocks each block now handles its own
 62 | 	// range of points instead of every block recomputing the first
 63 | 	// blockDim.x points. The precompiled KernelMatching.ptx has to be
 64 | 	// regenerated for this to take effect.
 65 | 	const int tID = blockIdx.x * blockDim.x + threadIdx.x;
 66 | 
 67 | 	if(tID >= numPoints)
 68 | 		return;
 69 | 
 70 | 	const int idx2 = index2[tID];
 71 | 
 72 | 	float p2[3];
 73 | 	p2[0] = cloud2[idx2 * 3 + 0];
 74 | 	p2[1] = cloud2[idx2 * 3 + 1];
 75 | 	p2[2] = cloud2[idx2 * 3 + 2];
 76 | 
 77 | 	// apply the current transformation to the point of cloud2
 78 | 	float tp2[3];
 79 | 	tp2[0] = M[0]*p2[0] + M[1]*p2[1] + M[2]*p2[2]  + M[3];
 80 | 	tp2[1] = M[4]*p2[0] + M[5]*p2[1] + M[6]*p2[2]  + M[7];
 81 | 	tp2[2] = M[8]*p2[0] + M[9]*p2[1] + M[10]*p2[2] + M[11];
 82 | 
 83 | 	// accumulated force on the transformed point
 84 | 	float f[3] = {0.f, 0.f, 0.f};
 85 | 
 86 | 	for(int i = 0; i < numSteps; i++)
 87 | 	{
 88 | 		// every thread visits the same numSteps samples of cloud1
 89 | 		const int idx1 = index1[i];
 90 | 
 91 | 		float p1[3];
 92 | 		p1[0] = cloud1[idx1 * 3 + 0];
 93 | 		p1[1] = cloud1[idx1 * 3 + 1];
 94 | 		p1[2] = cloud1[idx1 * 3 + 2];
 95 | 
 96 | 		float fDir[3];
 97 | 
 98 | 		GetForce(matchingMethod, p1, tp2, numFeat, feat1, idx1, feat2, idx2, fDir);
 99 | 
100 | 		f[0] += fDir[0];
101 | 		f[1] += fDir[1];
102 | 		f[2] += fDir[2];
103 | 	}
104 | 
105 | 	force[tID * 3 + 0] = f[0];
106 | 	force[tID * 3 + 1] = f[1];
107 | 	force[tID * 3 + 2] = f[2];
108 | 
109 | 	// lever arm from the centroid to the transformed point
110 | 	float p2c[3];
111 | 	p2c[0] = tp2[0] - centroid2[0];
112 | 	p2c[1] = tp2[1] - centroid2[1];
113 | 	p2c[2] = tp2[2] - centroid2[2];
114 | 
115 | 	// torque from lever arm and force; crossVec3 comes from CudaMath.cuh,
116 | 	// presumably tDir = p2c x f -- confirm against that header
117 | 	float tDir[3];
118 | 	crossVec3<float>(tDir, p2c, f);
119 | 
120 | 	torque[tID * 3 + 0] = tDir[0];
121 | 	torque[tID * 3 + 1] = tDir[1];
122 | 	torque[tID * 3 + 2] = tDir[2];
123 | 
124 | 	// the moment of inertia with the mass = 1
125 | 	// NOTE(review): this stores the distance |p2c|, not its square; verify
126 | 	// against the host code that consumes momentInt.
127 | 	momentInt[tID] = sqrt( p2c[0]*p2c[0] + p2c[1]*p2c[1] + p2c[2]*p2c[2] );
128 | 
129 | }


--------------------------------------------------------------------------------
/GraphReg/AdaptiveSimulatedAnnealingOptimization.m:
--------------------------------------------------------------------------------
1 | function [ T] = AdaptiveSimulatedAnnealingOptimization( p, q, features,score,cool_down)
2 | %ADAPTIVESIMULATEDANNEALINGOPTIMIZATION Entry point of the registration solver.
3 | %   Forwards p, q, features, score and cool_down unchanged to IterateOnGPU5
4 | %   (adaptive simulated annealing) and returns its result T.
5 | T = IterateOnGPU5(p, q, features, score, cool_down);
6 | end


--------------------------------------------------------------------------------
/GraphReg/findPointNormals.m:
--------------------------------------------------------------------------------
  1 | function [ normals, curvature ] = findPointNormals(points, numNeighbours, viewPoint, dirLargest)
  2 | %FINDPOINTNORMALS Estimates the normals of a sparse set of n 3d points by
  3 | % fitting a plane to the closest neighbours of every point.
  4 | %
  5 | %   For each point the second-moment matrix of the offsets to its nearest
  6 | %   neighbours is formed. The eigenvector of its smallest eigenvalue is the
  7 | %   normal, and that eigenvalue divided by the eigenvalue sum (plus 1e-6) is
  8 | %   the curvature. Normals are finally flipped to face the view point.
  9 | %
 10 | %   Required Inputs:
 11 | %   points- nx3 set of 3d points (x,y,z)
 12 | %
 13 | %   Optional Inputs: (will give default values on empty array [])
 14 | %   numNeighbours- number of neighbouring points to use in plane fitting
 15 | %       (default 9)
 16 | %   viewPoint- location all normals will point towards (default [0,0,0])
 17 | %   dirLargest- use only the largest component of the normal in determining
 18 | %       its direction wrt the viewPoint (generally provides a more stable
 19 | %       estimation of planes near the viewPoint, default true)
 20 | %
 21 | %   Outputs:
 22 | %   normals- nx3 set of normals (nx,ny,nz)
 23 | %   curvature- nx1 set giving the curvature
 24 | %
 25 | %   References-
 26 | %   The implementation closely follows the method given at
 27 | %   http://pointclouds.org/documentation/tutorials/normal_estimation.php
 28 | %   This code was used in generating the results for the journal paper
 29 | %   Multi-modal sensor calibration using a gradient orientation measure 
 30 | %   http://www.zjtaylor.com/welcome/download_pdf?pdf=JFR2013.pdf
 31 | %
 32 | %   This code was written by Zachary Taylor
 33 | %   zacharyjeremytaylor@gmail.com
 34 | %   http://www.zjtaylor.com
 35 | 
 36 | %% check inputs
 37 | validateattributes(points, {'numeric'},{'ncols',3});
 38 | 
 39 | % a missing or empty optional argument selects its default
 40 | if(nargin < 2 || isempty(numNeighbours))
 41 |     numNeighbours = 9;
 42 | else
 43 |     validateattributes(numNeighbours, {'numeric'},{'scalar','positive'});
 44 |     if(numNeighbours > 100)
 45 |         warning(['%i neighbouring points will be used in plane'...
 46 |             ' estimation, expect long run times, large ram usage and'...
 47 |             ' poor results near edges'],numNeighbours);
 48 |     end
 49 | end
 50 | 
 51 | if(nargin < 3 || isempty(viewPoint))
 52 |     viewPoint = [0,0,0];
 53 | else
 54 |     validateattributes(viewPoint, {'numeric'},{'size',[1,3]});
 55 | end
 56 | 
 57 | if(nargin < 4 || isempty(dirLargest))
 58 |     dirLargest = true;
 59 | else
 60 |     validateattributes(dirLargest, {'logical'},{'scalar'});
 61 | end
 62 | 
 63 | %% setup
 64 | 
 65 | %ensure inputs of correct type
 66 | points = double(points);
 67 | viewPoint = double(viewPoint);
 68 | numPts = size(points,1);
 69 | 
 70 | %nearest neighbours through a kd-tree; one extra hit is requested because
 71 | %the first column is the query point itself and is removed
 72 | searcher = KDTreeSearcher(points,'distance','euclidean');
 73 | nbr = knnsearch(searcher,points,'k',(numNeighbours+1));
 74 | nbr = nbr(:,2:end);
 75 | 
 76 | %offsets from every point to its neighbours, arranged as
 77 | %(point, neighbour, coordinate)
 78 | offsets = repmat(points(:,1:3),numNeighbours,1) - points(nbr(:),1:3);
 79 | offsets = reshape(offsets, numPts,numNeighbours,3);
 80 | dx = offsets(:,:,1);
 81 | dy = offsets(:,:,2);
 82 | dz = offsets(:,:,3);
 83 | 
 84 | %six unique entries of the symmetric matrix: xx xy xz yy yz zz
 85 | C = [sum(dx.*dx,2), sum(dx.*dy,2), sum(dx.*dz,2), ...
 86 |      sum(dy.*dy,2), sum(dy.*dz,2), sum(dz.*dz,2)] ./ numNeighbours;
 87 | 
 88 | %positions of the six entries inside the full 3x3 matrix
 89 | symIdx = [1 2 3;...
 90 |           2 4 5;...
 91 |           3 5 6];
 92 | 
 93 | %% normals and curvature calculation
 94 | 
 95 | normals = zeros(size(points));
 96 | curvature = zeros(numPts,1);
 97 | for i = 1:numPts
 98 |     
 99 |     %form covariance matrix
100 |     entries = C(i,:);
101 |     Cmat = entries(symIdx);
102 |     
103 |     %eigen decomposition; the smallest eigenvalue marks the normal
104 |     [vecs,vals] = eig(Cmat);
105 |     vals = diag(vals);
106 |     [smallest,k] = min(vals);
107 |     
108 |     %store normals
109 |     normals(i,:) = vecs(:,k)';
110 |     
111 |     %store curvature
112 |     curvature(i) = smallest / (sum(vals)+1e-6);
113 | end
114 | 
115 | %% flipping normals
116 | 
117 | %vectors from the viewPoint to the points; a normal with a positive
118 | %projection on them points away from the viewPoint and is reversed
119 | fromView = points - repmat(viewPoint,numPts,1);
120 | if(dirLargest)
121 |     %compare only the dominant component of each normal
122 |     [~,col] = max(abs(normals),[],2);
123 |     lin = (col-1)*numPts + (1:numPts)';
124 |     flipMask = normals(lin).*fromView(lin) > 0;
125 | else
126 |     flipMask = sum(normals.*fromView,2) > 0;
127 | end
128 | 
129 | normals(flipMask,:) = -normals(flipMask,:);
130 | 
131 | end


--------------------------------------------------------------------------------
/GraphReg/cuda/KernelMatching.ptx:
--------------------------------------------------------------------------------
  1 | //
  2 | // Generated by NVIDIA NVVM Compiler
  3 | //
  4 | // Compiler Build ID: CL-21124049
  5 | // Cuda compilation tools, release 8.0, V8.0.44
  6 | // Based on LLVM 3.4svn
  7 | //
  8 | 
  9 | .version 5.0
 10 | .target sm_52
 11 | .address_size 64
 12 | 
 13 | 	// .globl	_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii
 14 | // MatchingStep: thread tid.x (< param_13) transforms one point with param_8, sums weighted (other - own)/d^3 vectors over the param_2 index list, and stores that sum, its cross product with the arm to param_7, and the arm length.
 15 | .visible .entry _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii(
 16 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0,	// const float*: packed xyz triples addressed through param_2
 17 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1,	// const float*: packed xyz triples addressed through param_3
 18 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2,	// const int*: index list every thread loops over (param_14 entries)
 19 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3,	// const int*: one index per thread, read at [tid]
 20 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4,	// const float*: param_6 feature values per param_0 point
 21 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5,	// const float*: param_6 feature values per param_1 point
 22 | 	.param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6,	// int: number of feature values per point
 23 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7,	// const float*: 3 floats, reference point of the lever arm
 24 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8,	// const float*: transform, floats 0..11 read as three rows of four
 25 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9,	// float*: output, 3 floats per thread (accumulated vector)
 26 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10,	// float*: output, 3 floats per thread (cross product)
 27 | 	.param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11,	// float*: output, 1 float per thread (arm length)
 28 | 	.param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12,	// int: weighting mode, compared against 0, 1 and 2 below
 29 | 	.param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13,	// int: number of threads that do work
 30 | 	.param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14	// int: number of param_2 entries to loop over
 31 | )
 32 | {
 33 | 	.reg .pred 	%p<16>;
 34 | 	.reg .f32 	%f<165>;
 35 | 	.reg .b32 	%r<32>;
 36 | 	.reg .f64 	%fd<5>;
 37 | 	.reg .b64 	%rd<61>;
 38 | 
 39 | 
 40 | 	ld.param.u64 	%rd27, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0];
 41 | 	ld.param.u64 	%rd19, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1];
 42 | 	ld.param.u64 	%rd28, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2];
 43 | 	ld.param.u64 	%rd20, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3];
 44 | 	ld.param.u64 	%rd29, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4];
 45 | 	ld.param.u64 	%rd21, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5];
 46 | 	ld.param.u32 	%r12, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6];
 47 | 	ld.param.u64 	%rd22, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7];
 48 | 	ld.param.u64 	%rd23, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8];
 49 | 	ld.param.u64 	%rd24, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9];
 50 | 	ld.param.u64 	%rd25, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10];
 51 | 	ld.param.u64 	%rd26, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11];
 52 | 	ld.param.u32 	%r13, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12];
 53 | 	ld.param.u32 	%r15, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13];
 54 | 	ld.param.u32 	%r14, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14];
 55 | 	cvta.to.global.u64 	%rd1, %rd29;
 56 | 	cvta.to.global.u64 	%rd2, %rd27;
 57 | 	cvta.to.global.u64 	%rd60, %rd28;
 58 | 	mov.u32 	%r1, %tid.x;	// only tid.x is read; no block index enters the thread id
 59 | 	setp.ge.s32	%p1, %r1, %r15;	// tid >= param_13: this thread has nothing to do
 60 | 	@%p1 bra 	BB0_18;
 61 | 
 62 | 	cvta.to.global.u64 	%rd30, %rd23;
 63 | 	cvta.to.global.u64 	%rd31, %rd19;
 64 | 	cvta.to.global.u64 	%rd32, %rd20;
 65 | 	cvt.s64.s32	%rd4, %r1;
 66 | 	mul.wide.s32 	%rd33, %r1, 4;
 67 | 	add.s64 	%rd34, %rd32, %rd33;
 68 | 	ld.global.u32 	%r2, [%rd34];	// r2 = param_3[tid]
 69 | 	mul.lo.s32 	%r16, %r2, 3;	// r2 is used as-is (no -1), i.e. as a 0-based point index
 70 | 	mul.wide.s32 	%rd35, %r16, 4;
 71 | 	add.s64 	%rd36, %rd31, %rd35;
 72 | 	ld.global.f32 	%f45, [%rd30];
 73 | 	ld.global.f32 	%f46, [%rd36];	// x of this thread's param_1 point
 74 | 	ld.global.f32 	%f47, [%rd30+4];
 75 | 	ld.global.f32 	%f48, [%rd36+4];	// y
 76 | 	mul.f32 	%f49, %f48, %f47;
 77 | 	fma.rn.f32 	%f50, %f46, %f45, %f49;
 78 | 	ld.global.f32 	%f51, [%rd30+8];
 79 | 	ld.global.f32 	%f52, [%rd36+8];	// z
 80 | 	fma.rn.f32 	%f53, %f52, %f51, %f50;
 81 | 	ld.global.f32 	%f54, [%rd30+12];
 82 | 	add.f32 	%f1, %f54, %f53;	// f1 = param_8[0..3] applied to (x, y, z, 1)
 83 | 	ld.global.f32 	%f55, [%rd30+16];
 84 | 	ld.global.f32 	%f56, [%rd30+20];
 85 | 	mul.f32 	%f57, %f48, %f56;
 86 | 	fma.rn.f32 	%f58, %f46, %f55, %f57;
 87 | 	ld.global.f32 	%f59, [%rd30+24];
 88 | 	fma.rn.f32 	%f60, %f52, %f59, %f58;
 89 | 	ld.global.f32 	%f61, [%rd30+28];
 90 | 	add.f32 	%f2, %f61, %f60;	// f2 = param_8[4..7] applied to (x, y, z, 1)
 91 | 	ld.global.f32 	%f62, [%rd30+32];
 92 | 	ld.global.f32 	%f63, [%rd30+36];
 93 | 	mul.f32 	%f64, %f48, %f63;
 94 | 	fma.rn.f32 	%f65, %f46, %f62, %f64;
 95 | 	ld.global.f32 	%f66, [%rd30+40];
 96 | 	fma.rn.f32 	%f67, %f52, %f66, %f65;
 97 | 	ld.global.f32 	%f68, [%rd30+44];
 98 | 	add.f32 	%f3, %f68, %f67;	// f3 = param_8[8..11] applied to (x, y, z, 1)
 99 | 	mov.f32 	%f164, 0f00000000;	// f164/f163/f162: x/y/z accumulators, start at 0
100 | 	mov.f32 	%f163, %f164;
101 | 	mov.f32 	%f162, %f164;
102 | 	setp.lt.s32	%p2, %r14, 1;	// param_14 < 1: skip the loops, zeros get stored
103 | 	@%p2 bra 	BB0_17;
104 | 
105 | 	cvt.rn.f32.s32	%f4, %r12;	// f4 = (float)param_6
106 | 	setp.eq.s32	%p3, %r13, 0;	// mode 0 uses the unweighted loop at BB0_16
107 | 	mov.f32 	%f164, 0f00000000;
108 | 	mov.f32 	%f163, %f164;
109 | 	mov.f32 	%f162, %f164;
110 | 	mov.u32 	%r31, 0;
111 | 	@%p3 bra 	BB0_16;
112 | 
113 | 	cvta.to.global.u64 	%rd37, %rd21;
114 | 	mul.lo.s32 	%r19, %r2, %r12;
115 | 	mul.wide.s32 	%rd38, %r19, 4;
116 | 	add.s64 	%rd6, %rd37, %rd38;	// rd6 -> param_5[r2 * param_6], this thread's feature values
117 | 	mov.u64 	%rd5, %rd6;
118 | 	mov.f32 	%f164, 0f00000000;
119 | 	mov.f32 	%f163, %f164;
120 | 	mov.f32 	%f162, %f164;
121 | 	mov.u32 	%r28, 0;
122 | 
123 | BB0_4:	// loop over r28 = 0 .. param_14-1 for modes other than 0
124 | 	mul.wide.s32 	%rd39, %r28, 4;
125 | 	add.s64 	%rd40, %rd60, %rd39;
126 | 	ld.global.u32 	%r4, [%rd40];	// r4 = param_2[r28]
127 | 	mul.lo.s32 	%r20, %r4, 3;
128 | 	mul.wide.s32 	%rd41, %r20, 4;
129 | 	add.s64 	%rd42, %rd2, %rd41;
130 | 	ld.global.f32 	%f75, [%rd42];
131 | 	sub.f32 	%f8, %f75, %f1;	// (f8, f9, f10) = param_0 point r4 minus the transformed point
132 | 	ld.global.f32 	%f76, [%rd42+4];
133 | 	sub.f32 	%f9, %f76, %f2;
134 | 	ld.global.f32 	%f77, [%rd42+8];
135 | 	sub.f32 	%f10, %f77, %f3;
136 | 	mul.f32 	%f78, %f9, %f9;
137 | 	fma.rn.f32 	%f79, %f8, %f8, %f78;
138 | 	fma.rn.f32 	%f80, %f10, %f10, %f79;
139 | 	sqrt.rn.f32 	%f11, %f80;	// f11 = Euclidean length of that difference
140 | 	setp.eq.s32	%p4, %r13, 2;
141 | 	@%p4 bra 	BB0_10;
142 | 	bra.uni 	BB0_5;
143 | 
144 | BB0_10:	// mode 2
145 | 	mov.f32 	%f158, 0f00000000;
146 | 	setp.lt.s32	%p9, %r12, 1;
147 | 	@%p9 bra 	BB0_13;
148 | 
149 | 	mul.lo.s32 	%r24, %r12, %r4;
150 | 	mul.wide.s32 	%rd44, %r24, 4;
151 | 	add.s64 	%rd59, %rd1, %rd44;	// rd59 -> param_4[r4 * param_6]
152 | 	mov.f32 	%f158, 0f00000000;
153 | 	mov.u32 	%r30, 0;
154 | 	mov.u64 	%rd58, %rd6;
155 | 
156 | BB0_12:	// f158 += (feature difference)^2 for each of the param_6 features
157 | 	mov.u64 	%rd13, %rd58;
158 | 	ld.global.f32 	%f102, [%rd13];
159 | 	ld.global.f32 	%f103, [%rd59];
160 | 	sub.f32 	%f104, %f103, %f102;
161 | 	abs.f32 	%f105, %f104;
162 | 	mul.f32 	%f106, %f105, %f105;
163 | 	abs.f32 	%f107, %f106;
164 | 	add.f32 	%f158, %f158, %f107;
165 | 	add.s64 	%rd59, %rd59, 4;
166 | 	add.s64 	%rd16, %rd13, 4;
167 | 	add.s32 	%r30, %r30, 1;
168 | 	setp.lt.s32	%p10, %r30, %r12;
169 | 	mov.u64 	%rd58, %rd16;
170 | 	@%p10 bra 	BB0_12;
171 | 
172 | BB0_13:
173 | 	sqrt.rn.f32 	%f108, %f4;
174 | 	div.rn.f32 	%f109, %f158, %f108;	// sum of squares divided by sqrt(param_6)
175 | 	mov.f32 	%f110, 0f3F800000;
176 | 	sub.f32 	%f111, %f110, %f109;
177 | 	fma.rn.f32 	%f112, %f111, 0f40000000, 0fBF800000;	// weight = 2 * (1 - f109) - 1
178 | 	mul.f32 	%f113, %f11, %f11;
179 | 	mul.f32 	%f114, %f11, %f113;	// distance cubed
180 | 	cvt.f64.f32	%fd2, %f114;
181 | 	setp.lt.f64	%p11, %fd2, 0d3E7AD7F29ABCAF48;	// threshold is about 1e-7
182 | 	selp.f32	%f115, 0f33D6BF95, %f114, %p11;	// divisor is replaced by about 1e-7 when below the threshold
183 | 	mul.f32 	%f116, %f8, %f112;
184 | 	div.rn.f32 	%f161, %f116, %f115;
185 | 	mul.f32 	%f117, %f9, %f112;
186 | 	div.rn.f32 	%f160, %f117, %f115;
187 | 	mul.f32 	%f118, %f10, %f112;
188 | 	div.rn.f32 	%f159, %f118, %f115;
189 | 	bra.uni 	BB0_15;
190 | 
191 | BB0_5:
192 | 	setp.ne.s32	%p5, %r13, 1;	// neither 2 nor 1: unweighted term at BB0_14
193 | 	@%p5 bra 	BB0_14;
194 | 
195 | 	mov.f32 	%f157, 0f00000000;	// mode 1
196 | 	setp.lt.s32	%p6, %r12, 1;
197 | 	@%p6 bra 	BB0_9;
198 | 
199 | 	mul.lo.s32 	%r22, %r12, %r4;
200 | 	mul.wide.s32 	%rd43, %r22, 4;
201 | 	add.s64 	%rd57, %rd1, %rd43;	// rd57 -> param_4[r4 * param_6]
202 | 	mov.f32 	%f157, 0f00000000;
203 | 	mov.u32 	%r29, 0;
204 | 	mov.u64 	%rd56, %rd5;
205 | 
206 | BB0_8:	// same sum of squared feature differences as in BB0_12
207 | 	mov.u64 	%rd8, %rd56;
208 | 	ld.global.f32 	%f83, [%rd8];
209 | 	ld.global.f32 	%f84, [%rd57];
210 | 	sub.f32 	%f85, %f84, %f83;
211 | 	abs.f32 	%f86, %f85;
212 | 	mul.f32 	%f87, %f86, %f86;
213 | 	abs.f32 	%f88, %f87;
214 | 	add.f32 	%f157, %f157, %f88;
215 | 	add.s64 	%rd57, %rd57, 4;
216 | 	add.s64 	%rd11, %rd8, 4;
217 | 	add.s32 	%r29, %r29, 1;
218 | 	setp.lt.s32	%p7, %r29, %r12;
219 | 	mov.u64 	%rd56, %rd11;
220 | 	@%p7 bra 	BB0_8;
221 | 
222 | BB0_9:
223 | 	sqrt.rn.f32 	%f89, %f4;
224 | 	div.rn.f32 	%f90, %f157, %f89;
225 | 	abs.f32 	%f91, %f90;
226 | 	mov.f32 	%f92, 0f3F800000;
227 | 	sub.f32 	%f93, %f92, %f91;	// weight = 1 - |sum / sqrt(param_6)|
228 | 	mul.f32 	%f94, %f11, %f11;
229 | 	mul.f32 	%f95, %f11, %f94;
230 | 	cvt.f64.f32	%fd1, %f95;
231 | 	setp.lt.f64	%p8, %fd1, 0d3E7AD7F29ABCAF48;
232 | 	selp.f32	%f96, 0f33D6BF95, %f95, %p8;
233 | 	mul.f32 	%f97, %f8, %f93;
234 | 	div.rn.f32 	%f161, %f97, %f96;
235 | 	mul.f32 	%f98, %f9, %f93;
236 | 	div.rn.f32 	%f160, %f98, %f96;
237 | 	mul.f32 	%f99, %f10, %f93;
238 | 	div.rn.f32 	%f159, %f99, %f96;
239 | 	bra.uni 	BB0_15;
240 | 
241 | BB0_14:	// remaining modes: difference / distance^3 without a feature weight
242 | 	mul.f32 	%f119, %f11, %f11;
243 | 	mul.f32 	%f120, %f11, %f119;
244 | 	cvt.f64.f32	%fd3, %f120;
245 | 	setp.lt.f64	%p12, %fd3, 0d3E7AD7F29ABCAF48;
246 | 	selp.f32	%f121, 0f33D6BF95, %f120, %p12;
247 | 	div.rn.f32 	%f161, %f8, %f121;
248 | 	div.rn.f32 	%f160, %f9, %f121;
249 | 	div.rn.f32 	%f159, %f10, %f121;
250 | 
251 | BB0_15:	// add this term to the accumulators and advance the loop
252 | 	add.f32 	%f164, %f164, %f161;
253 | 	add.f32 	%f163, %f163, %f160;
254 | 	add.f32 	%f162, %f162, %f159;
255 | 	add.s32 	%r28, %r28, 1;
256 | 	setp.lt.s32	%p13, %r28, %r14;
257 | 	@%p13 bra 	BB0_4;
258 | 	bra.uni 	BB0_17;
259 | 
260 | BB0_16:	// mode 0 loop: walks param_2 by pointer, same unweighted term as BB0_14
261 | 	ld.global.u32 	%r25, [%rd60];
262 | 	mul.lo.s32 	%r26, %r25, 3;
263 | 	mul.wide.s32 	%rd45, %r26, 4;
264 | 	add.s64 	%rd46, %rd2, %rd45;
265 | 	ld.global.f32 	%f122, [%rd46];
266 | 	sub.f32 	%f123, %f122, %f1;
267 | 	ld.global.f32 	%f124, [%rd46+4];
268 | 	sub.f32 	%f125, %f124, %f2;
269 | 	ld.global.f32 	%f126, [%rd46+8];
270 | 	sub.f32 	%f127, %f126, %f3;
271 | 	mul.f32 	%f128, %f125, %f125;
272 | 	fma.rn.f32 	%f129, %f123, %f123, %f128;
273 | 	fma.rn.f32 	%f130, %f127, %f127, %f129;
274 | 	sqrt.rn.f32 	%f131, %f130;
275 | 	mul.f32 	%f132, %f131, %f131;
276 | 	mul.f32 	%f133, %f131, %f132;
277 | 	cvt.f64.f32	%fd4, %f133;
278 | 	setp.lt.f64	%p14, %fd4, 0d3E7AD7F29ABCAF48;
279 | 	selp.f32	%f134, 0f33D6BF95, %f133, %p14;
280 | 	div.rn.f32 	%f135, %f123, %f134;
281 | 	div.rn.f32 	%f136, %f125, %f134;
282 | 	div.rn.f32 	%f137, %f127, %f134;
283 | 	add.f32 	%f164, %f164, %f135;
284 | 	add.f32 	%f163, %f163, %f136;
285 | 	add.f32 	%f162, %f162, %f137;
286 | 	add.s64 	%rd60, %rd60, 4;
287 | 	add.s32 	%r31, %r31, 1;
288 | 	setp.lt.s32	%p15, %r31, %r14;
289 | 	@%p15 bra 	BB0_16;
290 | 
291 | BB0_17:	// write the three per-thread results
292 | 	cvta.to.global.u64 	%rd47, %rd26;
293 | 	cvta.to.global.u64 	%rd48, %rd25;
294 | 	cvta.to.global.u64 	%rd49, %rd22;
295 | 	cvta.to.global.u64 	%rd50, %rd24;
296 | 	mul.lo.s32 	%r27, %r1, 3;
297 | 	mul.wide.s32 	%rd51, %r27, 4;
298 | 	add.s64 	%rd52, %rd50, %rd51;
299 | 	st.global.f32 	[%rd52], %f164;	// param_9[3*tid .. 3*tid+2] = accumulated vector
300 | 	st.global.f32 	[%rd52+4], %f163;
301 | 	st.global.f32 	[%rd52+8], %f162;
302 | 	ld.global.f32 	%f138, [%rd49];
303 | 	sub.f32 	%f139, %f1, %f138;	// (f139, f141, f143) = transformed point minus param_7
304 | 	ld.global.f32 	%f140, [%rd49+4];
305 | 	sub.f32 	%f141, %f2, %f140;
306 | 	ld.global.f32 	%f142, [%rd49+8];
307 | 	sub.f32 	%f143, %f3, %f142;
308 | 	mul.f32 	%f144, %f162, %f141;
309 | 	mul.f32 	%f145, %f163, %f143;
310 | 	sub.f32 	%f146, %f144, %f145;	// x of arm x accumulated vector
311 | 	mul.f32 	%f147, %f164, %f143;
312 | 	mul.f32 	%f148, %f162, %f139;
313 | 	sub.f32 	%f149, %f147, %f148;	// y of the cross product
314 | 	mul.f32 	%f150, %f163, %f139;
315 | 	mul.f32 	%f151, %f164, %f141;
316 | 	sub.f32 	%f152, %f150, %f151;	// z of the cross product
317 | 	add.s64 	%rd53, %rd48, %rd51;
318 | 	st.global.f32 	[%rd53], %f146;	// param_10[3*tid .. 3*tid+2] = cross product
319 | 	st.global.f32 	[%rd53+4], %f149;
320 | 	st.global.f32 	[%rd53+8], %f152;
321 | 	mul.f32 	%f153, %f141, %f141;
322 | 	fma.rn.f32 	%f154, %f139, %f139, %f153;
323 | 	fma.rn.f32 	%f155, %f143, %f143, %f154;
324 | 	sqrt.rn.f32 	%f156, %f155;
325 | 	shl.b64 	%rd54, %rd4, 2;
326 | 	add.s64 	%rd55, %rd47, %rd54;
327 | 	st.global.f32 	[%rd55], %f156;	// param_11[tid] = length of the arm (not squared)
328 | 
329 | BB0_18:
330 | 	ret;
331 | }
332 | 
333 | 
334 | 


--------------------------------------------------------------------------------
/GraphReg/IterateOnGPU5.m:
--------------------------------------------------------------------------------
  1 | function [T] = IterateOnGPU5( p, q, feat,score,coolDown,visualize)
  2 | %================================================================
  3 | % Adopt the adaptive simulated annealing to optimize the registration
  4 | % process with faster convergence process
  5 | % Input:  p  q:   3xN   3xM    the model and the template point clouds
  6 | %        feat:   struct       the features of point clouds p and q
  7 | %                             (fields .p and .q, one column per point)
  8 | %        score:  struct       the score of point clouds p and q
  9 | %                             (not read here; kept so existing calls work)
 10 | %        coolDown:            forwarded unchanged to AdaptSimulatedAnnealing
 11 | %        visualize: logical   optional, default true; redraw both clouds
 12 | %                             in figure 999 after every iteration
 13 | %
 14 | % Output:  T:  4x4 array   the solved transformation matrix (moves q onto
 15 | %                          p), translation given in the unscaled units
 16 | % The dynamical framework is based on 
 17 | % P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard, and F. Ernst,
 18 | %“Efficient registration of high-resolution feature enhanced point clouds,”
 19 | % IEEE Trans. Pattern Anal. Mach. Intell., vol. 41, no. 5, pp. 1102–1115,
 20 | % May 2019.
 21 | %==============================================================
 22 | 
 23 | % visualisation stays on unless the caller switches it off
 24 | if nargin < 6 || isempty(visualize)
 25 |     visualize = true;
 26 | end
 27 | 
 28 | % get the current matching method, i.e., the Coulomb’s law
 29 | matchingMethod = 2;
 30 | 
 31 | % number of points per cloud; size(.,2) instead of length() so that clouds
 32 | % with fewer than three points are still counted by column
 33 | Np=size(p,2);
 34 | Nq=size(q,2);
 35 | 
 36 | [rp,cp]=size(feat.p);
 37 | [rq,cq]=size(feat.q);
 38 | 
 39 | numFeat = min(rp, rq);
 40 | 
 41 | % The kernel reads feature k of point i at offset i*numFeat+k, so the
 42 | % buffers need exactly numFeat rows (a fixed 2 rows breaks numFeat == 1).
 43 | feat1=zeros(numFeat,cp);
 44 | feat2=zeros(numFeat,cq);
 45 | 
 46 | % scale all features within a range of [0,1]
 47 | for fcnt = 1:numFeat
 48 |     ft1 = feat.p(fcnt, :);
 49 |     ft2 = feat.q(fcnt, :);
 50 |     
 51 |     fmax = max(max(ft1), max(ft2));
 52 |     fmin = min(min(ft1), min(ft2));
 53 |     dist =(fmax - fmin);
 54 |     if(dist == 0)
 55 |         % constant feature: only shift, there is nothing to divide by
 56 |         feat1(fcnt,:)=ft1 - fmin;
 57 |         feat2(fcnt,:)=ft2-fmin;
 58 |     else
 59 |        %make feat in [0,1]
 60 |         feat1(fcnt,:)=(ft1 - fmin)/dist;
 61 |         feat2(fcnt,:)=(ft2-fmin)/dist;
 62 |     end
 63 | end
 64 | 
 65 | % percentage of each cloud that is sampled per iteration step
 66 | numRand = 100;
 67 | 
 68 | %% scale the clouds onto a maximum edge size of 100 units
 69 | mP = max(abs(max(p,[],2) - min(p,[],2)));
 70 | mQ = max(abs(max(q,[],2) - min(q,[],2)));
 71 | 
 72 | % scaleFactor
 73 | sclFct = 100/max(mP,mQ);
 74 | 
 75 | p = p.*sclFct;
 76 | q = q.*sclFct;
 77 | 
 78 | %% init gpu kernel
 79 | kernel = parallel.gpu.CUDAKernel('KernelMatching.ptx', 'KernelMatching.cu', 'MatchingStep');
 80 | 
 81 | numCmp = round(max(1, Np * numRand/100)); % percent
 82 | numThrd = round(max(1, Nq * numRand/100)); % percent
 83 | 
 84 | % capped at the block limit, so the grid below is a single thread block
 85 | numThrd = min(kernel.MaxThreadsPerBlock, numThrd);
 86 | numCmp = min(numThrd, numCmp);
 87 | 
 88 | kernel.ThreadBlockSize = [kernel.MaxThreadsPerBlock,1,1];
 89 | kernel.GridSize = [ceil(numThrd/kernel.MaxThreadsPerBlock),1];
 90 | 
 91 | % forces computed on GPU
 92 | gF = gpuArray( single( zeros(1, numThrd*3) ));
 93 | % torques computed on GPU
 94 | gT = gpuArray( single( zeros(1, numThrd*3) ));
 95 | % the moment of inertia
 96 | gI = gpuArray( single( zeros(1, numThrd) ));
 97 | 
 98 | %% init point clouds
 99 | p1 = gpuArray( single( reshape(p, 1, []) ));
100 | p2 = gpuArray( single( reshape(q, 1, []) ));
101 | 
102 | centroid2 = mean(q, 2);
103 | centroid1 = mean(p, 2);
104 | 
105 | % for every point of p, the point of q that is closest in feature space
106 | kdtree=KDTreeSearcher(feat2');
107 | [idx,~]=knnsearch(kdtree,feat1');
108 | 
109 | % transfer features to GPU memory
110 | gf1 = gpuArray( single( reshape(feat1, 1, []) ) );
111 | gf2 = gpuArray( single( reshape(feat2, 1, []) ) );
112 | 
113 | %% Iteration parameter
114 | 
115 | % transformation matrix, init with centroid to centroid trafo
116 | M = TransMat((centroid1-centroid2));
117 | M = single(M);
118 | 
119 | % loop-invariant: move the centroid of q into the origin and back again
120 | CM = TransMat(-centroid2);
121 | CP = TransMat( centroid2);
122 | 
123 | % temperature parameter for the adaptive simulated annealing 
124 | temperature = 0.5;
125 | 
126 | % rotation angle / step length remembered from the last iteration that
127 | % did not take the "transition" branch below
128 | oA = 0;%over Angle
129 | oT = 0;%over Translation
130 | 
131 | % homogeneous copy of q, only needed for plotting
132 | if visualize
133 |    plotQ =  [q; ones(1,Nq)];
134 | end
135 | 
136 | AcceptRate = 0.5;
137 | EvalsMax=200;%100 in default or larger iterations for better results
138 | 
139 | %% start iteration
140 | Ekin=1;
141 | for iter=1:1:EvalsMax    
142 |     iter  % no semicolon: echoes the iteration counter as progress output
143 |     %% get the forces
144 |     % randomly chosen indices of the reference cloud (1-based, duplicates
145 |     % removed, so the count differs from one iteration to the next)
146 |     tmpIdx1=unique(randi(Np, numCmp, 1));
147 |     % feature-space partner in q of each sampled point
148 |     tmpIdx2=idx(tmpIdx1,:);
149 |     numActive = numel(tmpIdx2);
150 |     
151 |     % the kernel uses the indices directly as 0-based array offsets
152 |     i1=gpuArray(int32(tmpIdx1 - 1));
153 |     i2=gpuArray(int32(tmpIdx2 - 1));
154 | 
155 |     GM = gpuArray( reshape(M', 1, []) );
156 |    	
157 |     cent = M * [centroid2; 1];
158 |     c2 = gpuArray( cent(1:3) );
159 |     
160 |     [gF, gT, gI] = feval(kernel,...
161 |                         p1, p2,...
162 |                         i1, i2,...
163 |                         gf1, gf2,...
164 |                         numFeat, c2, GM,...
165 |                         gF, gT, gI,...
166 |                         matchingMethod,...
167 |                         numActive, numel(tmpIdx1));
168 |                     
169 |     %% resulting force, copy back to RAM (gather) for speed up
170 |     % Only the first numActive threads write their slots; the others still
171 |     % hold values from an earlier, larger sample and must not be summed.
172 |     f = reshape(gather(gF), 3, []);
173 |     F = sum(f(:,1:numActive), 2);
174 |     
175 |     % resulting torques
176 |     t = reshape(gather(gT), 3, []);
177 |     Torque = sum(t(:,1:numActive), 2);
178 |     
179 |     % resulting moment of inertia
180 |     inertia = gather(gI);
181 |     I = sum(inertia(1:numActive));
182 |    
183 |     %% get the transformations
184 |     % angular acceleration
185 |     alpha = Torque ./ I;
186 |     angle = norm(alpha)/2;
187 |     
188 |     if(angle == 0)
189 |         rotAxis = [0 0 0]';
190 |     else
191 |         % unit rotation axis (dividing by the half-norm gave length 2)
192 |         rotAxis = alpha / norm(alpha);
193 |     end
194 |     
195 |     % linear acceleration 
196 |     a = F ./ numThrd;
197 |     magnitude = norm(a)/2;
198 |     magnitude = max(1,magnitude);
199 |     % NaN for a zero force; the NaN check on the composed matrix below
200 |     % then skips this iteration's update
201 |     transDir = a / norm(a);
202 |     
203 |     %% simulated annealing
204 |     % current kinetic energy
205 |     Etrans = (numThrd/2) * magnitude^2;%1/2*M*v^2
206 |     Erot = (I/2) * angle^2;%1/2*I*w^2
207 |     
208 |     [temperature,AcceptRate, transition, Ekin] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature,...
209 |                                                          Ekin,...
210 |                                                          Etrans,...
211 |                                                          Erot,coolDown);
212 | 
213 |     if (transition)
214 |         angle = oA;
215 |         magnitude = oT;
216 |         % NOTE(review): needs Ekin to hold at least two elements here -
217 |         % confirm that AdaptSimulatedAnnealing appends to it
218 |         Ekin(end) = Ekin(end-1);
219 |     else
220 |         oA = angle;
221 |         oT = magnitude;
222 |     end
223 | 
224 |     %% compute the new transformation matrix
225 |     ss = transDir * magnitude * temperature;
226 |     rr = angle * temperature;
227 | 
228 |     Tstep = TransMat(ss);
229 |     R = eye(4);
230 |     if(rr ~= 0)
231 |         R = AxisAngle(rotAxis, rr);
232 |     end
233 | 
234 |     tmpMat = M * CP * R * Tstep * CM;% transformation composite
235 |     % keep the previous M if any entry went NaN
236 |     if(~any(isnan(tmpMat(:))))
237 |         M = tmpMat;
238 |     end
239 |     
240 |     %% visualisation 
241 |     if visualize
242 |         figure(999);
243 |         clf
244 |         hold on;
245 |         axis equal;
246 |         view(-21,12);
247 |         
248 |         tmpP2 = M * plotQ;
249 |         
250 |         scale = 4;   % draw every 4th point only
251 |         plot3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end),'bo');
252 |         plot3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end),'r+'); 
253 |         axis off;
254 |         set(gcf,'color','w');
255 |         pause(0.01);
256 |     end
257 | end
258 | 
259 | %% back scaling to origin size
260 | S = [1/sclFct 0 0 0; ...
261 |      0 1/sclFct 0 0; ...
262 |      0 0 1/sclFct 0; ...
263 |      0 0 0 1];
264 | 
265 | tS = S * M(:,4);
266 | T = [M(:,1:3), tS];
267 | 
268 | %% clear graphics memory
269 | clear p1 p2 i1 i2 gf1 gf2 c2 GM gF gT gI;
270 | end
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  


--------------------------------------------------------------------------------
/GraphReg/cuda/CudaMath.cuh:
--------------------------------------------------------------------------------
  1 | ////////////////////////////////////////////////////////////////////////////////////////////////////
  2 | // file:	CudaMath.cuh
  3 | //
  4 | // summary:	cuda mathematic operations
  5 | ////////////////////////////////////////////////////////////////////////////////////////////////////
  6 | 
  7 | //#ifndef __CUDA_MATH_CUH__
  8 | //#define __CUDA_MATH_CUH__
  9 | #pragma once
 10 | 
 11 | #include <cuda.h>
 12 | #include <cuda_runtime.h>
 13 | #include <cuda_runtime_api.h>
 14 | #include <cmath>
 15 | 
 16 | /// <summary>	Defines an epsilon for computational accuracy. </summary>
 17 | #define CM_EPS 1e-6
 18 | 
 19 | /// <summary>	Defines PI. </summary>
 20 | #define CM_PI 3.1415926535897932384626433832795028841971693993751f
 21 | 
 22 | /// <summary>	Size in bytes of a float 4x4 matrix; parenthesized so it stays one value inside larger expressions. </summary>
 23 | #define SIZE_OF_FLOAT_MAT4 (sizeof(float) * 16)
 24 | 
 25 | 
 26 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 27 | /// <summary>	Overwrites a 16-element array with the 4x4 identity matrix. </summary>
 28 | ///
 29 | /// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
 30 | ///
 31 | /// <param name="M">	[out] The 16 matrix elements; must not be null. </param>
 32 | ///
 33 | /// <returns>	Always true. </returns>
 34 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 35 | template <typename T>
 36 | __device__ __forceinline__ bool mat4CreateIdentity(T* M) {
 37 | 
 38 | 	// No size check here: sizeof(M) would be the size of the pointer,
 39 | 	// not of the array it points to.
 40 | 
 41 | 	// Elements 0, 5, 10 and 15 are the diagonal of the flattened 4x4 matrix.
 42 | 	for (int idx = 0; idx < 16; ++idx) {
 43 | 		if (idx % 5 == 0) M[idx] = 1.0;
 44 | 		else              M[idx] = 0.0;
 45 | 	}
 46 | 
 47 | 	return true;
 48 | }
 49 | 
 50 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 51 | /// <summary>	Makes a exact copy of a 4x4 matrix. </summary>
 52 | ///
 53 | /// <remarks>	Philipp Jauer, 2014-12-15. </remarks>
 54 | ///
 55 | /// <param name="Dst">	[in,out] The copy destination. </param>
 56 | /// <param name="Src">	[in,out] The copy source. </param>
 57 | ///
 58 | /// <returns>	true if it succeeds, false if it fails. </returns>
 59 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 60 | template <typename T>
 61 | __device__ __forceinline__ bool mat4Copy(T* Dst, T* Src) {
 62 | 
 63 | 	for (int i = 0; i < 16; i++)
 64 | 		Dst[i] = Src[i];
 65 | 
 66 | 	return true;
 67 | }
 68 | 
 69 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 70 | /// <summary>	Creates a 4x4 matrix rotation about the x-axis. </summary>
 71 | ///
 72 | /// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
 73 | ///
 74 | /// <param name="M">		[in,out] If non-null, the float* to process. </param>
 75 | /// <param name="angle">	The angle in radians. </param>
 76 | ///
 77 | /// <returns>	true if it succeeds, false if it fails. </returns>
 78 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 79 | __device__ __forceinline__ bool mat4CreateRotX(float* M, float angle) {
 80 | 
 81 | 	/*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
 82 | 		return false;*/
 83 | 
 84 | 	float ct = cos(angle);
 85 | 	float st = sin(angle);
 86 | 
 87 | 	M[0]  = 1.0; M[1]  = 0.0; M[2]  = 0.0; M[3]  = 0.0;
 88 | 	M[4]  = 0.0; M[5]  =  ct; M[6]  = -st; M[7]  = 0.0;
 89 | 	M[8]  = 0.0; M[9]  =  st; M[10] =  ct; M[11] = 0.0;
 90 | 	M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
 91 | 
 92 | 	return true;
 93 | }
 94 | 
 95 | ////////////////////////////////////////////////////////////////////////////////////////////////////
 96 | /// <summary>	Creates a 4x4 matrix rotation about the y-axis. </summary>
 97 | ///
 98 | /// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
 99 | ///
100 | /// <param name="M">		[in,out] If non-null, the float* to process. </param>
101 | /// <param name="angle">	The angle. </param>
102 | ///
103 | /// <returns>	true if it succeeds, false if it fails. </returns>
104 | ////////////////////////////////////////////////////////////////////////////////////////////////////
105 | __device__ __forceinline__ bool mat4CreateRotY(float* M, float angle) {
106 | 	
107 | 	/*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
108 | 		return false;*/
109 | 
110 | 	float ct = cos(angle);
111 | 	float st = sin(angle);
112 | 
113 | 	M[0]  =  ct; M[1]  = 0.0; M[2]  =  st; M[3]  = 0.0;
114 | 	M[4]  = 0.0; M[5]  = 1.0; M[6]  = 0.0; M[7]  = 0.0;
115 | 	M[8]  = -st; M[9]  = 0.0; M[10] =  ct; M[11] = 0.0;
116 | 	M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
117 | 
118 | 	return true;
119 | }
120 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// 	Fills a flattened 4x4 matrix with a rotation about the z-axis:
/// 	M[0] = cos, M[1] = -sin, M[4] = sin, M[5] = cos, identity everywhere else.
/// </summary>
///
/// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
///
/// <param name="M">		[out] Pointer to 16 writable floats. </param>
/// <param name="angle">	The angle in radians. </param>
///
/// <returns>	Always true. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool mat4CreateRotZ(float* M, float angle){

	const float c = cos(angle);
	const float s = sin(angle);

	// Build the full matrix locally, then publish it element by element.
	const float rot[16] = {
		c,    -s,   0.0f, 0.0f,
		s,     c,   0.0f, 0.0f,
		0.0f, 0.0f, 1.0f, 0.0f,
		0.0f, 0.0f, 0.0f, 1.0f
	};

	for (int k = 0; k < 16; ++k)
		M[k] = rot[k];

	return true;
}
146 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// 	Multiplies two flattened 4x4 matrices, C = A * B, where element (row i, column j) is
/// 	stored at index i * 4 + j.
/// </summary>
///
/// <remarks>
/// 	Philipp Jauer, 2014-10-23.
/// 	The product is accumulated in a local scratch matrix and copied to C afterwards, so C may
/// 	point to the same storage as A or B (in-place multiplication gives the correct result).
/// </remarks>
///
/// <param name="C">	[out] Resulting matrix C (16 floats). May alias A or B. </param>
/// <param name="A">	[in] Left Matrix A (16 floats). </param>
/// <param name="B">	[in] Right Matrix B (16 floats). </param>
///
/// <returns>	Always true. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool mat4Mult(float* C, float* A, float* B) {

	// Scratch result: writing straight into C would corrupt the inputs when C aliases A or B.
	float result[16];

	for (int i = 0; i < 4; i++) {
		for (int j = 0; j < 4; j++) {
			float sum = 0.0;
			for (int k = 0; k < 4; k++)
			{
				sum += A[i * 4 + k] * B[k * 4 + j];
			}
			result[i * 4 + j] = sum;
		}
	}

	for (int n = 0; n < 16; n++)
		C[n] = result[n];

	return true;
}
176 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>	Inverts a flattened 4x4 matrix in place using the adjugate / determinant. </summary>
///
/// <remarks>
/// 	I. Kuhlemann, P. Jauer, 2014-10-24.
/// 	The matrix is considered singular when |det| is below CM_EPS. The absolute value matters:
/// 	matrices with a negative determinant (e.g. reflections) are perfectly invertible.
/// </remarks>
///
/// <param name="A">	[in,out] Matrix to invert (16 floats); overwritten with its inverse on
/// 					success and left unchanged on failure. </param>
///
/// <returns>	true if the matrix was inverted, false if it is (nearly) singular. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool mat4Inv( float* A)
{
	// Entries of the adjugate (transposed cofactor matrix) and the determinant.
	double inv[16], det;
	int i;

	inv[0] = A[5] * A[10] * A[15] - 
		A[5] * A[11] * A[14] -
		A[9] * A[6] * A[15] +
		A[9] * A[7] * A[14] +
		A[13] * A[6] * A[11] -
		A[13] * A[7] * A[10];

	inv[4] = -A[4] * A[10] * A[15] +
		A[4] * A[11] * A[14] +
		A[8] * A[6] * A[15] -
		A[8] * A[7] * A[14] -
		A[12] * A[6] * A[11] +
		A[12] * A[7] * A[10];

	inv[8] = A[4] * A[9] * A[15] -
		A[4] * A[11] * A[13] -
		A[8] * A[5] * A[15] +
		A[8] * A[7] * A[13] +
		A[12] * A[5] * A[11] -
		A[12] * A[7] * A[9];

	inv[12] = -A[4] * A[9] * A[14] +
		A[4] * A[10] * A[13] +
		A[8] * A[5] * A[14] -
		A[8] * A[6] * A[13] -
		A[12] * A[5] * A[10] +
		A[12] * A[6] * A[9];

	inv[1] = -A[1] * A[10] * A[15] +
		A[1] * A[11] * A[14] +
		A[9] * A[2] * A[15] -
		A[9] * A[3] * A[14] -
		A[13] * A[2] * A[11] +
		A[13] * A[3] * A[10];

	inv[5] = A[0] * A[10] * A[15] -
		A[0] * A[11] * A[14] -
		A[8] * A[2] * A[15] +
		A[8] * A[3] * A[14] +
		A[12] * A[2] * A[11] -
		A[12] * A[3] * A[10];

	inv[9] = -A[0] * A[9] * A[15] +
		A[0] * A[11] * A[13] +
		A[8] * A[1] * A[15] -
		A[8] * A[3] * A[13] -
		A[12] * A[1] * A[11] +
		A[12] * A[3] * A[9];

	inv[13] = A[0] * A[9] * A[14] -
		A[0] * A[10] * A[13] -
		A[8] * A[1] * A[14] +
		A[8] * A[2] * A[13] +
		A[12] * A[1] * A[10] -
		A[12] * A[2] * A[9];

	inv[2] = A[1] * A[6] * A[15] -
		A[1] * A[7] * A[14] -
		A[5] * A[2] * A[15] +
		A[5] * A[3] * A[14] +
		A[13] * A[2] * A[7] -
		A[13] * A[3] * A[6];

	inv[6] = -A[0] * A[6] * A[15] +
		A[0] * A[7] * A[14] +
		A[4] * A[2] * A[15] -
		A[4] * A[3] * A[14] -
		A[12] * A[2] * A[7] +
		A[12] * A[3] * A[6];

	inv[10] = A[0] * A[5] * A[15] -
		A[0] * A[7] * A[13] -
		A[4] * A[1] * A[15] +
		A[4] * A[3] * A[13] +
		A[12] * A[1] * A[7] -
		A[12] * A[3] * A[5];

	inv[14] = -A[0] * A[5] * A[14] +
		A[0] * A[6] * A[13] +
		A[4] * A[1] * A[14] -
		A[4] * A[2] * A[13] -
		A[12] * A[1] * A[6] +
		A[12] * A[2] * A[5];

	inv[3] = -A[1] * A[6] * A[11] +
		A[1] * A[7] * A[10] +
		A[5] * A[2] * A[11] -
		A[5] * A[3] * A[10] -
		A[9] * A[2] * A[7] +
		A[9] * A[3] * A[6];

	inv[7] = A[0] * A[6] * A[11] -
		A[0] * A[7] * A[10] -
		A[4] * A[2] * A[11] +
		A[4] * A[3] * A[10] +
		A[8] * A[2] * A[7] -
		A[8] * A[3] * A[6];

	inv[11] = -A[0] * A[5] * A[11] +
		A[0] * A[7] * A[9] +
		A[4] * A[1] * A[11] -
		A[4] * A[3] * A[9] -
		A[8] * A[1] * A[7] +
		A[8] * A[3] * A[5];

	inv[15] = A[0] * A[5] * A[10] -
		A[0] * A[6] * A[9] -
		A[4] * A[1] * A[10] +
		A[4] * A[2] * A[9] +
		A[8] * A[1] * A[6] -
		A[8] * A[2] * A[5];

	// Laplace expansion along the first row.
	det = A[0] * inv[0] + A[1] * inv[4] + A[2] * inv[8] + A[3] * inv[12];

	// Singular only if the determinant's MAGNITUDE is nearly zero; a plain `det < CM_EPS`
	// would wrongly reject every matrix with a negative determinant.
	if (fabs(det) < CM_EPS)
		return false;

	det = 1.0 / det;

	// A is only overwritten once success is certain.
	for (i = 0; i < 16; i++)
		A[i] = inv[i] * det;

	return true;
}
321 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>	Scales a 2-component vector in place to unit Euclidean length. </summary>
///
/// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
///
/// <param name="vector">	[in,out] Pointer to 2 floats. Left unchanged on failure. </param>
///
/// <returns>
/// 	true if the vector was normalized, false if its length is zero (or not a number), in
/// 	which case no division is performed.
/// </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool normalizeVec2(float* vector) {

	float n = sqrtf(vector[0] * vector[0]
					+ vector[1] * vector[1]);

	// A zero-length vector has no direction; dividing by 0 would fill it with NaN.
	// `!(n > 0)` also catches a NaN length.
	if (!(n > 0.0f))
		return false;

	vector[0] /= n;
	vector[1] /= n;
	
	return true;
}
342 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>	Scales a 3-component vector in place to unit Euclidean length. </summary>
///
/// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
///
/// <param name="vector">	[in,out] Pointer to 3 floats. Left unchanged on failure. </param>
///
/// <returns>
/// 	true if the vector was normalized, false if its length is zero (or not a number), in
/// 	which case no division is performed.
/// </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool normalizeVec3(float* vector) {

	float n = sqrtf(vector[0] * vector[0]
		+ vector[1] * vector[1]
		+ vector[2] * vector[2]);

	// A zero-length vector has no direction; dividing by 0 would fill it with NaN.
	// `!(n > 0)` also catches a NaN length.
	if (!(n > 0.0f))
		return false;

	vector[0] /= n;
	vector[1] /= n;
	vector[2] /= n;

	return true;
}
363 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>	Scales a 4-component vector in place to unit Euclidean length. </summary>
///
/// <remarks>	I. Kuhlemann, P. Jauer, 2014-10-27. </remarks>
///
/// <param name="vector">	[in,out] Pointer to 4 floats. Left unchanged on failure. </param>
///
/// <returns>
/// 	true if the vector was normalized, false if its length is zero (or not a number), in
/// 	which case no division is performed.
/// </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool normalizeVec4(float* vector) {
	float n = sqrtf(vector[0] * vector[0]
		+ vector[1] * vector[1]
		+ vector[2] * vector[2]
		+ vector[3] * vector[3]);

	// A zero-length vector has no direction; dividing by 0 would fill it with NaN.
	// `!(n > 0)` also catches a NaN length.
	if (!(n > 0.0f))
		return false;

	vector[0] /= n;
	vector[1] /= n;
	vector[2] /= n;
	vector[3] /= n;

	return true;
}
386 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>	Cross product of two 3-component vectors, c = a x b. </summary>
///
/// <remarks>
/// 	I. Kuhlemann, P. Jauer, 2014-10-27.
/// 	All three components are computed before any is stored, so c may point to the same
/// 	storage as a or b.
/// </remarks>
///
/// <param name="c">	[out] Receives the 3 components of a x b. May alias a or b. </param>
/// <param name="a">	[in] Left operand (3 components). </param>
/// <param name="b">	[in] Right operand (3 components). </param>
///
/// <returns>	Always true. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
template <typename T>
__device__ __forceinline__ bool crossVec3(T* c, T* a, T* b) {

	// Buffer the result first: storing c[0] directly would corrupt a[0]/b[0] when c aliases
	// an input, and those values are still needed for c[1] and c[2].
	const T cx = a[1] * b[2] - a[2] * b[1];
	const T cy = a[2] * b[0] - a[0] * b[2];
	const T cz = a[0] * b[1] - a[1] * b[0];

	c[0] = cx;
	c[1] = cy;
	c[2] = cz;

	return true;
}
407 | 
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// 	Builds a rotation matrix M from the direction between the translation entries of two
/// 	flattened 4x4 matrices A and B.
/// </summary>
///
/// <remarks>
/// 	The direction x is (A[3]-B[3], A[7]-B[7], A[11]-B[11]) normalized, i.e. the difference of
/// 	the last-column entries of rows 0..2. Depending on how many components of x exceed CM_EPS
/// 	in magnitude, M is composed from fixed-angle z/y rotations (one component) or from an
/// 	orthonormal frame [x u v] followed by a y rotation of -PI/2 and a z rotation (two or three
/// 	components).
/// 	If no component exceeds CM_EPS (e.g. A and B have identical translation entries, which
/// 	makes the normalization produce NaN), M is NOT written and the function still returns true.
/// 	NOTE(review): callers presumably need to pre-initialise M for that case - confirm.
/// 	TODO: this routine definitely needs to be rethought and optimized.
/// </remarks>
///
/// <param name="M">	[out] Receives the 16 floats of the resulting matrix. </param>
/// <param name="A">	[in] First matrix; only indices 3, 7 and 11 are read. </param>
/// <param name="B">	[in] Second matrix; only indices 3, 7 and 11 are read. </param>
///
/// <returns>	Always true. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
__device__ __forceinline__ bool mat4ExtractRotation(float* M, float* A, float* B) {
	
	// Unit direction from B's translation entries to A's.
	float x[3];
	x[0] = A[3] - B[3];
	x[1] = A[7] - B[7];
	x[2] = A[11] - B[11];

	normalizeVec3(x);
	
	// Count the components that are significantly non-zero.
	int cnt = 0;
	for (int i = 0; i < 3; i++)
	{
		if (fabsf(x[i]) > CM_EPS)
			cnt++;
	}

	switch (cnt)
	{
	case 1:
		// x is aligned with exactly one coordinate axis.
		if (fabsf(x[0]) > CM_EPS){

			// Aligned with the x-axis: RotZ(PI) * RotY(+-PI/2), sign taken from x[0].
			float beta = (x[0] < 0) ? -CM_PI/2.0 : CM_PI/2.0;
			
			float ct1 = cos(beta);
			float st1 = sin(beta);
			float ct2 = cos(CM_PI);
			float st2 = sin(CM_PI);

			float T1[16] = {ct2,  -st2, 0.0, 0.0,
							st2,   ct2, 0.0, 0.0,
							0.0, 0.0, 1.0, 0.0,
							0.0, 0.0, 0.0, 1.0 };
			
			float T2[16] = {  ct1, 0.0, st1,  0.0,
							 0.0, 1.0, 0.0, 0.0,
							 -st1, 0.0, ct1,  0.0,
							 0.0, 0.0, 0.0, 1.0 };

			mat4Mult(M, T1, T2);

		} else if (fabsf(x[1]) > CM_EPS) {
			// Aligned with the y-axis: RotZ(+-PI/2) * RotY(PI/2), sign taken from x[1].
			// (This branch used to re-test x[0] and was therefore unreachable.)
				
			float gamma = (x[1] < 0) ? CM_PI / 2.0 : -CM_PI / 2.0;

			float ct1 = cos(CM_PI/2);
			float st1 = sin(CM_PI/2);
			float ct2 = cos(gamma);
			float st2 = sin(gamma);

			float T1[16] = { ct2, -st2, 0.0, 0.0,
				st2, ct2, 0.0, 0.0,
				0.0, 0.0, 1.0, 0.0,
				0.0, 0.0, 0.0, 1.0 };

			float T2[16] = { ct1, 0.0, st1, 0.0,
				0.0, 1.0, 0.0, 0.0,
				-st1, 0.0, ct1, 0.0,
				0.0, 0.0, 0.0, 1.0 };

			mat4Mult(M, T1, T2);

		} else {
			// Aligned with the z-axis: plain RotZ(PI).
			
			float ct2 = cos(CM_PI);
			float st2 = sin(CM_PI);

			M[0] = ct2; M[1] = -st2; M[2] = 0.0; M[3] = 0.0;
			M[4] = st2; M[5] = ct2; M[6] = 0.0; M[7] = 0.0;
			M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0;
			M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
		}
		break;
	case 2:
		// x lies in a coordinate plane; which one is decided by the vanishing component.
		if (fabsf(x[0]) <= CM_EPS){
			// vector u: chosen with u[2] = 0 and u[1] solved from dot(x, u) = 0
			float u[3];
			u[0] = x[2];
			u[2] = 0.0;
			u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1];

			normalizeVec3(u);

			// vector v: completes the frame, v = x cross u
			float v[3];
			crossVec3(v, x, u);

			float ct1 = cos(-CM_PI / 2);
			float st1 = sin(-CM_PI / 2);
			float ct2 = cos(CM_PI);
			float st2 = sin(CM_PI);

			// TODO: use 3x3 matrices here for speed
			// Frame matrix with x, u, v as columns.
			float T1[16] = { x[0], u[0], v[0], 0.0,
				x[1], u[1], v[1], 0.0,
				x[2], u[2], v[2], 0.0,
				0.0, 0.0, 0.0, 1.0 };

			// Rotation about y by -PI/2.
			float T2[16] = { ct1, 0.0, st1, 0.0,
				0.0, 1.0, 0.0, 0.0,
				-st1, 0.0, ct1, 0.0,
				0.0, 0.0, 0.0, 1.0 };

			float T3[16];
			mat4Mult(T3, T1, T2);

			// Reuse T1 as a rotation about z by PI.
			T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
			T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
			T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
			T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;

			mat4Mult(M, T3, T1);
		}
		else if (fabsf(x[1]) <= CM_EPS) {
			// vector u: chosen with u[2] = 0 and u[0] solved from dot(x, u) = 0
			float u[3];
			u[1] = fabsf(x[2]);
			u[2] = 0.0;
			u[0] = (-(x[1] * u[1]) - (x[2] * u[2])) / x[0];

			normalizeVec3(u);

			// vector v: completes the frame, v = x cross u
			float v[3];
			crossVec3(v, x, u);

			float ct1 = cos(-CM_PI / 2);
			float st1 = sin(-CM_PI / 2);
			float ct2 = cos(CM_PI);
			float st2 = sin(CM_PI);

			// TODO: use 3x3 matrices here for speed
			// Frame matrix with x, u, v as columns.
			float T1[16] = { x[0], u[0], v[0], 0.0,
				x[1], u[1], v[1], 0.0,
				x[2], u[2], v[2], 0.0,
				0.0, 0.0, 0.0, 1.0 };

			// Rotation about y by -PI/2.
			float T2[16] = { ct1, 0.0, st1, 0.0,
				0.0, 1.0, 0.0, 0.0,
				-st1, 0.0, ct1, 0.0,
				0.0, 0.0, 0.0, 1.0 };

			float T3[16];
			mat4Mult(T3, T1, T2);

			// Reuse T1 as a rotation about z by PI.
			T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
			T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
			T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
			T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;

			mat4Mult(M, T3, T1);
		}
		else {
			// x[2] vanishes: u is taken along the z-axis with the sign of x[1].
			float u[3];
			u[0] = 0.0;
			u[2] = x[1];
			//u[1] = (-(x[2] * u[2]) - (x[1] * u[2])) / x[1];
			u[1] = 0.0;

			normalizeVec3(u);

			// vector v: completes the frame, v = x cross u
			float v[3];
			crossVec3(v, x, u);

			float ct1 = cos(-CM_PI / 2);
			float st1 = sin(-CM_PI / 2);

			// TODO: use 3x3 matrices here for speed
			// Frame matrix with x, u, v as columns.
			float T1[16] = { x[0], u[0], v[0], 0.0,
				x[1], u[1], v[1], 0.0,
				x[2], u[2], v[2], 0.0,
				0.0, 0.0, 0.0, 1.0 };

			// Rotation about y by -PI/2.
			float T2[16] = { ct1, 0.0, st1, 0.0,
				0.0, 1.0, 0.0, 0.0,
				-st1, 0.0, ct1, 0.0,
				0.0, 0.0, 0.0, 1.0 };

			float T3[16];
			mat4Mult(T3, T1, T2);

			// Reuse T1 as a rotation about z; unlike the other branches this one uses -PI/2.
			// NOTE(review): the differing angle is kept as found - confirm it is intentional.
			T1[0] = ct1; T1[1] = -st1; T1[2] = 0.0; T1[3] = 0.0;
			T1[4] = st1; T1[5] = ct1; T1[6] = 0.0; T1[7] = 0.0;
			T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
			T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;

			mat4Mult(M, T3, T1);
		}
		break;
	case 3:
		{
			// General direction: all three components are non-zero, so dividing by x[1] is safe.
			// vector u: chosen with u[2] = 0 and u[1] solved from dot(x, u) = 0
			float u[3];
			u[0] = x[2];
			u[2] = 0.0;
			u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1];
		
			normalizeVec3(u);

			// vector v: completes the frame, v = x cross u
			float v[3];
			crossVec3(v, x, u);

			float ct1 = cos(-CM_PI / 2);
			float st1 = sin(-CM_PI / 2);
			float ct2 = cos(CM_PI);
			float st2 = sin(CM_PI);

			// TODO: use 3x3 matrices here for speed
			// Frame matrix with x, u, v as columns.
			float T1[16] = {x[0], u[0], v[0], 0.0,
							x[1], u[1], v[1], 0.0,
							x[2], u[2], v[2], 0.0,
							0.0,  0.0,  0.0,  1.0};

			// Rotation about y by -PI/2.
			float T2[16] = {ct1, 0.0, st1, 0.0,
							0.0, 1.0, 0.0, 0.0,
							-st1, 0.0, ct1, 0.0,
							0.0, 0.0, 0.0, 1.0 };

			float T3[16];
			mat4Mult(T3, T1, T2);

			// Reuse T1 as a rotation about z by PI.
			T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
			T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
			T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
			T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;

			mat4Mult(M, T3, T1);
		
			break;
		}
	default:
		// Degenerate input: no usable direction (cnt == 0). M is intentionally left untouched.
		break;
	}

	return true;
}
648 | 
649 | //#endif // __CUDA_MATH_CUH


--------------------------------------------------------------------------------