├── GraphReg
├── input.png
├── result.png
├── input_clean.png
├── result_clean.png
├── TransMat.m
├── AxisAngle.m
├── readme.txt
├── computeVariationAndFeaturePoint.m
├── cuda
│ ├── MatchingMethods.cuh
│ ├── KernelMatching.cu
│ ├── KernelMatching.ptx
│ └── CudaMath.cuh
├── SalientFeature.m
├── GraphReg.m
├── AdaptSimulatedAnnealing.m
├── AdaptiveSimulatedAnnealingOptimization.m
├── findPointNormals.m
└── IterateOnGPU5.m
└── README.md
/GraphReg/input.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input.png
--------------------------------------------------------------------------------
/GraphReg/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result.png
--------------------------------------------------------------------------------
/GraphReg/input_clean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input_clean.png
--------------------------------------------------------------------------------
/GraphReg/result_clean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result_clean.png
--------------------------------------------------------------------------------
/GraphReg/TransMat.m:
--------------------------------------------------------------------------------
1 | function [ M ] = TransMat( vector )
2 | %TRANSMAT Generates a homogenuous 4x4-matrix representing a translation
3 | % about the vector (x,y,z)
4 | %
5 | %input:
6 | % vector the translation vector (x,y,z)
7 | %
8 | %return:
9 | % M the matrix
10 |
11 | M = [1 0 0 vector(1); ...
12 | 0 1 0 vector(2); ...
13 | 0 0 1 vector(3); ...
14 | 0 0 0 1];
15 | end
16 |
17 |
--------------------------------------------------------------------------------
/GraphReg/AxisAngle.m:
--------------------------------------------------------------------------------
1 | function [ T ] = AxisAngle( axis, angle )
2 | %AXISANGLE Convertes axis angle representation to a 4x4 homogenuous
3 | %rotation matrix
4 | %
5 |
6 | c = cos(angle);
7 | s = sin(angle);
8 | t = 1.0 - c;
9 |
10 | axis = axis / norm(axis);
11 | x = axis(1);
12 | y = axis(2);
13 | z = axis(3);
14 |
15 | T = [(x*x*t + c), (x*y*t - z*s), (x*z*t + y*s), 0;...
16 | (y*x*t + z*s), (y*y*t + c), (y*z*t - x*s), 0;...
17 | (z*x*t - y*s), (z*y*t + x*s), (z*z*t + c), 0;...
18 | 0, 0, 0, 1];
19 | end
20 |
21 |
--------------------------------------------------------------------------------
/GraphReg/readme.txt:
--------------------------------------------------------------------------------
1 | This work aims to improve the robustness and convergence process with respect to dynamical point cloud registration methods.
2 |
3 | The designed method is a local registration framework and is an effective alternative to ICP.
4 |
5 | We use the graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and attain invariants to rigid transformations.
6 |
7 | Run "GraphReg.m" to see demo examples.
8 |
9 | There are several parameters that can be adjusted for better registration results or faster convergence process, such as "cool_down".
10 |
11 |
12 | If you find any question, please do not hesitate to contact me
13 | myzhao@baai.ac.cn
14 |
15 |
16 | If you find our work useful, please cite
17 | M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022.
--------------------------------------------------------------------------------
/GraphReg/computeVariationAndFeaturePoint.m:
--------------------------------------------------------------------------------
1 | function [pointScore,featScore] = computeVariationAndFeaturePoint(coords, geoFeat,K)
2 | %==========================
3 | % Compute the point response intensity and the graph gradient w.r.t. normals and curvatures.
4 | %Input:
5 | % coords: Nx3 array point coordinate
6 | % geoFeat: Nx4 point normal and curvature
7 | % K: K nearest neighbors
8 | %
9 | % Output:
10 | % pointScore: Nx1 the response of each data point with respect to local...
11 | % variation
12 | % featScore: Nx2 the difference between adjacent normals and curvature
13 | %==========================
14 |
15 | z = single( coords );
16 | N = size(z, 1);
17 |
18 |
19 | %----------determine the radius and build the adjacency matrix
20 |
21 |
22 | [~, dist] = knnsearch( coords, coords( round( linspace(1,N, 20)) ,:), 'k', K, 'distance', 'euclidean');
23 | radius = max( dist(:, K) );
24 |
25 | % find the K nearest points in z of each point in z
26 | [idx, dist] = knnsearch( z, z, 'k', K, 'distance', 'euclidean');
27 | dist(:,1)=[];
28 | idx(:,1)=[];
29 |
30 | %build the adjacency matrix
31 | tmp=exp( -(dist./(radius*0.5) ).^2);
32 | weight=tmp./sum(tmp,2);
33 |
34 |
35 |
36 | weightPointSum=zeros(N,3);
37 | normalDiff=zeros(N,3);
38 |
39 |
40 | %--------------------compute the featScore
41 | for i=1:N
42 | % near index of current point
43 | nearInx=idx(i,:);
44 | %extract current normal and near normal
45 | currentNormal=geoFeat(i,1:3);
46 | nearPoint=z(nearInx,:);
47 | nearNormal=geoFeat(nearInx,1:3);
48 |
49 | % reshape near weight by K
50 | nearWeight=repmat(weight(i,:)',1,3);
51 |
52 | % difference of current point and normal with its nieghborhood
53 | weightPointSum(i,:)=sum(nearPoint.*nearWeight,1);
54 |
55 | % the second calculation manner
56 | normalDiffSquare=sum((repmat(currentNormal,K-1,1)-nearNormal).^2,2);
57 | normalDiff(i,:)=sqrt(nearWeight(:,1)')*normalDiffSquare;
58 | end
59 |
60 | %record scores of point response intensity and local geometry variation
61 | pointScore=sum((z-weightPointSum).^2,2);
62 | featScore=[normalDiff(:,1) geoFeat(:,4)];
--------------------------------------------------------------------------------
/GraphReg/cuda/MatchingMethods.cuh:
--------------------------------------------------------------------------------
1 | #include "CudaMath.cuh"
2 |
3 | __device__ void Gravity(const float* p1, const float* p2, float* force)
4 | {
5 | float r[3];
6 | r[0] = p1[0] - p2[0];
7 | r[1] = p1[1] - p2[1];
8 | r[2] = p1[2] - p2[2];
9 |
10 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]);
11 |
12 | dist = dist*dist*dist;
13 |
14 | if(dist < 0.0000001)
15 | dist = 0.0000001;
16 |
17 |
18 | force[0] = r[0] / dist;
19 | force[1] = r[1] / dist;
20 | force[2] = r[2] / dist;
21 |
22 | }
23 |
24 | __device__ void Coulomb(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force)
25 | {
26 | float r[3];
27 | r[0] = p1[0] - p2[0];
28 | r[1] = p1[1] - p2[1];
29 | r[2] = p1[2] - p2[2];
30 |
31 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]);
32 |
33 | float cDist = 0.f;
34 |
35 | for(int i = 0; i < numFeat; i++)
36 | {
37 | float fd = abs(f1[idx1*numFeat + i] - f2[idx2*numFeat + i]);
38 | cDist += abs(fd*fd);
39 | }
40 |
41 | cDist = cDist / sqrt((float)numFeat);
42 |
43 | float c = 1-abs(cDist);
44 |
45 | dist = dist*dist*dist;
46 |
47 | //c = c*c*c;
48 |
49 | if(dist < 0.0000001)
50 | dist = 0.0000001;
51 |
52 |
53 | force[0] = c * r[0] / dist;
54 | force[1] = c * r[1] / dist;
55 | force[2] = c * r[2] / dist;
56 | }
57 |
58 | __device__ void CoulombPM(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force)
59 | {
60 | float r[3];
61 | r[0] = p1[0] - p2[0];
62 | r[1] = p1[1] - p2[1];
63 | r[2] = p1[2] - p2[2];
64 |
65 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]);
66 |
67 | float cDist = 0.f;
68 | for(int i = 0; i < numFeat; i++)
69 | {
70 | float fd = abs(f1[idx1*numFeat + i] - f2[idx2*numFeat + i]);
71 | cDist += abs(fd*fd);
72 | }
73 | cDist = cDist / sqrt((float)numFeat);
74 | float c = 2 * (1 - cDist) - 1;
75 |
76 | dist = dist*dist*dist;
77 |
78 | //c = c*c*c;
79 |
80 | if(dist < 0.0000001)
81 | dist = 0.0000001;
82 |
83 | force[0] = c * r[0] / dist;
84 | force[1] = c * r[1] / dist;
85 | force[2] = c * r[2] / dist;
86 | }
--------------------------------------------------------------------------------
/GraphReg/SalientFeature.m:
--------------------------------------------------------------------------------
1 | function [pt,feature_score,struct_tensor]=SalientFeature(pc,num,removeOut)
2 | %========================================
3 | % Compute the point response intensity and the local geometry variation
4 | % Input:
5 | % pc the point cloud
6 | % num the downsampling rate of the salient points
7 | % removeOut if removing outliers according to the x84 statistics
8 | % Output:
9 | % pt the downsampled point cloud with salient feature points
10 | % feature_score the point response intensity
11 | % struct_tensor the local geometry variation
12 | %
13 | %========================================
14 |
15 | coordinate=pc.Location;
16 |
17 | %------------compute normal and curvature for each data point----------
18 | [Nor,Cur]=findPointNormals(coordinate,10,[0,0,10],true);
19 |
20 | geoFeat=[Nor Cur];
21 |
22 | %-----------point response intensity (pointScore) and local feature variation (featScore)-----------------
23 | [pointScore,featScore] = computeVariationAndFeaturePoint(coordinate, geoFeat,10);
24 |
25 | nanInx=isnan(pointScore);
26 | featScore(nanInx,:)=[];
27 | pointScore(nanInx,:)=[];
28 | coordinate(nanInx,:)=[];
29 |
30 | N = size(pointScore,1);
31 |
32 |
33 | %----------------sample round(N/num) points------------------
34 | s = RandStream('mlfg6331_64'); % set random number
35 |
36 |
37 | if removeOut
38 | % use robust statistics x84 rule to remove scattered outliers
39 | x84_rule=median(abs(pointScore-repmat(median(pointScore),N,1)));
40 | M=find(pointScore<=5.2*x84_rule);%5.2 in default or smaller values to remove more outliers
41 |
42 | else
43 | % random sampling
44 | % use graph filter h(A)=I-A
45 | M = datasample(s,1:N, round(N/num), 'Replace', false,'Weights', pointScore(:,1) );
46 | end
47 |
48 |
49 | feature_data=[coordinate(M,1), coordinate(M,2), coordinate(M, 3)];
50 | feature_score=pointScore(M,:);
51 | struct_tensor=featScore(M,:);
52 |
53 |
54 | %----------sampled points are stored as point cloud---------------
55 | pt=pointCloud(feature_data);
56 |
57 |
58 | if ~removeOut
59 | %---------no sampling
60 | pt=pc;
61 | feature_score=pointScore;
62 | struct_tensor=featScore;
63 | end
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/GraphReg/GraphReg.m:
--------------------------------------------------------------------------------
1 | %===========================
2 | % GraphReg: Dynamical point cloud registration with geometry-aware graph signal
3 | % processing
4 | % GraphReg is a local registration framework aiming to produce robust and
5 | % accurate results through the graph signal processing theory and the adaptive simulated annealing mechanism.
6 | % M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang,
7 | %"GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing,"
8 | % IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022.
9 | %==========================
10 |
11 | clear;
12 | clc;
13 | close all;
14 |
15 | %------read the input point clouds
16 | file1='.\data\bun000.ply';
17 | file2='.\data\bun045.ply';
18 |
19 | % file1='.\data\dragonStandRight_0.ply';
20 | % file2='.\data\dragonStandRight_24.ply';
21 |
22 | % add outliers
23 | file1='.\data\bun000_outlier_0.1.ply';
24 | file2='.\data\bun045_outlier_0.1.ply';
25 |
26 | % file1='.\data\bun000_outlier_0.5.ply';
27 | % file2='.\data\bun045_outlier_0.5.ply';
28 |
29 |
30 |
31 | tgt=pcread(file1);
32 | src=pcread(file2);
33 |
34 | %------downsample and show the point clouds
35 | tgt=pcdownsample(tgt,'gridAverage',0.001);
36 | src=pcdownsample(src,'gridAverage',0.001);
37 |
38 | figure;
39 | pcshowpair(src,tgt);
40 | title("Input point clouds");
41 |
42 |
43 | %------compute the point intensity (scoreP,scoreQ) and the local geometric feature (featP, featQ)
44 | [tgt2,scoreP,featP]=SalientFeature(tgt,10,true);% 10 in general
45 | [src2,scoreQ,featQ]=SalientFeature(src,10,true);
46 |
47 |
48 | tgtPt=tgt2.Location;
49 | srcPt=src2.Location;
50 |
51 |
52 | tgtPt=tgtPt';
53 | srcPt=srcPt';
54 |
55 | fp=featP';
56 | fq=featQ';
57 |
58 |
59 | %-----------start registration----------------
60 | % add features and score to a struct
61 | feat = struct('p', fp, 'q', fq);
62 | score=struct('pScore',scoreP','qScore',scoreQ');
63 |
64 |
65 | addpath('cuda');
66 |
67 |
68 | cool_down=0.9;%0.9 in default, adjust cool_down for better results or faster convergence process such as 0.8:0.02:0.98;
69 |
70 |
71 | [T] = AdaptiveSimulatedAnnealingOptimization(tgtPt, srcPt, feat,score,cool_down);
72 |
73 |
74 | tform=affine3d(T');
75 |
76 | src2tgt=pctransform(src,tform);
77 |
78 |
79 | figure;
80 | pcshowpair(src2tgt,tgt);
81 | title("Registration results");
82 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GraphReg (TIP 2022)
2 |
3 |
4 | |
5 | |
6 | |
7 | |
8 |
9 |
10 |
11 | ## 1. Motivation
12 | This work aims to improve the robustness and convergence process with respect to currently dynamical point cloud registration methods.
13 |
14 | The designed method is a local registration framework and is an effective alternative to ICP.
15 |
16 | We use the graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and attain invariants to rigid transformations.
17 |
18 | ## 2. Useage
19 |
20 | `Run "GraphReg.m" to see demo examples.`
21 |
22 | There are several parameters that can be adjusted for better registration results or faster convergence process:
23 | + **cool_down**: Smaller values typically result in faster convergence but with fluctuations, whereas larger ones ensure more accurate registrations. The suggested interval scope empirically attaining good results is [0.8, 0.98].
24 | + **$\alpha$**: Although we can fix $\alpha=5.2$ to remove outliers for most test settings, smaller $\alpha$ will further improve the robusteness when there are a large percentage of outliers.
25 |
26 | ## 3. Contact
27 | If you have any question, please do not hesitate to contact myzhao@baai.ac.cn
28 |
29 | ## 4. Citation
30 | If you find this implementation useful for your research, please cite:
31 | + M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022, doi: 10.1109/TIP.2022.3223793.
32 | + P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard and F. Ernst, "Efficient Registration of High-Resolution Feature Enhanced Point Clouds," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 41, no. 5, pp. 1102-1115, 1 May 2019, doi: 10.1109/TPAMI.2018.2831670.
33 |
--------------------------------------------------------------------------------
/GraphReg/AdaptSimulatedAnnealing.m:
--------------------------------------------------------------------------------
1 | function [ temperature,AcceptRate,transition, Ekin ] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature, Ekin, Etrans, Erot,cool_down)
2 |
3 |
4 | transition = 0;
5 |
6 | cEkin = log(Etrans + Erot)/log(10);
7 | Ekin = [Ekin cEkin];%cEkin: current kinetic energy
8 |
9 | if(length(Ekin) > 3)
10 |
11 | %% case 3: avoid large steps of both energies
12 | oE = Ekin(end - 1);%last kinect over E
13 | cE = Ekin(end);%current kinect over E
14 |
15 | dE = cE - oE;
16 |
17 | if (dE<0)
18 | AcceptRate = 1/500*(499*AcceptRate + 1);
19 | else
20 | p = min(1, exp(-dE / temperature));
21 | if(rand < p)
22 | AcceptRate = 1/500*(499*AcceptRate + 1);
23 | else
24 | transition = 1;
25 | AcceptRate = 1/500*(499*AcceptRate);
26 | end
27 |
28 | end
29 |
30 |
31 | if iter/EvalsMax < 0.15
32 | LamRate = 0.44 + 0.56 * 560^(-1*iter/EvalsMax/0.15);
33 | else
34 | if iter/EvalsMax < 0.65
35 | LamRate = 0.44;
36 | else
37 | LamRate = 0.44 * 440^- (((iter/EvalsMax)-0.65)/0.35);
38 | end
39 | end
40 |
41 | %adaptive update temperature
42 | if AcceptRate > LamRate
43 | temperature = 0.9*temperature;
44 | else
45 | temperature = temperature /cool_down; % cool_down can be adjusted for better results
46 | end
47 | end
48 | end
49 |
50 |
51 |
--------------------------------------------------------------------------------
/GraphReg/cuda/KernelMatching.cu:
--------------------------------------------------------------------------------
1 | #include "MatchingMethods.cuh"
2 |
3 | __device__ void GetForce(int matchingMethod, const float* p1, const float* p2, const int numFeat, const float* f1, int id1, const float* f2, int id2, float* force)
4 | {
5 | switch (matchingMethod)
6 | {
7 | // Gravitation
8 | case 0:
9 | {
10 | Gravity(p1, p2, force);
11 | break;
12 | }
13 | case 1:
14 | {
15 | Coulomb(p1, p2, numFeat, f1, id1, f2, id2, force);
16 | break;
17 | }
18 | case 2:
19 | {
20 | CoulombPM(p1, p2, numFeat, f1, id1, f2, id2, force);
21 | break;
22 | }
23 | default:
24 | {
25 | Gravity(p1, p2, force);
26 | break;
27 | }
28 | }
29 |
30 | }
31 |
32 | __global__ void MatchingStep( const float* cloud1, const float* cloud2,
33 | const int* index1, const int* index2,
34 | const float* feat1, const float* feat2,
35 | const int numFeat, const float* centroid2,
36 | const float* M,
37 | float* force, float* torque, float* momentInt,
38 | const int matchingMethod,
39 | const int numPoints, const int numSteps)
40 | {
41 | int tID = threadIdx.x;
42 |
43 | if(tID >= numPoints)
44 | return;
45 |
46 | int idx2 = index2[tID];
47 |
48 | float p2[3];
49 | p2[0] = cloud2[idx2 * 3 + 0];
50 | p2[1] = cloud2[idx2 * 3 + 1];
51 | p2[2] = cloud2[idx2 * 3 + 2];
52 |
53 | // apply the current transformation to the points of cloud2 and the centroid 2
54 | // transform the cloud2
55 | float tp2[3];
56 | tp2[0] = M[0]*p2[0] + M[1]*p2[1] + M[2]*p2[2] + M[3];
57 | tp2[1] = M[4]*p2[0] + M[5]*p2[1] + M[6]*p2[2] + M[7];
58 | tp2[2] = M[8]*p2[0] + M[9]*p2[1] + M[10]*p2[2] + M[11];
59 |
60 | float f[3];
61 | f[0] = 0;
62 | f[1] = 0;
63 | f[2] = 0;
64 |
65 | for(int i = 0; i < numSteps; i++)
66 | {
67 | //int idx1 = index1[tID];
68 |
69 | int idx1 = index1[i];
70 |
71 | float p1[3];
72 | p1[0] = cloud1[idx1 * 3 + 0];
73 | p1[1] = cloud1[idx1 * 3 + 1];
74 | p1[2] = cloud1[idx1 * 3 + 2];
75 |
76 | float fDir[3];
77 |
78 | GetForce(matchingMethod, p1, tp2, numFeat, feat1, idx1, feat2, idx2, fDir);
79 |
80 | f[0] += fDir[0];
81 | f[1] += fDir[1];
82 | f[2] += fDir[2];
83 |
84 | }
85 |
86 | force[tID * 3 + 0] = f[0] ;
87 | force[tID * 3 + 1] = f[1] ;
88 | force[tID * 3 + 2] = f[2] ;
89 |
90 | float p2c[3];
91 | p2c[0] = tp2[0] - centroid2[0];
92 | p2c[1] = tp2[1] - centroid2[1];
93 | p2c[2] = tp2[2] - centroid2[2];
94 |
95 | float tDir[3];
96 | crossVec3(tDir, p2c, f);
97 |
98 | torque[tID * 3 + 0] = tDir[0];
99 | torque[tID * 3 + 1] = tDir[1];
100 | torque[tID * 3 + 2] = tDir[2];
101 |
102 | // the moment of inertia with the mass = 1
103 | momentInt[tID] = sqrt( p2c[0]*p2c[0] + p2c[1]*p2c[1] + p2c[2]*p2c[2] );
104 |
105 | }
--------------------------------------------------------------------------------
/GraphReg/AdaptiveSimulatedAnnealingOptimization.m:
--------------------------------------------------------------------------------
1 | function [ T] = AdaptiveSimulatedAnnealingOptimization( p, q, features,score,cool_down)
2 | %-------------adaptive simulated annealing
3 | [T] = IterateOnGPU5(p, q, features,score,cool_down);
4 | end
5 |
6 |
--------------------------------------------------------------------------------
/GraphReg/findPointNormals.m:
--------------------------------------------------------------------------------
1 | function [ normals, curvature ] = findPointNormals(points, numNeighbours, viewPoint, dirLargest)
2 | %FINDPOINTNORMALS Estimates the normals of a sparse set of n 3d points by
3 | % using a set of the closest neighbours to approximate a plane.
4 | %
5 | % Required Inputs:
6 | % points- nx3 set of 3d points (x,y,z)
7 | %
8 | % Optional Inputs: (will give default values on empty array [])
9 | % numNeighbours- number of neighbouring points to use in plane fitting
10 | % (default 9)
11 | % viewPoint- location all normals will point towards (default [0,0,0])
12 | % dirLargest- use only the largest component of the normal in determining
13 | % its direction wrt the viewPoint (generally provides a more stable
14 | % estimation of planes near the viewPoint, default true)
15 | %
16 | % Outputs:
17 | % normals- nx3 set of normals (nx,ny,nz)
18 | % curvature- nx1 set giving the curvature
19 | %
20 | % References-
21 | % The implementation closely follows the method given at
22 | % http://pointclouds.org/documentation/tutorials/normal_estimation.php
23 | % This code was used in generating the results for the journal paper
24 | % Multi-modal sensor calibration using a gradient orientation measure
25 | % http://www.zjtaylor.com/welcome/download_pdf?pdf=JFR2013.pdf
26 | %
27 | % This code was written by Zachary Taylor
28 | % zacharyjeremytaylor@gmail.com
29 | % http://www.zjtaylor.com
30 |
31 | %% check inputs
32 | validateattributes(points, {'numeric'},{'ncols',3});
33 |
34 | if(nargin < 2)
35 | numNeighbours = [];
36 | end
37 | if(isempty(numNeighbours))
38 | numNeighbours = 9;
39 | else
40 | validateattributes(numNeighbours, {'numeric'},{'scalar','positive'});
41 | if(numNeighbours > 100)
42 | warning(['%i neighbouring points will be used in plane'...
43 | ' estimation, expect long run times, large ram usage and'...
44 | ' poor results near edges'],numNeighbours);
45 | end
46 | end
47 |
48 | if(nargin < 3)
49 | viewPoint = [];
50 | end
51 | if(isempty(viewPoint))
52 | viewPoint = [0,0,0];
53 | else
54 | validateattributes(viewPoint, {'numeric'},{'size',[1,3]});
55 | end
56 |
57 | if(nargin < 4)
58 | dirLargest = [];
59 | end
60 | if(isempty(dirLargest))
61 | dirLargest = true;
62 | else
63 | validateattributes(dirLargest, {'logical'},{'scalar'});
64 | end
65 |
66 | %% setup
67 |
68 | %ensure inputs of correct type
69 | points = double(points);
70 | viewPoint = double(viewPoint);
71 |
72 | %create kdtree
73 | kdtreeobj = KDTreeSearcher(points,'distance','euclidean');
74 |
75 | %get nearest neighbours
76 | n = knnsearch(kdtreeobj,points,'k',(numNeighbours+1));
77 |
78 | %remove self
79 | n = n(:,2:end);
80 |
81 | %find difference in position from neighbouring points
82 | p = repmat(points(:,1:3),numNeighbours,1) - points(n(:),1:3);
83 | p = reshape(p, size(points,1),numNeighbours,3);
84 |
85 | %calculate values for covariance matrix
86 | C = zeros(size(points,1),6);
87 | C(:,1) = sum(p(:,:,1).*p(:,:,1),2);
88 | C(:,2) = sum(p(:,:,1).*p(:,:,2),2);
89 | C(:,3) = sum(p(:,:,1).*p(:,:,3),2);
90 | C(:,4) = sum(p(:,:,2).*p(:,:,2),2);
91 | C(:,5) = sum(p(:,:,2).*p(:,:,3),2);
92 | C(:,6) = sum(p(:,:,3).*p(:,:,3),2);
93 | C = C ./ numNeighbours;
94 |
95 | %% normals and curvature calculation
96 |
97 | normals = zeros(size(points));
98 | curvature = zeros(size(points,1),1);
99 | for i = 1:(size(points,1))
100 |
101 | %form covariance matrix
102 | Cmat = [C(i,1) C(i,2) C(i,3);...
103 | C(i,2) C(i,4) C(i,5);...
104 | C(i,3) C(i,5) C(i,6)];
105 |
106 | %get eigen values and vectors
107 | [v,d] = eig(Cmat);
108 | d = diag(d);
109 | [lambda,k] = min(d);
110 |
111 | %store normals
112 | normals(i,:) = v(:,k)';
113 |
114 | %store curvature
115 | curvature(i) = lambda / (sum(d)+1e-6);
116 | end
117 |
118 | %% flipping normals
119 |
120 | %ensure normals point towards viewPoint
121 | points = points - repmat(viewPoint,size(points,1),1);
122 | if(dirLargest)
123 | [~,idx] = max(abs(normals),[],2);
124 | idx = (1:size(normals,1))' + (idx-1)*size(normals,1);
125 | dir = normals(idx).*points(idx) > 0;
126 | else
127 | dir = sum(normals.*points,2) > 0;
128 | end
129 |
130 | normals(dir,:) = -normals(dir,:);
131 |
132 | end
--------------------------------------------------------------------------------
/GraphReg/cuda/KernelMatching.ptx:
--------------------------------------------------------------------------------
1 | //
2 | // Generated by NVIDIA NVVM Compiler
3 | //
4 | // Compiler Build ID: CL-21124049
5 | // Cuda compilation tools, release 8.0, V8.0.44
6 | // Based on LLVM 3.4svn
7 | //
8 |
9 | .version 5.0
10 | .target sm_52
11 | .address_size 64
12 |
13 | // .globl _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii
14 |
15 | .visible .entry _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii(
16 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0,
17 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1,
18 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2,
19 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3,
20 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4,
21 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5,
22 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6,
23 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7,
24 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8,
25 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9,
26 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10,
27 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11,
28 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12,
29 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13,
30 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14
31 | )
32 | {
33 | .reg .pred %p<16>;
34 | .reg .f32 %f<165>;
35 | .reg .b32 %r<32>;
36 | .reg .f64 %fd<5>;
37 | .reg .b64 %rd<61>;
38 |
39 |
40 | ld.param.u64 %rd27, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0];
41 | ld.param.u64 %rd19, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1];
42 | ld.param.u64 %rd28, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2];
43 | ld.param.u64 %rd20, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3];
44 | ld.param.u64 %rd29, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4];
45 | ld.param.u64 %rd21, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5];
46 | ld.param.u32 %r12, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6];
47 | ld.param.u64 %rd22, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7];
48 | ld.param.u64 %rd23, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8];
49 | ld.param.u64 %rd24, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9];
50 | ld.param.u64 %rd25, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10];
51 | ld.param.u64 %rd26, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11];
52 | ld.param.u32 %r13, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12];
53 | ld.param.u32 %r15, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13];
54 | ld.param.u32 %r14, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14];
55 | cvta.to.global.u64 %rd1, %rd29;
56 | cvta.to.global.u64 %rd2, %rd27;
57 | cvta.to.global.u64 %rd60, %rd28;
58 | mov.u32 %r1, %tid.x;
59 | setp.ge.s32 %p1, %r1, %r15;
60 | @%p1 bra BB0_18;
61 |
62 | cvta.to.global.u64 %rd30, %rd23;
63 | cvta.to.global.u64 %rd31, %rd19;
64 | cvta.to.global.u64 %rd32, %rd20;
65 | cvt.s64.s32 %rd4, %r1;
66 | mul.wide.s32 %rd33, %r1, 4;
67 | add.s64 %rd34, %rd32, %rd33;
68 | ld.global.u32 %r2, [%rd34];
69 | mul.lo.s32 %r16, %r2, 3;
70 | mul.wide.s32 %rd35, %r16, 4;
71 | add.s64 %rd36, %rd31, %rd35;
72 | ld.global.f32 %f45, [%rd30];
73 | ld.global.f32 %f46, [%rd36];
74 | ld.global.f32 %f47, [%rd30+4];
75 | ld.global.f32 %f48, [%rd36+4];
76 | mul.f32 %f49, %f48, %f47;
77 | fma.rn.f32 %f50, %f46, %f45, %f49;
78 | ld.global.f32 %f51, [%rd30+8];
79 | ld.global.f32 %f52, [%rd36+8];
80 | fma.rn.f32 %f53, %f52, %f51, %f50;
81 | ld.global.f32 %f54, [%rd30+12];
82 | add.f32 %f1, %f54, %f53;
83 | ld.global.f32 %f55, [%rd30+16];
84 | ld.global.f32 %f56, [%rd30+20];
85 | mul.f32 %f57, %f48, %f56;
86 | fma.rn.f32 %f58, %f46, %f55, %f57;
87 | ld.global.f32 %f59, [%rd30+24];
88 | fma.rn.f32 %f60, %f52, %f59, %f58;
89 | ld.global.f32 %f61, [%rd30+28];
90 | add.f32 %f2, %f61, %f60;
91 | ld.global.f32 %f62, [%rd30+32];
92 | ld.global.f32 %f63, [%rd30+36];
93 | mul.f32 %f64, %f48, %f63;
94 | fma.rn.f32 %f65, %f46, %f62, %f64;
95 | ld.global.f32 %f66, [%rd30+40];
96 | fma.rn.f32 %f67, %f52, %f66, %f65;
97 | ld.global.f32 %f68, [%rd30+44];
98 | add.f32 %f3, %f68, %f67;
99 | mov.f32 %f164, 0f00000000;
100 | mov.f32 %f163, %f164;
101 | mov.f32 %f162, %f164;
102 | setp.lt.s32 %p2, %r14, 1;
103 | @%p2 bra BB0_17;
104 |
105 | cvt.rn.f32.s32 %f4, %r12;
106 | setp.eq.s32 %p3, %r13, 0;
107 | mov.f32 %f164, 0f00000000;
108 | mov.f32 %f163, %f164;
109 | mov.f32 %f162, %f164;
110 | mov.u32 %r31, 0;
111 | @%p3 bra BB0_16;
112 |
113 | cvta.to.global.u64 %rd37, %rd21;
114 | mul.lo.s32 %r19, %r2, %r12;
115 | mul.wide.s32 %rd38, %r19, 4;
116 | add.s64 %rd6, %rd37, %rd38;
117 | mov.u64 %rd5, %rd6;
118 | mov.f32 %f164, 0f00000000;
119 | mov.f32 %f163, %f164;
120 | mov.f32 %f162, %f164;
121 | mov.u32 %r28, 0;
122 |
123 | BB0_4:
124 | mul.wide.s32 %rd39, %r28, 4;
125 | add.s64 %rd40, %rd60, %rd39;
126 | ld.global.u32 %r4, [%rd40];
127 | mul.lo.s32 %r20, %r4, 3;
128 | mul.wide.s32 %rd41, %r20, 4;
129 | add.s64 %rd42, %rd2, %rd41;
130 | ld.global.f32 %f75, [%rd42];
131 | sub.f32 %f8, %f75, %f1;
132 | ld.global.f32 %f76, [%rd42+4];
133 | sub.f32 %f9, %f76, %f2;
134 | ld.global.f32 %f77, [%rd42+8];
135 | sub.f32 %f10, %f77, %f3;
136 | mul.f32 %f78, %f9, %f9;
137 | fma.rn.f32 %f79, %f8, %f8, %f78;
138 | fma.rn.f32 %f80, %f10, %f10, %f79;
139 | sqrt.rn.f32 %f11, %f80;
140 | setp.eq.s32 %p4, %r13, 2;
141 | @%p4 bra BB0_10;
142 | bra.uni BB0_5;
143 |
144 | BB0_10:
145 | mov.f32 %f158, 0f00000000;
146 | setp.lt.s32 %p9, %r12, 1;
147 | @%p9 bra BB0_13;
148 |
149 | mul.lo.s32 %r24, %r12, %r4;
150 | mul.wide.s32 %rd44, %r24, 4;
151 | add.s64 %rd59, %rd1, %rd44;
152 | mov.f32 %f158, 0f00000000;
153 | mov.u32 %r30, 0;
154 | mov.u64 %rd58, %rd6;
155 |
156 | BB0_12:
157 | mov.u64 %rd13, %rd58;
158 | ld.global.f32 %f102, [%rd13];
159 | ld.global.f32 %f103, [%rd59];
160 | sub.f32 %f104, %f103, %f102;
161 | abs.f32 %f105, %f104;
162 | mul.f32 %f106, %f105, %f105;
163 | abs.f32 %f107, %f106;
164 | add.f32 %f158, %f158, %f107;
165 | add.s64 %rd59, %rd59, 4;
166 | add.s64 %rd16, %rd13, 4;
167 | add.s32 %r30, %r30, 1;
168 | setp.lt.s32 %p10, %r30, %r12;
169 | mov.u64 %rd58, %rd16;
170 | @%p10 bra BB0_12;
171 |
172 | BB0_13:
173 | sqrt.rn.f32 %f108, %f4;
174 | div.rn.f32 %f109, %f158, %f108;
175 | mov.f32 %f110, 0f3F800000;
176 | sub.f32 %f111, %f110, %f109;
177 | fma.rn.f32 %f112, %f111, 0f40000000, 0fBF800000;
178 | mul.f32 %f113, %f11, %f11;
179 | mul.f32 %f114, %f11, %f113;
180 | cvt.f64.f32 %fd2, %f114;
181 | setp.lt.f64 %p11, %fd2, 0d3E7AD7F29ABCAF48;
182 | selp.f32 %f115, 0f33D6BF95, %f114, %p11;
183 | mul.f32 %f116, %f8, %f112;
184 | div.rn.f32 %f161, %f116, %f115;
185 | mul.f32 %f117, %f9, %f112;
186 | div.rn.f32 %f160, %f117, %f115;
187 | mul.f32 %f118, %f10, %f112;
188 | div.rn.f32 %f159, %f118, %f115;
189 | bra.uni BB0_15;
190 |
191 | BB0_5:
192 | setp.ne.s32 %p5, %r13, 1;
193 | @%p5 bra BB0_14;
194 |
195 | mov.f32 %f157, 0f00000000;
196 | setp.lt.s32 %p6, %r12, 1;
197 | @%p6 bra BB0_9;
198 |
199 | mul.lo.s32 %r22, %r12, %r4;
200 | mul.wide.s32 %rd43, %r22, 4;
201 | add.s64 %rd57, %rd1, %rd43;
202 | mov.f32 %f157, 0f00000000;
203 | mov.u32 %r29, 0;
204 | mov.u64 %rd56, %rd5;
205 |
206 | BB0_8:
207 | mov.u64 %rd8, %rd56;
208 | ld.global.f32 %f83, [%rd8];
209 | ld.global.f32 %f84, [%rd57];
210 | sub.f32 %f85, %f84, %f83;
211 | abs.f32 %f86, %f85;
212 | mul.f32 %f87, %f86, %f86;
213 | abs.f32 %f88, %f87;
214 | add.f32 %f157, %f157, %f88;
215 | add.s64 %rd57, %rd57, 4;
216 | add.s64 %rd11, %rd8, 4;
217 | add.s32 %r29, %r29, 1;
218 | setp.lt.s32 %p7, %r29, %r12;
219 | mov.u64 %rd56, %rd11;
220 | @%p7 bra BB0_8;
221 |
222 | BB0_9:
223 | sqrt.rn.f32 %f89, %f4;
224 | div.rn.f32 %f90, %f157, %f89;
225 | abs.f32 %f91, %f90;
226 | mov.f32 %f92, 0f3F800000;
227 | sub.f32 %f93, %f92, %f91;
228 | mul.f32 %f94, %f11, %f11;
229 | mul.f32 %f95, %f11, %f94;
230 | cvt.f64.f32 %fd1, %f95;
231 | setp.lt.f64 %p8, %fd1, 0d3E7AD7F29ABCAF48;
232 | selp.f32 %f96, 0f33D6BF95, %f95, %p8;
233 | mul.f32 %f97, %f8, %f93;
234 | div.rn.f32 %f161, %f97, %f96;
235 | mul.f32 %f98, %f9, %f93;
236 | div.rn.f32 %f160, %f98, %f96;
237 | mul.f32 %f99, %f10, %f93;
238 | div.rn.f32 %f159, %f99, %f96;
239 | bra.uni BB0_15;
240 |
241 | BB0_14:
242 | mul.f32 %f119, %f11, %f11;
243 | mul.f32 %f120, %f11, %f119;
244 | cvt.f64.f32 %fd3, %f120;
245 | setp.lt.f64 %p12, %fd3, 0d3E7AD7F29ABCAF48;
246 | selp.f32 %f121, 0f33D6BF95, %f120, %p12;
247 | div.rn.f32 %f161, %f8, %f121;
248 | div.rn.f32 %f160, %f9, %f121;
249 | div.rn.f32 %f159, %f10, %f121;
250 |
251 | BB0_15:
252 | add.f32 %f164, %f164, %f161;
253 | add.f32 %f163, %f163, %f160;
254 | add.f32 %f162, %f162, %f159;
255 | add.s32 %r28, %r28, 1;
256 | setp.lt.s32 %p13, %r28, %r14;
257 | @%p13 bra BB0_4;
258 | bra.uni BB0_17;
259 |
260 | BB0_16:
261 | ld.global.u32 %r25, [%rd60];
262 | mul.lo.s32 %r26, %r25, 3;
263 | mul.wide.s32 %rd45, %r26, 4;
264 | add.s64 %rd46, %rd2, %rd45;
265 | ld.global.f32 %f122, [%rd46];
266 | sub.f32 %f123, %f122, %f1;
267 | ld.global.f32 %f124, [%rd46+4];
268 | sub.f32 %f125, %f124, %f2;
269 | ld.global.f32 %f126, [%rd46+8];
270 | sub.f32 %f127, %f126, %f3;
271 | mul.f32 %f128, %f125, %f125;
272 | fma.rn.f32 %f129, %f123, %f123, %f128;
273 | fma.rn.f32 %f130, %f127, %f127, %f129;
274 | sqrt.rn.f32 %f131, %f130;
275 | mul.f32 %f132, %f131, %f131;
276 | mul.f32 %f133, %f131, %f132;
277 | cvt.f64.f32 %fd4, %f133;
278 | setp.lt.f64 %p14, %fd4, 0d3E7AD7F29ABCAF48;
279 | selp.f32 %f134, 0f33D6BF95, %f133, %p14;
280 | div.rn.f32 %f135, %f123, %f134;
281 | div.rn.f32 %f136, %f125, %f134;
282 | div.rn.f32 %f137, %f127, %f134;
283 | add.f32 %f164, %f164, %f135;
284 | add.f32 %f163, %f163, %f136;
285 | add.f32 %f162, %f162, %f137;
286 | add.s64 %rd60, %rd60, 4;
287 | add.s32 %r31, %r31, 1;
288 | setp.lt.s32 %p15, %r31, %r14;
289 | @%p15 bra BB0_16;
290 |
291 | BB0_17:
292 | cvta.to.global.u64 %rd47, %rd26;
293 | cvta.to.global.u64 %rd48, %rd25;
294 | cvta.to.global.u64 %rd49, %rd22;
295 | cvta.to.global.u64 %rd50, %rd24;
296 | mul.lo.s32 %r27, %r1, 3;
297 | mul.wide.s32 %rd51, %r27, 4;
298 | add.s64 %rd52, %rd50, %rd51;
299 | st.global.f32 [%rd52], %f164;
300 | st.global.f32 [%rd52+4], %f163;
301 | st.global.f32 [%rd52+8], %f162;
302 | ld.global.f32 %f138, [%rd49];
303 | sub.f32 %f139, %f1, %f138;
304 | ld.global.f32 %f140, [%rd49+4];
305 | sub.f32 %f141, %f2, %f140;
306 | ld.global.f32 %f142, [%rd49+8];
307 | sub.f32 %f143, %f3, %f142;
308 | mul.f32 %f144, %f162, %f141;
309 | mul.f32 %f145, %f163, %f143;
310 | sub.f32 %f146, %f144, %f145;
311 | mul.f32 %f147, %f164, %f143;
312 | mul.f32 %f148, %f162, %f139;
313 | sub.f32 %f149, %f147, %f148;
314 | mul.f32 %f150, %f163, %f139;
315 | mul.f32 %f151, %f164, %f141;
316 | sub.f32 %f152, %f150, %f151;
317 | add.s64 %rd53, %rd48, %rd51;
318 | st.global.f32 [%rd53], %f146;
319 | st.global.f32 [%rd53+4], %f149;
320 | st.global.f32 [%rd53+8], %f152;
321 | mul.f32 %f153, %f141, %f141;
322 | fma.rn.f32 %f154, %f139, %f139, %f153;
323 | fma.rn.f32 %f155, %f143, %f143, %f154;
324 | sqrt.rn.f32 %f156, %f155;
325 | shl.b64 %rd54, %rd4, 2;
326 | add.s64 %rd55, %rd47, %rd54;
327 | st.global.f32 [%rd55], %f156;
328 |
329 | BB0_18:
330 | ret;
331 | }
332 |
333 |
334 |
--------------------------------------------------------------------------------
/GraphReg/IterateOnGPU5.m:
--------------------------------------------------------------------------------
1 | function [T] = IterateOnGPU5( p, q, feat,score,coolDown)
2 | %================================================================
3 | % Adopt the adaptive simulated annealing to optimize the registration
4 | % process with faster convergence process
5 | % Input: p q: 3xN 3xM the model and the template point clouds
6 | % feat: struct the features of point clouds p and q
7 | % score: struct the score of point clouds p and q
8 |
9 | % Output: T: 4x4 array the solved transformation matrix
10 | % The dynamical framework is based on
11 | % P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard, and F. Ernst,
12 | %“Efficient registration of high-resolution feature enhanced point clouds,”
13 | % IEEE Trans. Pattern Anal. Mach. Intell., vol. 41, no. 5, pp. 1102–1115,
14 | % May 2019.
15 | %==============================================================
16 |
17 | % get the current matching method, i.e., the Coulomb’s law
18 | matchingMethod = 2;
19 |
20 |
21 | % features
22 | Np=length(p);
23 | Nq=length(q);
24 |
25 | [rp,cp]=size(feat.p);
26 | [rq,cq]=size(feat.q);
27 |
28 |
29 | feat1=zeros(2,cp);
30 | feat2=zeros(2,cq);
31 |
32 | numFeat = min(rp, rq);
33 |
34 | % scale all features within a range of [0,1]
35 | for fcnt = 1:numFeat
36 | ft1 = feat.p(fcnt, :);
37 | ft2 = feat.q(fcnt, :);
38 |
39 | fmax = max(max(ft1), max(ft2));
40 | fmin = min(min(ft1), min(ft2));
41 | dist =(fmax - fmin);
42 | if(dist == 0)
43 | feat1(fcnt,:)=ft1 - fmin;
44 | feat2(fcnt,:)=ft2-fmin;
45 | else
46 | %make feat in [0,1]
47 | feat1(fcnt,:)=(ft1 - fmin)/dist;
48 | feat2(fcnt,:)=(ft2-fmin)/dist;
49 |
50 | end
51 | end
52 |
53 |
54 | % init the cuda device
55 | % number of random points per iteration step
56 | numRand = 100;
57 |
58 | % if the number of random points is larger then the number of points,
59 | % compare each single point
60 | numPoints1 = Np;
61 | numPoints2 = Nq;
62 |
63 | %% scale the clouds onto a maximum edge size of 100 units
64 | mP = max(abs(max(p,[],2) - min(p,[],2)));
65 | mQ = max(abs(max(q,[],2) - min(q,[],2)));
66 |
67 | % scaleFactor
68 | sclFct = 100/max(mP,mQ);
69 |
70 | p = p.*sclFct;
71 | q = q.*sclFct;
72 |
73 | %% init gpu kernel
74 | kernel = parallel.gpu.CUDAKernel('KernelMatching.ptx', 'KernelMatching.cu', 'MatchingStep');
75 |
76 | numCmp = round(max(1, numPoints1 * numRand/100)); % procent
77 | numThrd = round(max(1, numPoints2 * numRand/100)); % procent
78 |
79 | numThrd = min(kernel.MaxThreadsPerBlock, numThrd);
80 | numCmp = min(numThrd, numCmp);
81 |
82 | kernel.ThreadBlockSize = [kernel.MaxThreadsPerBlock,1,1];
83 | kernel.GridSize = [ceil(numThrd/kernel.MaxThreadsPerBlock),1];
84 |
85 | % forces computed on GPU
86 | gF = gpuArray( single( zeros(1, numThrd*3) ));
87 | % torques computed on GPU
88 | gT = gpuArray( single( zeros(1, numThrd*3) ));
89 | % the moment of inertia
90 | gI = gpuArray( single( zeros(1, numThrd) ));
91 |
92 | %% init point clouds
93 | p1 = gpuArray( single( reshape(p, 1, []) ));
94 | p2 = gpuArray( single( reshape(q, 1, []) ));
95 |
96 |
97 | centroid2 = mean(q, 2);
98 | centroid1 = mean(p, 2);
99 |
100 |
101 | kdtree=KDTreeSearcher(feat2');
102 | [idx,~]=knnsearch(kdtree,feat1');
103 |
104 |
105 |
106 | % transfer features to GPU memory
107 | gf1 = gpuArray( single( reshape(feat1, 1, []) ) );
108 | gf2 = gpuArray( single( reshape(feat2, 1, []) ) );
109 |
110 |
111 | %% Iteration parameter
112 |
113 | % transformation matrix, init with centroid to centroid trafo
114 | M = TransMat((centroid1-centroid2));
115 | M = single(M);
116 |
117 |
118 | % temperature parameter for the adaptive simulated annealing
119 | temperature = 0.5;
120 |
121 | % vector to collect the kinetic energy per step (evaluation ?) ;
122 | oA = 0;%over Angle
123 | oT = 0;%over Translation
124 |
125 |
126 | if(1)
127 | plotQ = [q; ones(1,length(q))];
128 | end
129 |
130 |
131 | AcceptRate = 0.5;
132 | EvalsMax=200;%100 in default or larger iterations for better results
133 |
134 | %% start iteation
135 | Ekin=1;
136 | for iter=1:1:EvalsMax
137 | iter
138 | %% get the forces
139 | % randomly choosen indicies of the reference cloud
140 | tmpIdx1=unique(randi(numPoints1, numCmp, 1));
141 | i1=gpuArray(int32(tmpIdx1));
142 |
143 | tmpIdx2=idx(tmpIdx1,:);
144 |
145 | i2=gpuArray(int32(tmpIdx2));
146 |
147 | GM = gpuArray( reshape(M', 1, []) );
148 |
149 | cent = M * [centroid2; 1];
150 | c2 = gpuArray( cent(1:3) );
151 |
152 | [gF, gT, gI] = feval(kernel,...
153 | p1, p2,...
154 | i1, i2,...
155 | gf1, gf2,...
156 | numFeat, c2, GM,...
157 | gF, gT, gI,...
158 | matchingMethod,...
159 | length(i2), length(i1));
160 |
161 |
162 | %% resulting force, copy back to RAM (gather) for speed up
163 |
164 | f = reshape(gather(gF), 3, []);
165 | F = sum(f, 2);
166 |
167 | % resulting torques
168 | t = reshape(gather(gT), 3, []);
169 | Torque = sum(t, 2);
170 |
171 | % resulting moment of inertia
172 | I = sum(gather(gI));
173 |
174 | %% get the transformations
175 | % angular accelration
176 | alpha = Torque ./ I;
177 |
178 | angle = norm(alpha)/2;
179 |
180 | if(angle == 0)
181 | rotAxis = [0 0 0]';
182 | else
183 | rotAxis = alpha / norm(angle);
184 | end
185 |
186 | % linear acceleration
187 | a = F ./ numThrd;
188 |
189 | magnitude = norm(a)/2;
190 | magnitude = max(1,magnitude);
191 | transDir = a / norm(a);
192 |
193 |
194 | %% simulated annealing
195 |
196 | % current kinetic energy
197 |
198 | Etrans = (numThrd/2) * magnitude^2;%1/2*M*v^2
199 | Erot = (I/2) * angle^2;%1/2*I*w^2
200 |
201 |
202 |
203 | [temperature,AcceptRate, transition, Ekin] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature,...
204 | Ekin,...
205 | Etrans,...
206 | Erot,coolDown);
207 |
208 |
209 | if (transition)
210 | angle = oA;
211 | magnitude = oT;
212 |
213 | Ekin(end) = Ekin(end-1);
214 | else
215 | oA = angle;
216 | oT = magnitude;
217 |
218 | end
219 |
220 |
221 | %% compute the new tranformation matrix
222 | ss = transDir * magnitude * temperature;
223 | rr = deg2rad( rad2deg( angle ) * temperature);
224 |
225 |
226 |
227 | %% compute the transformation matrix
228 | T = TransMat(ss);
229 | CM = TransMat(-centroid2);
230 | CP = TransMat( centroid2);
231 | R = eye(4);
232 |
233 | if(rr ~= 0)
234 | R = AxisAngle(rotAxis, rr);
235 | end
236 |
237 |
238 | tmpMat = M * CP * R * T * CM;% transformation composite
239 | if(~isnan(tmpMat))
240 | M = tmpMat;
241 | end
242 |
243 | %%{
244 | %% visualisation
245 | %if(args.Visualize)
246 | if(1)
247 |
248 | figure(999);
249 |
250 | clf
251 | hold on;
252 | axis equal;
253 | view(-21,12);
254 |
255 | tmpP2 = M * plotQ(:, 1:length(plotQ)) ;
256 |
257 | scale = 4;
258 |
259 | if( 1 )
260 | plot3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end),'bo');
261 | plot3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end),'r+');
262 | else
263 | scatter3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end), 30, feat.p(1,1:scale:end), 'o');
264 | scatter3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end), 30, feat.q(1,1:scale:end), '*');
265 | end
266 | axis off;
267 | set(gcf,'color','w');
268 | pause(0.01);
269 | end
270 | end
271 |
272 | %% back scaling to orign size
273 | S = [1/sclFct 0 0 0; ...
274 | 0 1/sclFct 0 0; ...
275 | 0 0 1/sclFct 0; ...
276 | 0 0 0 1];
277 |
278 | tS = S * M(:,4);
279 |
280 | outM = [M(:,1:3), tS];
281 |
282 | T = outM;
283 |
284 | %% clear graphics memory
285 | clear q1 q2 i1 i2 M GM gF gT gL;
286 | end
--------------------------------------------------------------------------------
/GraphReg/cuda/CudaMath.cuh:
--------------------------------------------------------------------------------
1 | ////////////////////////////////////////////////////////////////////////////////////////////////////
2 | // file: CudaMath.cuh
3 | //
4 | // summary: cuda mathematic operations
5 | ////////////////////////////////////////////////////////////////////////////////////////////////////
6 |
7 | //#ifndef __CUDA_MATH_CUH__
8 | //#define __CUDA_MATH_CUH__
9 | #pragma once
10 |
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | /// Defines an epsilon for computational accuracy.
17 | #define CM_EPS 1e-6
18 |
19 | /// Defines PI.
20 | #define CM_PI 3.1415926535897932384626433832795028841971693993751f
21 |
22 | /// Size of the float 4x4 matrix.
23 | #define SIZE_OF_FLOAT_MAT4 sizeof(float) * 16
24 |
25 |
26 | ////////////////////////////////////////////////////////////////////////////////////////////////////
27 | /// Creates a 4x4 identity.
28 | ///
29 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
30 | ///
31 | /// [in,out] If non-null, the float* to process.
32 | ///
33 | /// true if it succeeds, false if it fails.
34 | ////////////////////////////////////////////////////////////////////////////////////////////////////
35 | template
36 | __device__ __forceinline__ bool mat4CreateIdentity(T* M) {
37 |
38 |
39 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
40 | return false;*/
41 |
42 | M[0] = 1.0; M[1] = 0.0; M[2] = 0.0; M[3] = 0.0;
43 | M[4] = 0.0; M[5] = 1.0; M[6] = 0.0; M[7] = 0.0;
44 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0;
45 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
46 |
47 | return true;
48 | }
49 |
50 | ////////////////////////////////////////////////////////////////////////////////////////////////////
51 | /// Makes a exact copy of a 4x4 matrix.
52 | ///
53 | /// Philipp Jauer, 2014-12-15.
54 | ///
55 | /// [in,out] The copy destination.
56 | /// [in,out] The copy source.
57 | ///
58 | /// true if it succeeds, false if it fails.
59 | ////////////////////////////////////////////////////////////////////////////////////////////////////
60 | template
61 | __device__ __forceinline__ bool mat4Copy(T* Dst, T* Src) {
62 |
63 | for (int i = 0; i < 16; i++)
64 | Dst[i] = Src[i];
65 |
66 | return true;
67 | }
68 |
69 | ////////////////////////////////////////////////////////////////////////////////////////////////////
70 | /// Creates a 4x4 matrix rotation about the x-axis.
71 | ///
72 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
73 | ///
74 | /// [in,out] If non-null, the float* to process.
75 | /// The angle in radians.
76 | ///
77 | /// true if it succeeds, false if it fails.
78 | ////////////////////////////////////////////////////////////////////////////////////////////////////
79 | __device__ __forceinline__ bool mat4CreateRotX(float* M, float angle) {
80 |
81 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
82 | return false;*/
83 |
84 | float ct = cos(angle);
85 | float st = sin(angle);
86 |
87 | M[0] = 1.0; M[1] = 0.0; M[2] = 0.0; M[3] = 0.0;
88 | M[4] = 0.0; M[5] = ct; M[6] = -st; M[7] = 0.0;
89 | M[8] = 0.0; M[9] = st; M[10] = ct; M[11] = 0.0;
90 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
91 |
92 | return true;
93 | }
94 |
95 | ////////////////////////////////////////////////////////////////////////////////////////////////////
96 | /// Creates a 4x4 matrix rotation about the y-axis.
97 | ///
98 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
99 | ///
100 | /// [in,out] If non-null, the float* to process.
101 | /// The angle.
102 | ///
103 | /// true if it succeeds, false if it fails.
104 | ////////////////////////////////////////////////////////////////////////////////////////////////////
105 | __device__ __forceinline__ bool mat4CreateRotY(float* M, float angle) {
106 |
107 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
108 | return false;*/
109 |
110 | float ct = cos(angle);
111 | float st = sin(angle);
112 |
113 | M[0] = ct; M[1] = 0.0; M[2] = st; M[3] = 0.0;
114 | M[4] = 0.0; M[5] = 1.0; M[6] = 0.0; M[7] = 0.0;
115 | M[8] = -st; M[9] = 0.0; M[10] = ct; M[11] = 0.0;
116 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
117 |
118 | return true;
119 | }
120 |
121 | ////////////////////////////////////////////////////////////////////////////////////////////////////
122 | /// Creates a 4x4 matrix rotation about the z-axis.
123 | ///
124 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
125 | ///
126 | /// [in,out] If non-null, the float* to process.
127 | /// The angle in radians.
128 | ///
129 | /// true if it succeeds, false if it fails.
130 | ////////////////////////////////////////////////////////////////////////////////////////////////////
131 | __device__ __forceinline__ bool mat4CreateRotZ(float* M, float angle){
132 |
133 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4)
134 | return false;*/
135 |
136 | float ct = cos(angle);
137 | float st = sin(angle);
138 |
139 | M[0] = ct; M[1] = -st; M[2] = 0.0; M[3] = 0.0;
140 | M[4] = st; M[5] = ct; M[6] = 0.0; M[7] = 0.0;
141 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0;
142 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
143 |
144 | return true;
145 | }
146 |
147 | ////////////////////////////////////////////////////////////////////////////////////////////////////
148 | /// Multiplies two 4x4 matrices A and B, A*B = C.
149 | ///
150 | /// Philipp Jauer, 2014-10-23.
151 | ///
152 | /// [out] Resulting matrix C.
153 | /// [in] Left Matrix A.
154 | /// [in] Right Matrix B.
155 | ///
156 | /// true if it succeeds, false if it fails.
157 | ////////////////////////////////////////////////////////////////////////////////////////////////////
158 | __device__ __forceinline__ bool mat4Mult(float* C, float* A, float* B) {
159 |
160 | /*if (sizeof(C) != SIZE_OF_FLOAT_MAT4 || sizeof(A) != SIZE_OF_FLOAT_MAT4 || sizeof(B) != SIZE_OF_FLOAT_MAT4)
161 | return false;*/
162 |
163 | for (int i = 0; i < 4; i++) {
164 | for (int j = 0; j < 4; j++) {
165 | float sum = 0.0;
166 | for (int k = 0; k < 4; k++)
167 | {
168 | sum += A[i * 4 + k] * B[k * 4 + j];
169 | }
170 | C[i * 4 + j] = sum;
171 | }
172 | }
173 |
174 | return true;
175 | }
176 |
177 | ////////////////////////////////////////////////////////////////////////////////////////////////////
178 | /// 4x4 matrix inverse.
179 | ///
180 | /// I. Kuhlemann, P. Jauer, 2014-10-24.
181 | ///
182 | /// [out] Result of the inversion.
183 | /// [in] Matrix to invert.
184 | ///
185 | /// true if it succeeds, false if it fails.
186 | ////////////////////////////////////////////////////////////////////////////////////////////////////
187 | __device__ __forceinline__ bool mat4Inv( float* A)
188 | {
189 | //if (sizeof(M) != SIZE_OF_FLOAT_MAT4 || sizeof(A) != SIZE_OF_FLOAT_MAT4)
190 | // return false;
191 |
192 |
193 | double inv[16], det;
194 | int i;
195 |
196 | inv[0] = A[5] * A[10] * A[15] -
197 | A[5] * A[11] * A[14] -
198 | A[9] * A[6] * A[15] +
199 | A[9] * A[7] * A[14] +
200 | A[13] * A[6] * A[11] -
201 | A[13] * A[7] * A[10];
202 |
203 | inv[4] = -A[4] * A[10] * A[15] +
204 | A[4] * A[11] * A[14] +
205 | A[8] * A[6] * A[15] -
206 | A[8] * A[7] * A[14] -
207 | A[12] * A[6] * A[11] +
208 | A[12] * A[7] * A[10];
209 |
210 | inv[8] = A[4] * A[9] * A[15] -
211 | A[4] * A[11] * A[13] -
212 | A[8] * A[5] * A[15] +
213 | A[8] * A[7] * A[13] +
214 | A[12] * A[5] * A[11] -
215 | A[12] * A[7] * A[9];
216 |
217 | inv[12] = -A[4] * A[9] * A[14] +
218 | A[4] * A[10] * A[13] +
219 | A[8] * A[5] * A[14] -
220 | A[8] * A[6] * A[13] -
221 | A[12] * A[5] * A[10] +
222 | A[12] * A[6] * A[9];
223 |
224 | inv[1] = -A[1] * A[10] * A[15] +
225 | A[1] * A[11] * A[14] +
226 | A[9] * A[2] * A[15] -
227 | A[9] * A[3] * A[14] -
228 | A[13] * A[2] * A[11] +
229 | A[13] * A[3] * A[10];
230 |
231 | inv[5] = A[0] * A[10] * A[15] -
232 | A[0] * A[11] * A[14] -
233 | A[8] * A[2] * A[15] +
234 | A[8] * A[3] * A[14] +
235 | A[12] * A[2] * A[11] -
236 | A[12] * A[3] * A[10];
237 |
238 | inv[9] = -A[0] * A[9] * A[15] +
239 | A[0] * A[11] * A[13] +
240 | A[8] * A[1] * A[15] -
241 | A[8] * A[3] * A[13] -
242 | A[12] * A[1] * A[11] +
243 | A[12] * A[3] * A[9];
244 |
245 | inv[13] = A[0] * A[9] * A[14] -
246 | A[0] * A[10] * A[13] -
247 | A[8] * A[1] * A[14] +
248 | A[8] * A[2] * A[13] +
249 | A[12] * A[1] * A[10] -
250 | A[12] * A[2] * A[9];
251 |
252 | inv[2] = A[1] * A[6] * A[15] -
253 | A[1] * A[7] * A[14] -
254 | A[5] * A[2] * A[15] +
255 | A[5] * A[3] * A[14] +
256 | A[13] * A[2] * A[7] -
257 | A[13] * A[3] * A[6];
258 |
259 | inv[6] = -A[0] * A[6] * A[15] +
260 | A[0] * A[7] * A[14] +
261 | A[4] * A[2] * A[15] -
262 | A[4] * A[3] * A[14] -
263 | A[12] * A[2] * A[7] +
264 | A[12] * A[3] * A[6];
265 |
266 | inv[10] = A[0] * A[5] * A[15] -
267 | A[0] * A[7] * A[13] -
268 | A[4] * A[1] * A[15] +
269 | A[4] * A[3] * A[13] +
270 | A[12] * A[1] * A[7] -
271 | A[12] * A[3] * A[5];
272 |
273 | inv[14] = -A[0] * A[5] * A[14] +
274 | A[0] * A[6] * A[13] +
275 | A[4] * A[1] * A[14] -
276 | A[4] * A[2] * A[13] -
277 | A[12] * A[1] * A[6] +
278 | A[12] * A[2] * A[5];
279 |
280 | inv[3] = -A[1] * A[6] * A[11] +
281 | A[1] * A[7] * A[10] +
282 | A[5] * A[2] * A[11] -
283 | A[5] * A[3] * A[10] -
284 | A[9] * A[2] * A[7] +
285 | A[9] * A[3] * A[6];
286 |
287 | inv[7] = A[0] * A[6] * A[11] -
288 | A[0] * A[7] * A[10] -
289 | A[4] * A[2] * A[11] +
290 | A[4] * A[3] * A[10] +
291 | A[8] * A[2] * A[7] -
292 | A[8] * A[3] * A[6];
293 |
294 | inv[11] = -A[0] * A[5] * A[11] +
295 | A[0] * A[7] * A[9] +
296 | A[4] * A[1] * A[11] -
297 | A[4] * A[3] * A[9] -
298 | A[8] * A[1] * A[7] +
299 | A[8] * A[3] * A[5];
300 |
301 | inv[15] = A[0] * A[5] * A[10] -
302 | A[0] * A[6] * A[9] -
303 | A[4] * A[1] * A[10] +
304 | A[4] * A[2] * A[9] +
305 | A[8] * A[1] * A[6] -
306 | A[8] * A[2] * A[5];
307 |
308 | det = A[0] * inv[0] + A[1] * inv[4] + A[2] * inv[8] + A[3] * inv[12];
309 |
310 | // if nearly zero
311 | if (det < CM_EPS)
312 | return false;
313 |
314 | det = 1.0 / det;
315 |
316 | for (i = 0; i < 16; i++)
317 | A[i] = inv[i] * det;
318 |
319 | return true;
320 | }
321 |
322 | ////////////////////////////////////////////////////////////////////////////////////////////////////
323 | /// Normalizes the given vector of size 2.
324 | ///
325 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
326 | ///
327 | /// [in,out] If non-null, the vector.
328 | ///
329 | /// true if it succeeds, false if it fails.
330 | ////////////////////////////////////////////////////////////////////////////////////////////////////
331 | __device__ __forceinline__ bool normalizeVec2(float* vector) {
332 |
333 | //int size = sizeof(vector) / sizeof(float);
334 |
335 | float n = sqrtf(vector[0] * vector[0]
336 | + vector[1] * vector[1]);
337 | vector[0] /= n;
338 | vector[1] /= n;
339 |
340 | return true;
341 | }
342 |
343 | ////////////////////////////////////////////////////////////////////////////////////////////////////
344 | /// Normalize vector of size 3.
345 | ///
346 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
347 | ///
348 | /// [in,out] If non-null, the vector.
349 | ///
350 | /// true if it succeeds, false if it fails.
351 | ////////////////////////////////////////////////////////////////////////////////////////////////////
352 | __device__ __forceinline__ bool normalizeVec3(float* vector) {
353 |
354 | float n = sqrtf(vector[0] * vector[0]
355 | + vector[1] * vector[1]
356 | + vector[2] * vector[2]);
357 | vector[0] /= n;
358 | vector[1] /= n;
359 | vector[2] /= n;
360 |
361 | return true;
362 | }
363 |
364 | ////////////////////////////////////////////////////////////////////////////////////////////////////
365 | /// Normalize vector of size 4.
366 | ///
367 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
368 | ///
369 | /// [in,out] If non-null, the vector.
370 | ///
371 | /// true if it succeeds, false if it fails.
372 | ////////////////////////////////////////////////////////////////////////////////////////////////////
373 | __device__ __forceinline__ bool normalizeVec4(float* vector) {
374 | float n = sqrtf(vector[0] * vector[0]
375 | + vector[1] * vector[1]
376 | + vector[2] * vector[2]
377 | + vector[3] * vector[3]);
378 |
379 | vector[0] /= n;
380 | vector[1] /= n;
381 | vector[2] /= n;
382 | vector[3] /= n;
383 |
384 | return true;
385 | }
386 |
387 | ////////////////////////////////////////////////////////////////////////////////////////////////////
388 | /// Cross product of a vector of size 3.
389 | ///
390 | /// I. Kuhlemann, P. Jauer, 2014-10-27.
391 | ///
392 | /// [in,out] If non-null, the float* to process.
393 | /// [in,out] If non-null, the float* to process.
394 | /// [in,out] If non-null, the float* to process.
395 | ///
396 | /// true if it succeeds, false if it fails.
397 | ////////////////////////////////////////////////////////////////////////////////////////////////////
398 | template
399 | __device__ __forceinline__ bool crossVec3(T* c, T* a, T* b) {
400 |
401 | c[0] = a[1] * b[2] - a[2] * b[1];
402 | c[1] = a[2] * b[0] - a[0] * b[2];
403 | c[2] = a[0] * b[1] - a[1] * b[0];
404 |
405 | return true;
406 | }
407 |
408 | // TODO:: Hier unbedingt nochmal nachdenken und optimieren
409 | __device__ __forceinline__ bool mat4ExtractRotation(float* M, float* A, float* B) {
410 |
411 | float x[3];
412 | x[0] = A[3] - B[3];
413 | x[1] = A[7] - B[7];
414 | x[2] = A[11] - B[11];
415 |
416 | normalizeVec3(x);
417 |
418 | int cnt = 0;
419 | for (int i = 0; i < 3; i++)
420 | {
421 | if (fabsf(x[i]) > CM_EPS)
422 | cnt++;
423 | }
424 |
425 | switch (cnt)
426 | {
427 | case 1:
428 | if (fabsf(x[0]) > CM_EPS){
429 |
430 | float beta = (x[0] < 0) ? -CM_PI/2.0 : CM_PI/2.0;
431 |
432 | float ct1 = cos(beta);
433 | float st1 = sin(beta);
434 | float ct2 = cos(CM_PI);
435 | float st2 = sin(CM_PI);
436 |
437 | float T1[16] = {ct2, -st2, 0.0, 0.0,
438 | st2, ct2, 0.0, 0.0,
439 | 0.0, 0.0, 1.0, 0.0,
440 | 0.0, 0.0, 0.0, 1.0 };
441 |
442 | float T2[16] = { ct1, 0.0, st1, 0.0,
443 | 0.0, 1.0, 0.0, 0.0,
444 | -st1, 0.0, ct1, 0.0,
445 | 0.0, 0.0, 0.0, 1.0 };
446 |
447 | mat4Mult(M, T1, T2);
448 |
449 | } else if (fabsf(x[0]) > CM_EPS) {
450 |
451 | float gamma = (x[1] < 0) ? CM_PI / 2.0 : -CM_PI / 2.0;
452 |
453 | float ct1 = cos(CM_PI/2);
454 | float st1 = sin(CM_PI/2);
455 | float ct2 = cos(gamma);
456 | float st2 = sin(gamma);
457 |
458 | float T1[16] = { ct2, -st2, 0.0, 0.0,
459 | st2, ct2, 0.0, 0.0,
460 | 0.0, 0.0, 1.0, 0.0,
461 | 0.0, 0.0, 0.0, 1.0 };
462 |
463 | float T2[16] = { ct1, 0.0, st1, 0.0,
464 | 0.0, 1.0, 0.0, 0.0,
465 | -st1, 0.0, ct1, 0.0,
466 | 0.0, 0.0, 0.0, 1.0 };
467 |
468 | mat4Mult(M, T1, T2);
469 |
470 | } else {
471 |
472 | float ct2 = cos(CM_PI);
473 | float st2 = sin(CM_PI);
474 |
475 | M[0] = ct2; M[1] = -st2; M[2] = 0.0; M[3] = 0.0;
476 | M[4] = st2; M[5] = ct2; M[6] = 0.0; M[7] = 0.0;
477 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0;
478 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0;
479 | }
480 | break;
481 | case 2:
482 | if (fabsf(x[0]) <= CM_EPS){
483 | // vector u
484 | float u[3];
485 | u[0] = x[2];
486 | u[2] = 0.0;
487 | u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1];
488 |
489 | normalizeVec3(u);
490 |
491 | // vector v
492 | float v[3];
493 | crossVec3(v, x, u);
494 |
495 | float ct1 = cos(-CM_PI / 2);
496 | float st1 = sin(-CM_PI / 2);
497 | float ct2 = cos(CM_PI);
498 | float st2 = sin(CM_PI);
499 |
500 | //TODO:: hier 3x3 wegen schnelligkeit
501 | float T1[16] = { x[0], u[0], v[0], 0.0,
502 | x[1], u[1], v[1], 0.0,
503 | x[2], u[2], v[2], 0.0,
504 | 0.0, 0.0, 0.0, 1.0 };
505 |
506 | float T2[16] = { ct1, 0.0, st1, 0.0,
507 | 0.0, 1.0, 0.0, 0.0,
508 | -st1, 0.0, ct1, 0.0,
509 | 0.0, 0.0, 0.0, 1.0 };
510 |
511 | float T3[16];
512 | mat4Mult(T3, T1, T2);
513 |
514 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
515 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
516 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
517 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;
518 |
519 | mat4Mult(M, T3, T1);
520 | }
521 | else if (fabsf(x[1]) <= CM_EPS) {
522 | // vector u
523 | float u[3];
524 | u[1] = fabsf(x[2]);
525 | u[2] = 0.0;
526 | u[0] = (-(x[1] * u[1]) - (x[2] * u[2])) / x[0];
527 |
528 | normalizeVec3(u);
529 |
530 | // vector v
531 | float v[3];
532 | crossVec3(v, x, u);
533 |
534 | float ct1 = cos(-CM_PI / 2);
535 | float st1 = sin(-CM_PI / 2);
536 | float ct2 = cos(CM_PI);
537 | float st2 = sin(CM_PI);
538 |
539 | //TODO:: hier 3x3 wegen schnelligkeit
540 | float T1[16] = { x[0], u[0], v[0], 0.0,
541 | x[1], u[1], v[1], 0.0,
542 | x[2], u[2], v[2], 0.0,
543 | 0.0, 0.0, 0.0, 1.0 };
544 |
545 | float T2[16] = { ct1, 0.0, st1, 0.0,
546 | 0.0, 1.0, 0.0, 0.0,
547 | -st1, 0.0, ct1, 0.0,
548 | 0.0, 0.0, 0.0, 1.0 };
549 |
550 | float T3[16];
551 | mat4Mult(T3, T1, T2);
552 |
553 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
554 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
555 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
556 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;
557 |
558 | mat4Mult(M, T3, T1);
559 | }
560 | else {
561 | // vector u
562 | float u[3];
563 | u[0] = 0.0;
564 | u[2] = x[1];
565 | //u[1] = (-(x[2] * u[2]) - (x[1] * u[2])) / x[1];
566 | u[1] = 0.0;
567 |
568 | normalizeVec3(u);
569 |
570 | // vector v
571 | float v[3];
572 | crossVec3(v, x, u);
573 |
574 | float ct1 = cos(-CM_PI / 2);
575 | float st1 = sin(-CM_PI / 2);
576 |
577 | //TODO:: hier 3x3 wegen schnelligkeit
578 | float T1[16] = { x[0], u[0], v[0], 0.0,
579 | x[1], u[1], v[1], 0.0,
580 | x[2], u[2], v[2], 0.0,
581 | 0.0, 0.0, 0.0, 1.0 };
582 |
583 | float T2[16] = { ct1, 0.0, st1, 0.0,
584 | 0.0, 1.0, 0.0, 0.0,
585 | -st1, 0.0, ct1, 0.0,
586 | 0.0, 0.0, 0.0, 1.0 };
587 |
588 | float T3[16];
589 | mat4Mult(T3, T1, T2);
590 |
591 | T1[0] = ct1; T1[1] = -st1; T1[2] = 0.0; T1[3] = 0.0;
592 | T1[4] = st1; T1[5] = ct1; T1[6] = 0.0; T1[7] = 0.0;
593 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
594 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;
595 |
596 | mat4Mult(M, T3, T1);
597 | }
598 | break;
599 | case 3:
600 | {
601 | // vector u
602 | float u[3];
603 | u[0] = x[2];
604 | u[2] = 0.0;
605 | u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1];
606 |
607 | normalizeVec3(u);
608 |
609 | // vector v
610 | float v[3];
611 | crossVec3(v, x, u);
612 |
613 | float ct1 = cos(-CM_PI / 2);
614 | float st1 = sin(-CM_PI / 2);
615 | float ct2 = cos(CM_PI);
616 | float st2 = sin(CM_PI);
617 |
618 | //TODO:: hier 3x3 wegen schnelligkeit
619 | float T1[16] = {x[0], u[0], v[0], 0.0,
620 | x[1], u[1], v[1], 0.0,
621 | x[2], u[2], v[2], 0.0,
622 | 0.0, 0.0, 0.0, 1.0};
623 |
624 | float T2[16] = {ct1, 0.0, st1, 0.0,
625 | 0.0, 1.0, 0.0, 0.0,
626 | -st1, 0.0, ct1, 0.0,
627 | 0.0, 0.0, 0.0, 1.0 };
628 |
629 | float T3[16];
630 | mat4Mult(T3, T1, T2);
631 |
632 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0;
633 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0;
634 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0;
635 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0;
636 |
637 | mat4Mult(M, T3, T1);
638 |
639 | break;
640 | }
641 | default:
642 | // fuck off state
643 | break;
644 | }
645 |
646 | return true;
647 | }
648 |
649 | //#endif // __CUDA_MATH_CUH
--------------------------------------------------------------------------------