├── GraphReg ├── input.png ├── result.png ├── input_clean.png ├── result_clean.png ├── TransMat.m ├── AxisAngle.m ├── readme.txt ├── computeVariationAndFeaturePoint.m ├── cuda │ ├── MatchingMethods.cuh │ ├── KernelMatching.cu │ ├── KernelMatching.ptx │ └── CudaMath.cuh ├── SalientFeature.m ├── GraphReg.m ├── AdaptSimulatedAnnealing.m ├── AdaptiveSimulatedAnnealingOptimization.m ├── findPointNormals.m └── IterateOnGPU5.m └── README.md /GraphReg/input.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input.png -------------------------------------------------------------------------------- /GraphReg/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result.png -------------------------------------------------------------------------------- /GraphReg/input_clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/input_clean.png -------------------------------------------------------------------------------- /GraphReg/result_clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zikai1/GraphReg/HEAD/GraphReg/result_clean.png -------------------------------------------------------------------------------- /GraphReg/TransMat.m: -------------------------------------------------------------------------------- 1 | function [ M ] = TransMat( vector ) 2 | %TRANSMAT Generates a homogenuous 4x4-matrix representing a translation 3 | % about the vector (x,y,z) 4 | % 5 | %input: 6 | % vector the translation vector (x,y,z) 7 | % 8 | %return: 9 | % M the matrix 10 | 11 | M = [1 0 0 vector(1); ... 12 | 0 1 0 vector(2); ... 13 | 0 0 1 vector(3); ... 14 | 0 0 0 1]; 15 | end 16 | 17 | -------------------------------------------------------------------------------- /GraphReg/AxisAngle.m: -------------------------------------------------------------------------------- 1 | function [ T ] = AxisAngle( axis, angle ) 2 | %AXISANGLE Convertes axis angle representation to a 4x4 homogenuous 3 | %rotation matrix 4 | % 5 | 6 | c = cos(angle); 7 | s = sin(angle); 8 | t = 1.0 - c; 9 | 10 | axis = axis / norm(axis); 11 | x = axis(1); 12 | y = axis(2); 13 | z = axis(3); 14 | 15 | T = [(x*x*t + c), (x*y*t - z*s), (x*z*t + y*s), 0;... 16 | (y*x*t + z*s), (y*y*t + c), (y*z*t - x*s), 0;... 17 | (z*x*t - y*s), (z*y*t + x*s), (z*z*t + c), 0;... 18 | 0, 0, 0, 1]; 19 | end 20 | 21 | -------------------------------------------------------------------------------- /GraphReg/readme.txt: -------------------------------------------------------------------------------- 1 | This work aims to improve the robustness and convergence process with respect to dynamical point cloud registration methods. 2 | 3 | The designed method is a local registration framework and is an effective alternative to ICP. 4 | 5 | We use the graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and attain invariants to rigid transformations. 6 | 7 | Run "GraphReg.m" to see demo examples. 8 | 9 | There are several parameters that can be adjusted for better registration results or faster convergence process, such as "cool_down". 10 | 11 | 12 | If you find any question, please do not hesitate to contact me 13 | myzhao@baai.ac.cn 14 | 15 | 16 | If you find our work useful, please cite 17 | M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022. -------------------------------------------------------------------------------- /GraphReg/computeVariationAndFeaturePoint.m: -------------------------------------------------------------------------------- 1 | function [pointScore,featScore] = computeVariationAndFeaturePoint(coords, geoFeat,K) 2 | %========================== 3 | % Compute the point response intensity and the graph gradient w.r.t. normals and curvatures. 4 | %Input: 5 | % coords: Nx3 array point coordinate 6 | % geoFeat: Nx4 point normal and curvature 7 | % K: K nearest neighbors 8 | % 9 | % Output: 10 | % pointScore: Nx1 the response of each data point with respect to local... 11 | % variation 12 | % featScore: Nx2 the difference between adjacent normals and curvature 13 | %========================== 14 | 15 | z = single( coords ); 16 | N = size(z, 1); 17 | 18 | 19 | %----------determine the radius and build the adjacency matrix 20 | 21 | 22 | [~, dist] = knnsearch( coords, coords( round( linspace(1,N, 20)) ,:), 'k', K, 'distance', 'euclidean'); 23 | radius = max( dist(:, K) ); 24 | 25 | % find the K nearest points in z of each point in z 26 | [idx, dist] = knnsearch( z, z, 'k', K, 'distance', 'euclidean'); 27 | dist(:,1)=[]; 28 | idx(:,1)=[]; 29 | 30 | %build the adjacency matrix 31 | tmp=exp( -(dist./(radius*0.5) ).^2); 32 | weight=tmp./sum(tmp,2); 33 | 34 | 35 | 36 | weightPointSum=zeros(N,3); 37 | normalDiff=zeros(N,3); 38 | 39 | 40 | %--------------------compute the featScore 41 | for i=1:N 42 | % near index of current point 43 | nearInx=idx(i,:); 44 | %extract current normal and near normal 45 | currentNormal=geoFeat(i,1:3); 46 | nearPoint=z(nearInx,:); 47 | nearNormal=geoFeat(nearInx,1:3); 48 | 49 | % reshape near weight by K 50 | nearWeight=repmat(weight(i,:)',1,3); 51 | 52 | % difference of current point and normal with its nieghborhood 53 | weightPointSum(i,:)=sum(nearPoint.*nearWeight,1); 54 | 55 | % the second calculation manner 56 | normalDiffSquare=sum((repmat(currentNormal,K-1,1)-nearNormal).^2,2); 57 | normalDiff(i,:)=sqrt(nearWeight(:,1)')*normalDiffSquare; 58 | end 59 | 60 | %record scores of point response intensity and local geometry variation 61 | pointScore=sum((z-weightPointSum).^2,2); 62 | featScore=[normalDiff(:,1) geoFeat(:,4)]; -------------------------------------------------------------------------------- /GraphReg/cuda/MatchingMethods.cuh: -------------------------------------------------------------------------------- 1 | #include "CudaMath.cuh" 2 | 3 | __device__ void Gravity(const float* p1, const float* p2, float* force) 4 | { 5 | float r[3]; 6 | r[0] = p1[0] - p2[0]; 7 | r[1] = p1[1] - p2[1]; 8 | r[2] = p1[2] - p2[2]; 9 | 10 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); 11 | 12 | dist = dist*dist*dist; 13 | 14 | if(dist < 0.0000001) 15 | dist = 0.0000001; 16 | 17 | 18 | force[0] = r[0] / dist; 19 | force[1] = r[1] / dist; 20 | force[2] = r[2] / dist; 21 | 22 | } 23 | 24 | __device__ void Coulomb(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force) 25 | { 26 | float r[3]; 27 | r[0] = p1[0] - p2[0]; 28 | r[1] = p1[1] - p2[1]; 29 | r[2] = p1[2] - p2[2]; 30 | 31 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); 32 | 33 | float cDist = 0.f; 34 | 35 | for(int i = 0; i < numFeat; i++) 36 | { 37 | float fd = abs(f1[idx1*numFeat + i] - f2[idx2*numFeat + i]); 38 | cDist += abs(fd*fd); 39 | } 40 | 41 | cDist = cDist / sqrt((float)numFeat); 42 | 43 | float c = 1-abs(cDist); 44 | 45 | dist = dist*dist*dist; 46 | 47 | //c = c*c*c; 48 | 49 | if(dist < 0.0000001) 50 | dist = 0.0000001; 51 | 52 | 53 | force[0] = c * r[0] / dist; 54 | force[1] = c * r[1] / dist; 55 | force[2] = c * r[2] / dist; 56 | } 57 | 58 | __device__ void CoulombPM(const float* p1, const float* p2, const int numFeat, const float* f1, int idx1, const float* f2, int idx2, float* force) 59 | { 60 | float r[3]; 61 | r[0] = p1[0] - p2[0]; 62 | r[1] = p1[1] - p2[1]; 63 | r[2] = p1[2] - p2[2]; 64 | 65 | float dist = sqrt(r[0]*r[0] + r[1]*r[1] + r[2]*r[2]); 66 | 67 | float cDist = 0.f; 68 | for(int i = 0; i < numFeat; i++) 69 | { 70 | float fd = abs(f1[idx1*numFeat + i] - f2[idx2*numFeat + i]); 71 | cDist += abs(fd*fd); 72 | } 73 | cDist = cDist / sqrt((float)numFeat); 74 | float c = 2 * (1 - cDist) - 1; 75 | 76 | dist = dist*dist*dist; 77 | 78 | //c = c*c*c; 79 | 80 | if(dist < 0.0000001) 81 | dist = 0.0000001; 82 | 83 | force[0] = c * r[0] / dist; 84 | force[1] = c * r[1] / dist; 85 | force[2] = c * r[2] / dist; 86 | } -------------------------------------------------------------------------------- /GraphReg/SalientFeature.m: -------------------------------------------------------------------------------- 1 | function [pt,feature_score,struct_tensor]=SalientFeature(pc,num,removeOut) 2 | %======================================== 3 | % Compute the point response intensity and the local geometry variation 4 | % Input: 5 | % pc the point cloud 6 | % num the downsampling rate of the salient points 7 | % removeOut if removing outliers according to the x84 statistics 8 | % Output: 9 | % pt the downsampled point cloud with salient feature points 10 | % feature_score the point response intensity 11 | % struct_tensor the local geometry variation 12 | % 13 | %======================================== 14 | 15 | coordinate=pc.Location; 16 | 17 | %------------compute normal and curvature for each data point---------- 18 | [Nor,Cur]=findPointNormals(coordinate,10,[0,0,10],true); 19 | 20 | geoFeat=[Nor Cur]; 21 | 22 | %-----------point response intensity (pointScore) and local feature variation (featScore)----------------- 23 | [pointScore,featScore] = computeVariationAndFeaturePoint(coordinate, geoFeat,10); 24 | 25 | nanInx=isnan(pointScore); 26 | featScore(nanInx,:)=[]; 27 | pointScore(nanInx,:)=[]; 28 | coordinate(nanInx,:)=[]; 29 | 30 | N = size(pointScore,1); 31 | 32 | 33 | %----------------sample round(N/num) points------------------ 34 | s = RandStream('mlfg6331_64'); % set random number 35 | 36 | 37 | if removeOut 38 | % use robust statistics x84 rule to remove scattered outliers 39 | x84_rule=median(abs(pointScore-repmat(median(pointScore),N,1))); 40 | M=find(pointScore<=5.2*x84_rule);%5.2 in default or smaller values to remove more outliers 41 | 42 | else 43 | % random sampling 44 | % use graph filter h(A)=I-A 45 | M = datasample(s,1:N, round(N/num), 'Replace', false,'Weights', pointScore(:,1) ); 46 | end 47 | 48 | 49 | feature_data=[coordinate(M,1), coordinate(M,2), coordinate(M, 3)]; 50 | feature_score=pointScore(M,:); 51 | struct_tensor=featScore(M,:); 52 | 53 | 54 | %----------sampled points are stored as point cloud--------------- 55 | pt=pointCloud(feature_data); 56 | 57 | 58 | if ~removeOut 59 | %---------no sampling 60 | pt=pc; 61 | feature_score=pointScore; 62 | struct_tensor=featScore; 63 | end 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /GraphReg/GraphReg.m: -------------------------------------------------------------------------------- 1 | %=========================== 2 | % GraphReg: Dynamical point cloud registration with geometry-aware graph signal 3 | % processing 4 | % GraphReg is a local registration framework aiming to produce robust and 5 | % accurate results through the graph signal processing theory and the adaptive simulated annealing mechanism. 6 | % M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, 7 | %"GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," 8 | % IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022. 9 | %========================== 10 | 11 | clear; 12 | clc; 13 | close all; 14 | 15 | %------read the input point clouds 16 | file1='.\data\bun000.ply'; 17 | file2='.\data\bun045.ply'; 18 | 19 | % file1='.\data\dragonStandRight_0.ply'; 20 | % file2='.\data\dragonStandRight_24.ply'; 21 | 22 | % add outliers 23 | file1='.\data\bun000_outlier_0.1.ply'; 24 | file2='.\data\bun045_outlier_0.1.ply'; 25 | 26 | % file1='.\data\bun000_outlier_0.5.ply'; 27 | % file2='.\data\bun045_outlier_0.5.ply'; 28 | 29 | 30 | 31 | tgt=pcread(file1); 32 | src=pcread(file2); 33 | 34 | %------downsample and show the point clouds 35 | tgt=pcdownsample(tgt,'gridAverage',0.001); 36 | src=pcdownsample(src,'gridAverage',0.001); 37 | 38 | figure; 39 | pcshowpair(src,tgt); 40 | title("Input point clouds"); 41 | 42 | 43 | %------compute the point intensity (scoreP,scoreQ) and the local geometric feature (featP, featQ) 44 | [tgt2,scoreP,featP]=SalientFeature(tgt,10,true);% 10 in general 45 | [src2,scoreQ,featQ]=SalientFeature(src,10,true); 46 | 47 | 48 | tgtPt=tgt2.Location; 49 | srcPt=src2.Location; 50 | 51 | 52 | tgtPt=tgtPt'; 53 | srcPt=srcPt'; 54 | 55 | fp=featP'; 56 | fq=featQ'; 57 | 58 | 59 | %-----------start registration---------------- 60 | % add features and score to a struct 61 | feat = struct('p', fp, 'q', fq); 62 | score=struct('pScore',scoreP','qScore',scoreQ'); 63 | 64 | 65 | addpath('cuda'); 66 | 67 | 68 | cool_down=0.9;%0.9 in default, adjust cool_down for better results or faster convergence process such as 0.8:0.02:0.98; 69 | 70 | 71 | [T] = AdaptiveSimulatedAnnealingOptimization(tgtPt, srcPt, feat,score,cool_down); 72 | 73 | 74 | tform=affine3d(T'); 75 | 76 | src2tgt=pctransform(src,tform); 77 | 78 | 79 | figure; 80 | pcshowpair(src2tgt,tgt); 81 | title("Registration results"); 82 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraphReg (TIP 2022) 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | ## 1. Motivation 12 | This work aims to improve the robustness and convergence process with respect to currently dynamical point cloud registration methods. 13 | 14 | The designed method is a local registration framework and is an effective alternative to ICP. 15 | 16 | We use the graph signal processing theory to describe the local geometry features, i.e., the point response intensity and the local geometry variations, through which we can remove outliers and attain invariants to rigid transformations. 17 | 18 | ## 2. Useage 19 | 20 | `Run "GraphReg.m" to see demo examples.` 21 | 22 | There are several parameters that can be adjusted for better registration results or faster convergence process: 23 | + **cool_down**: Smaller values typically result in faster convergence but with fluctuations, whereas larger ones ensure more accurate registrations. The suggested interval scope empirically attaining good results is [0.8, 0.98]. 24 | + **$\alpha$**: Although we can fix $\alpha=5.2$ to remove outliers for most test settings, smaller $\alpha$ will further improve the robusteness when there are a large percentage of outliers. 25 | 26 | ## 3. Contact 27 | If you have any question, please do not hesitate to contact myzhao@baai.ac.cn 28 | 29 | ## 4. Citation 30 | If you find this implementation useful for your research, please cite: 31 | + M. Zhao, L. Ma, X. Jia, D. -M. Yan and T. Huang, "GraphReg: Dynamical Point Cloud Registration With Geometry-Aware Graph Signal Processing," in IEEE Transactions on Image Processing, vol. 31, pp. 7449-7464, 2022, doi: 10.1109/TIP.2022.3223793. 32 | + P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard and F. Ernst, "Efficient Registration of High-Resolution Feature Enhanced Point Clouds," in IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 41, no. 5, pp. 1102-1115, 1 May 2019, doi: 10.1109/TPAMI.2018.2831670. 33 | -------------------------------------------------------------------------------- /GraphReg/AdaptSimulatedAnnealing.m: -------------------------------------------------------------------------------- 1 | function [ temperature,AcceptRate,transition, Ekin ] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature, Ekin, Etrans, Erot,cool_down) 2 | 3 | 4 | transition = 0; 5 | 6 | cEkin = log(Etrans + Erot)/log(10); 7 | Ekin = [Ekin cEkin];%cEkin: current kinetic energy 8 | 9 | if(length(Ekin) > 3) 10 | 11 | %% case 3: avoid large steps of both energies 12 | oE = Ekin(end - 1);%last kinect over E 13 | cE = Ekin(end);%current kinect over E 14 | 15 | dE = cE - oE; 16 | 17 | if (dE<0) 18 | AcceptRate = 1/500*(499*AcceptRate + 1); 19 | else 20 | p = min(1, exp(-dE / temperature)); 21 | if(rand < p) 22 | AcceptRate = 1/500*(499*AcceptRate + 1); 23 | else 24 | transition = 1; 25 | AcceptRate = 1/500*(499*AcceptRate); 26 | end 27 | 28 | end 29 | 30 | 31 | if iter/EvalsMax < 0.15 32 | LamRate = 0.44 + 0.56 * 560^(-1*iter/EvalsMax/0.15); 33 | else 34 | if iter/EvalsMax < 0.65 35 | LamRate = 0.44; 36 | else 37 | LamRate = 0.44 * 440^- (((iter/EvalsMax)-0.65)/0.35); 38 | end 39 | end 40 | 41 | %adaptive update temperature 42 | if AcceptRate > LamRate 43 | temperature = 0.9*temperature; 44 | else 45 | temperature = temperature /cool_down; % cool_down can be adjusted for better results 46 | end 47 | end 48 | end 49 | 50 | 51 | -------------------------------------------------------------------------------- /GraphReg/cuda/KernelMatching.cu: -------------------------------------------------------------------------------- 1 | #include "MatchingMethods.cuh" 2 | 3 | __device__ void GetForce(int matchingMethod, const float* p1, const float* p2, const int numFeat, const float* f1, int id1, const float* f2, int id2, float* force) 4 | { 5 | switch (matchingMethod) 6 | { 7 | // Gravitation 8 | case 0: 9 | { 10 | Gravity(p1, p2, force); 11 | break; 12 | } 13 | case 1: 14 | { 15 | Coulomb(p1, p2, numFeat, f1, id1, f2, id2, force); 16 | break; 17 | } 18 | case 2: 19 | { 20 | CoulombPM(p1, p2, numFeat, f1, id1, f2, id2, force); 21 | break; 22 | } 23 | default: 24 | { 25 | Gravity(p1, p2, force); 26 | break; 27 | } 28 | } 29 | 30 | } 31 | 32 | __global__ void MatchingStep( const float* cloud1, const float* cloud2, 33 | const int* index1, const int* index2, 34 | const float* feat1, const float* feat2, 35 | const int numFeat, const float* centroid2, 36 | const float* M, 37 | float* force, float* torque, float* momentInt, 38 | const int matchingMethod, 39 | const int numPoints, const int numSteps) 40 | { 41 | int tID = threadIdx.x; 42 | 43 | if(tID >= numPoints) 44 | return; 45 | 46 | int idx2 = index2[tID]; 47 | 48 | float p2[3]; 49 | p2[0] = cloud2[idx2 * 3 + 0]; 50 | p2[1] = cloud2[idx2 * 3 + 1]; 51 | p2[2] = cloud2[idx2 * 3 + 2]; 52 | 53 | // apply the current transformation to the points of cloud2 and the centroid 2 54 | // transform the cloud2 55 | float tp2[3]; 56 | tp2[0] = M[0]*p2[0] + M[1]*p2[1] + M[2]*p2[2] + M[3]; 57 | tp2[1] = M[4]*p2[0] + M[5]*p2[1] + M[6]*p2[2] + M[7]; 58 | tp2[2] = M[8]*p2[0] + M[9]*p2[1] + M[10]*p2[2] + M[11]; 59 | 60 | float f[3]; 61 | f[0] = 0; 62 | f[1] = 0; 63 | f[2] = 0; 64 | 65 | for(int i = 0; i < numSteps; i++) 66 | { 67 | //int idx1 = index1[tID]; 68 | 69 | int idx1 = index1[i]; 70 | 71 | float p1[3]; 72 | p1[0] = cloud1[idx1 * 3 + 0]; 73 | p1[1] = cloud1[idx1 * 3 + 1]; 74 | p1[2] = cloud1[idx1 * 3 + 2]; 75 | 76 | float fDir[3]; 77 | 78 | GetForce(matchingMethod, p1, tp2, numFeat, feat1, idx1, feat2, idx2, fDir); 79 | 80 | f[0] += fDir[0]; 81 | f[1] += fDir[1]; 82 | f[2] += fDir[2]; 83 | 84 | } 85 | 86 | force[tID * 3 + 0] = f[0] ; 87 | force[tID * 3 + 1] = f[1] ; 88 | force[tID * 3 + 2] = f[2] ; 89 | 90 | float p2c[3]; 91 | p2c[0] = tp2[0] - centroid2[0]; 92 | p2c[1] = tp2[1] - centroid2[1]; 93 | p2c[2] = tp2[2] - centroid2[2]; 94 | 95 | float tDir[3]; 96 | crossVec3(tDir, p2c, f); 97 | 98 | torque[tID * 3 + 0] = tDir[0]; 99 | torque[tID * 3 + 1] = tDir[1]; 100 | torque[tID * 3 + 2] = tDir[2]; 101 | 102 | // the moment of inertia with the mass = 1 103 | momentInt[tID] = sqrt( p2c[0]*p2c[0] + p2c[1]*p2c[1] + p2c[2]*p2c[2] ); 104 | 105 | } -------------------------------------------------------------------------------- /GraphReg/AdaptiveSimulatedAnnealingOptimization.m: -------------------------------------------------------------------------------- 1 | function [ T] = AdaptiveSimulatedAnnealingOptimization( p, q, features,score,cool_down) 2 | %-------------adaptive simulated annealing 3 | [T] = IterateOnGPU5(p, q, features,score,cool_down); 4 | end 5 | 6 | -------------------------------------------------------------------------------- /GraphReg/findPointNormals.m: -------------------------------------------------------------------------------- 1 | function [ normals, curvature ] = findPointNormals(points, numNeighbours, viewPoint, dirLargest) 2 | %FINDPOINTNORMALS Estimates the normals of a sparse set of n 3d points by 3 | % using a set of the closest neighbours to approximate a plane. 4 | % 5 | % Required Inputs: 6 | % points- nx3 set of 3d points (x,y,z) 7 | % 8 | % Optional Inputs: (will give default values on empty array []) 9 | % numNeighbours- number of neighbouring points to use in plane fitting 10 | % (default 9) 11 | % viewPoint- location all normals will point towards (default [0,0,0]) 12 | % dirLargest- use only the largest component of the normal in determining 13 | % its direction wrt the viewPoint (generally provides a more stable 14 | % estimation of planes near the viewPoint, default true) 15 | % 16 | % Outputs: 17 | % normals- nx3 set of normals (nx,ny,nz) 18 | % curvature- nx1 set giving the curvature 19 | % 20 | % References- 21 | % The implementation closely follows the method given at 22 | % http://pointclouds.org/documentation/tutorials/normal_estimation.php 23 | % This code was used in generating the results for the journal paper 24 | % Multi-modal sensor calibration using a gradient orientation measure 25 | % http://www.zjtaylor.com/welcome/download_pdf?pdf=JFR2013.pdf 26 | % 27 | % This code was written by Zachary Taylor 28 | % zacharyjeremytaylor@gmail.com 29 | % http://www.zjtaylor.com 30 | 31 | %% check inputs 32 | validateattributes(points, {'numeric'},{'ncols',3}); 33 | 34 | if(nargin < 2) 35 | numNeighbours = []; 36 | end 37 | if(isempty(numNeighbours)) 38 | numNeighbours = 9; 39 | else 40 | validateattributes(numNeighbours, {'numeric'},{'scalar','positive'}); 41 | if(numNeighbours > 100) 42 | warning(['%i neighbouring points will be used in plane'... 43 | ' estimation, expect long run times, large ram usage and'... 44 | ' poor results near edges'],numNeighbours); 45 | end 46 | end 47 | 48 | if(nargin < 3) 49 | viewPoint = []; 50 | end 51 | if(isempty(viewPoint)) 52 | viewPoint = [0,0,0]; 53 | else 54 | validateattributes(viewPoint, {'numeric'},{'size',[1,3]}); 55 | end 56 | 57 | if(nargin < 4) 58 | dirLargest = []; 59 | end 60 | if(isempty(dirLargest)) 61 | dirLargest = true; 62 | else 63 | validateattributes(dirLargest, {'logical'},{'scalar'}); 64 | end 65 | 66 | %% setup 67 | 68 | %ensure inputs of correct type 69 | points = double(points); 70 | viewPoint = double(viewPoint); 71 | 72 | %create kdtree 73 | kdtreeobj = KDTreeSearcher(points,'distance','euclidean'); 74 | 75 | %get nearest neighbours 76 | n = knnsearch(kdtreeobj,points,'k',(numNeighbours+1)); 77 | 78 | %remove self 79 | n = n(:,2:end); 80 | 81 | %find difference in position from neighbouring points 82 | p = repmat(points(:,1:3),numNeighbours,1) - points(n(:),1:3); 83 | p = reshape(p, size(points,1),numNeighbours,3); 84 | 85 | %calculate values for covariance matrix 86 | C = zeros(size(points,1),6); 87 | C(:,1) = sum(p(:,:,1).*p(:,:,1),2); 88 | C(:,2) = sum(p(:,:,1).*p(:,:,2),2); 89 | C(:,3) = sum(p(:,:,1).*p(:,:,3),2); 90 | C(:,4) = sum(p(:,:,2).*p(:,:,2),2); 91 | C(:,5) = sum(p(:,:,2).*p(:,:,3),2); 92 | C(:,6) = sum(p(:,:,3).*p(:,:,3),2); 93 | C = C ./ numNeighbours; 94 | 95 | %% normals and curvature calculation 96 | 97 | normals = zeros(size(points)); 98 | curvature = zeros(size(points,1),1); 99 | for i = 1:(size(points,1)) 100 | 101 | %form covariance matrix 102 | Cmat = [C(i,1) C(i,2) C(i,3);... 103 | C(i,2) C(i,4) C(i,5);... 104 | C(i,3) C(i,5) C(i,6)]; 105 | 106 | %get eigen values and vectors 107 | [v,d] = eig(Cmat); 108 | d = diag(d); 109 | [lambda,k] = min(d); 110 | 111 | %store normals 112 | normals(i,:) = v(:,k)'; 113 | 114 | %store curvature 115 | curvature(i) = lambda / (sum(d)+1e-6); 116 | end 117 | 118 | %% flipping normals 119 | 120 | %ensure normals point towards viewPoint 121 | points = points - repmat(viewPoint,size(points,1),1); 122 | if(dirLargest) 123 | [~,idx] = max(abs(normals),[],2); 124 | idx = (1:size(normals,1))' + (idx-1)*size(normals,1); 125 | dir = normals(idx).*points(idx) > 0; 126 | else 127 | dir = sum(normals.*points,2) > 0; 128 | end 129 | 130 | normals(dir,:) = -normals(dir,:); 131 | 132 | end -------------------------------------------------------------------------------- /GraphReg/cuda/KernelMatching.ptx: -------------------------------------------------------------------------------- 1 | // 2 | // Generated by NVIDIA NVVM Compiler 3 | // 4 | // Compiler Build ID: CL-21124049 5 | // Cuda compilation tools, release 8.0, V8.0.44 6 | // Based on LLVM 3.4svn 7 | // 8 | 9 | .version 5.0 10 | .target sm_52 11 | .address_size 64 12 | 13 | // .globl _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii 14 | 15 | .visible .entry _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii( 16 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0, 17 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1, 18 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2, 19 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3, 20 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4, 21 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5, 22 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6, 23 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7, 24 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8, 25 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9, 26 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10, 27 | .param .u64 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11, 28 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12, 29 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13, 30 | .param .u32 _Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14 31 | ) 32 | { 33 | .reg .pred %p<16>; 34 | .reg .f32 %f<165>; 35 | .reg .b32 %r<32>; 36 | .reg .f64 %fd<5>; 37 | .reg .b64 %rd<61>; 38 | 39 | 40 | ld.param.u64 %rd27, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_0]; 41 | ld.param.u64 %rd19, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_1]; 42 | ld.param.u64 %rd28, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_2]; 43 | ld.param.u64 %rd20, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_3]; 44 | ld.param.u64 %rd29, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_4]; 45 | ld.param.u64 %rd21, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_5]; 46 | ld.param.u32 %r12, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_6]; 47 | ld.param.u64 %rd22, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_7]; 48 | ld.param.u64 %rd23, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_8]; 49 | ld.param.u64 %rd24, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_9]; 50 | ld.param.u64 %rd25, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_10]; 51 | ld.param.u64 %rd26, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_11]; 52 | ld.param.u32 %r13, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_12]; 53 | ld.param.u32 %r15, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_13]; 54 | ld.param.u32 %r14, [_Z12MatchingStepPKfS0_PKiS2_S0_S0_iS0_S0_PfS3_S3_iii_param_14]; 55 | cvta.to.global.u64 %rd1, %rd29; 56 | cvta.to.global.u64 %rd2, %rd27; 57 | cvta.to.global.u64 %rd60, %rd28; 58 | mov.u32 %r1, %tid.x; 59 | setp.ge.s32 %p1, %r1, %r15; 60 | @%p1 bra BB0_18; 61 | 62 | cvta.to.global.u64 %rd30, %rd23; 63 | cvta.to.global.u64 %rd31, %rd19; 64 | cvta.to.global.u64 %rd32, %rd20; 65 | cvt.s64.s32 %rd4, %r1; 66 | mul.wide.s32 %rd33, %r1, 4; 67 | add.s64 %rd34, %rd32, %rd33; 68 | ld.global.u32 %r2, [%rd34]; 69 | mul.lo.s32 %r16, %r2, 3; 70 | mul.wide.s32 %rd35, %r16, 4; 71 | add.s64 %rd36, %rd31, %rd35; 72 | ld.global.f32 %f45, [%rd30]; 73 | ld.global.f32 %f46, [%rd36]; 74 | ld.global.f32 %f47, [%rd30+4]; 75 | ld.global.f32 %f48, [%rd36+4]; 76 | mul.f32 %f49, %f48, %f47; 77 | fma.rn.f32 %f50, %f46, %f45, %f49; 78 | ld.global.f32 %f51, [%rd30+8]; 79 | ld.global.f32 %f52, [%rd36+8]; 80 | fma.rn.f32 %f53, %f52, %f51, %f50; 81 | ld.global.f32 %f54, [%rd30+12]; 82 | add.f32 %f1, %f54, %f53; 83 | ld.global.f32 %f55, [%rd30+16]; 84 | ld.global.f32 %f56, [%rd30+20]; 85 | mul.f32 %f57, %f48, %f56; 86 | fma.rn.f32 %f58, %f46, %f55, %f57; 87 | ld.global.f32 %f59, [%rd30+24]; 88 | fma.rn.f32 %f60, %f52, %f59, %f58; 89 | ld.global.f32 %f61, [%rd30+28]; 90 | add.f32 %f2, %f61, %f60; 91 | ld.global.f32 %f62, [%rd30+32]; 92 | ld.global.f32 %f63, [%rd30+36]; 93 | mul.f32 %f64, %f48, %f63; 94 | fma.rn.f32 %f65, %f46, %f62, %f64; 95 | ld.global.f32 %f66, [%rd30+40]; 96 | fma.rn.f32 %f67, %f52, %f66, %f65; 97 | ld.global.f32 %f68, [%rd30+44]; 98 | add.f32 %f3, %f68, %f67; 99 | mov.f32 %f164, 0f00000000; 100 | mov.f32 %f163, %f164; 101 | mov.f32 %f162, %f164; 102 | setp.lt.s32 %p2, %r14, 1; 103 | @%p2 bra BB0_17; 104 | 105 | cvt.rn.f32.s32 %f4, %r12; 106 | setp.eq.s32 %p3, %r13, 0; 107 | mov.f32 %f164, 0f00000000; 108 | mov.f32 %f163, %f164; 109 | mov.f32 %f162, %f164; 110 | mov.u32 %r31, 0; 111 | @%p3 bra BB0_16; 112 | 113 | cvta.to.global.u64 %rd37, %rd21; 114 | mul.lo.s32 %r19, %r2, %r12; 115 | mul.wide.s32 %rd38, %r19, 4; 116 | add.s64 %rd6, %rd37, %rd38; 117 | mov.u64 %rd5, %rd6; 118 | mov.f32 %f164, 0f00000000; 119 | mov.f32 %f163, %f164; 120 | mov.f32 %f162, %f164; 121 | mov.u32 %r28, 0; 122 | 123 | BB0_4: 124 | mul.wide.s32 %rd39, %r28, 4; 125 | add.s64 %rd40, %rd60, %rd39; 126 | ld.global.u32 %r4, [%rd40]; 127 | mul.lo.s32 %r20, %r4, 3; 128 | mul.wide.s32 %rd41, %r20, 4; 129 | add.s64 %rd42, %rd2, %rd41; 130 | ld.global.f32 %f75, [%rd42]; 131 | sub.f32 %f8, %f75, %f1; 132 | ld.global.f32 %f76, [%rd42+4]; 133 | sub.f32 %f9, %f76, %f2; 134 | ld.global.f32 %f77, [%rd42+8]; 135 | sub.f32 %f10, %f77, %f3; 136 | mul.f32 %f78, %f9, %f9; 137 | fma.rn.f32 %f79, %f8, %f8, %f78; 138 | fma.rn.f32 %f80, %f10, %f10, %f79; 139 | sqrt.rn.f32 %f11, %f80; 140 | setp.eq.s32 %p4, %r13, 2; 141 | @%p4 bra BB0_10; 142 | bra.uni BB0_5; 143 | 144 | BB0_10: 145 | mov.f32 %f158, 0f00000000; 146 | setp.lt.s32 %p9, %r12, 1; 147 | @%p9 bra BB0_13; 148 | 149 | mul.lo.s32 %r24, %r12, %r4; 150 | mul.wide.s32 %rd44, %r24, 4; 151 | add.s64 %rd59, %rd1, %rd44; 152 | mov.f32 %f158, 0f00000000; 153 | mov.u32 %r30, 0; 154 | mov.u64 %rd58, %rd6; 155 | 156 | BB0_12: 157 | mov.u64 %rd13, %rd58; 158 | ld.global.f32 %f102, [%rd13]; 159 | ld.global.f32 %f103, [%rd59]; 160 | sub.f32 %f104, %f103, %f102; 161 | abs.f32 %f105, %f104; 162 | mul.f32 %f106, %f105, %f105; 163 | abs.f32 %f107, %f106; 164 | add.f32 %f158, %f158, %f107; 165 | add.s64 %rd59, %rd59, 4; 166 | add.s64 %rd16, %rd13, 4; 167 | add.s32 %r30, %r30, 1; 168 | setp.lt.s32 %p10, %r30, %r12; 169 | mov.u64 %rd58, %rd16; 170 | @%p10 bra BB0_12; 171 | 172 | BB0_13: 173 | sqrt.rn.f32 %f108, %f4; 174 | div.rn.f32 %f109, %f158, %f108; 175 | mov.f32 %f110, 0f3F800000; 176 | sub.f32 %f111, %f110, %f109; 177 | fma.rn.f32 %f112, %f111, 0f40000000, 0fBF800000; 178 | mul.f32 %f113, %f11, %f11; 179 | mul.f32 %f114, %f11, %f113; 180 | cvt.f64.f32 %fd2, %f114; 181 | setp.lt.f64 %p11, %fd2, 0d3E7AD7F29ABCAF48; 182 | selp.f32 %f115, 0f33D6BF95, %f114, %p11; 183 | mul.f32 %f116, %f8, %f112; 184 | div.rn.f32 %f161, %f116, %f115; 185 | mul.f32 %f117, %f9, %f112; 186 | div.rn.f32 %f160, %f117, %f115; 187 | mul.f32 %f118, %f10, %f112; 188 | div.rn.f32 %f159, %f118, %f115; 189 | bra.uni BB0_15; 190 | 191 | BB0_5: 192 | setp.ne.s32 %p5, %r13, 1; 193 | @%p5 bra BB0_14; 194 | 195 | mov.f32 %f157, 0f00000000; 196 | setp.lt.s32 %p6, %r12, 1; 197 | @%p6 bra BB0_9; 198 | 199 | mul.lo.s32 %r22, %r12, %r4; 200 | mul.wide.s32 %rd43, %r22, 4; 201 | add.s64 %rd57, %rd1, %rd43; 202 | mov.f32 %f157, 0f00000000; 203 | mov.u32 %r29, 0; 204 | mov.u64 %rd56, %rd5; 205 | 206 | BB0_8: 207 | mov.u64 %rd8, %rd56; 208 | ld.global.f32 %f83, [%rd8]; 209 | ld.global.f32 %f84, [%rd57]; 210 | sub.f32 %f85, %f84, %f83; 211 | abs.f32 %f86, %f85; 212 | mul.f32 %f87, %f86, %f86; 213 | abs.f32 %f88, %f87; 214 | add.f32 %f157, %f157, %f88; 215 | add.s64 %rd57, %rd57, 4; 216 | add.s64 %rd11, %rd8, 4; 217 | add.s32 %r29, %r29, 1; 218 | setp.lt.s32 %p7, %r29, %r12; 219 | mov.u64 %rd56, %rd11; 220 | @%p7 bra BB0_8; 221 | 222 | BB0_9: 223 | sqrt.rn.f32 %f89, %f4; 224 | div.rn.f32 %f90, %f157, %f89; 225 | abs.f32 %f91, %f90; 226 | mov.f32 %f92, 0f3F800000; 227 | sub.f32 %f93, %f92, %f91; 228 | mul.f32 %f94, %f11, %f11; 229 | mul.f32 %f95, %f11, %f94; 230 | cvt.f64.f32 %fd1, %f95; 231 | setp.lt.f64 %p8, %fd1, 0d3E7AD7F29ABCAF48; 232 | selp.f32 %f96, 0f33D6BF95, %f95, %p8; 233 | mul.f32 %f97, %f8, %f93; 234 | div.rn.f32 %f161, %f97, %f96; 235 | mul.f32 %f98, %f9, %f93; 236 | div.rn.f32 %f160, %f98, %f96; 237 | mul.f32 %f99, %f10, %f93; 238 | div.rn.f32 %f159, %f99, %f96; 239 | bra.uni BB0_15; 240 | 241 | BB0_14: 242 | mul.f32 %f119, %f11, %f11; 243 | mul.f32 %f120, %f11, %f119; 244 | cvt.f64.f32 %fd3, %f120; 245 | setp.lt.f64 %p12, %fd3, 0d3E7AD7F29ABCAF48; 246 | selp.f32 %f121, 0f33D6BF95, %f120, %p12; 247 | div.rn.f32 %f161, %f8, %f121; 248 | div.rn.f32 %f160, %f9, %f121; 249 | div.rn.f32 %f159, %f10, %f121; 250 | 251 | BB0_15: 252 | add.f32 %f164, %f164, %f161; 253 | add.f32 %f163, %f163, %f160; 254 | add.f32 %f162, %f162, %f159; 255 | add.s32 %r28, %r28, 1; 256 | setp.lt.s32 %p13, %r28, %r14; 257 | @%p13 bra BB0_4; 258 | bra.uni BB0_17; 259 | 260 | BB0_16: 261 | ld.global.u32 %r25, [%rd60]; 262 | mul.lo.s32 %r26, %r25, 3; 263 | mul.wide.s32 %rd45, %r26, 4; 264 | add.s64 %rd46, %rd2, %rd45; 265 | ld.global.f32 %f122, [%rd46]; 266 | sub.f32 %f123, %f122, %f1; 267 | ld.global.f32 %f124, [%rd46+4]; 268 | sub.f32 %f125, %f124, %f2; 269 | ld.global.f32 %f126, [%rd46+8]; 270 | sub.f32 %f127, %f126, %f3; 271 | mul.f32 %f128, %f125, %f125; 272 | fma.rn.f32 %f129, %f123, %f123, %f128; 273 | fma.rn.f32 %f130, %f127, %f127, %f129; 274 | sqrt.rn.f32 %f131, %f130; 275 | mul.f32 %f132, %f131, %f131; 276 | mul.f32 %f133, %f131, %f132; 277 | cvt.f64.f32 %fd4, %f133; 278 | setp.lt.f64 %p14, %fd4, 0d3E7AD7F29ABCAF48; 279 | selp.f32 %f134, 0f33D6BF95, %f133, %p14; 280 | div.rn.f32 %f135, %f123, %f134; 281 | div.rn.f32 %f136, %f125, %f134; 282 | div.rn.f32 %f137, %f127, %f134; 283 | add.f32 %f164, %f164, %f135; 284 | add.f32 %f163, %f163, %f136; 285 | add.f32 %f162, %f162, %f137; 286 | add.s64 %rd60, %rd60, 4; 287 | add.s32 %r31, %r31, 1; 288 | setp.lt.s32 %p15, %r31, %r14; 289 | @%p15 bra BB0_16; 290 | 291 | BB0_17: 292 | cvta.to.global.u64 %rd47, %rd26; 293 | cvta.to.global.u64 %rd48, %rd25; 294 | cvta.to.global.u64 %rd49, %rd22; 295 | cvta.to.global.u64 %rd50, %rd24; 296 | mul.lo.s32 %r27, %r1, 3; 297 | mul.wide.s32 %rd51, %r27, 4; 298 | add.s64 %rd52, %rd50, %rd51; 299 | st.global.f32 [%rd52], %f164; 300 | st.global.f32 [%rd52+4], %f163; 301 | st.global.f32 [%rd52+8], %f162; 302 | ld.global.f32 %f138, [%rd49]; 303 | sub.f32 %f139, %f1, %f138; 304 | ld.global.f32 %f140, [%rd49+4]; 305 | sub.f32 %f141, %f2, %f140; 306 | ld.global.f32 %f142, [%rd49+8]; 307 | sub.f32 %f143, %f3, %f142; 308 | mul.f32 %f144, %f162, %f141; 309 | mul.f32 %f145, %f163, %f143; 310 | sub.f32 %f146, %f144, %f145; 311 | mul.f32 %f147, %f164, %f143; 312 | mul.f32 %f148, %f162, %f139; 313 | sub.f32 %f149, %f147, %f148; 314 | mul.f32 %f150, %f163, %f139; 315 | mul.f32 %f151, %f164, %f141; 316 | sub.f32 %f152, %f150, %f151; 317 | add.s64 %rd53, %rd48, %rd51; 318 | st.global.f32 [%rd53], %f146; 319 | st.global.f32 [%rd53+4], %f149; 320 | st.global.f32 [%rd53+8], %f152; 321 | mul.f32 %f153, %f141, %f141; 322 | fma.rn.f32 %f154, %f139, %f139, %f153; 323 | fma.rn.f32 %f155, %f143, %f143, %f154; 324 | sqrt.rn.f32 %f156, %f155; 325 | shl.b64 %rd54, %rd4, 2; 326 | add.s64 %rd55, %rd47, %rd54; 327 | st.global.f32 [%rd55], %f156; 328 | 329 | BB0_18: 330 | ret; 331 | } 332 | 333 | 334 | -------------------------------------------------------------------------------- /GraphReg/IterateOnGPU5.m: -------------------------------------------------------------------------------- 1 | function [T] = IterateOnGPU5( p, q, feat,score,coolDown) 2 | %================================================================ 3 | % Adopt the adaptive simulated annealing to optimize the registration 4 | % process with faster convergence process 5 | % Input: p q: 3xN 3xM the model and the template point clouds 6 | % feat: struct the features of point clouds p and q 7 | % score: struct the score of point clouds p and q 8 | 9 | % Output: T: 4x4 array the solved transformation matrix 10 | % The dynamical framework is based on 11 | % P. Jauer, I. Kuhlemann, R. Bruder, A. Schweikard, and F. Ernst, 12 | %“Efficient registration of high-resolution feature enhanced point clouds,” 13 | % IEEE Trans. Pattern Anal. Mach. Intell., vol. 41, no. 5, pp. 1102–1115, 14 | % May 2019. 15 | %============================================================== 16 | 17 | % get the current matching method, i.e., the Coulomb’s law 18 | matchingMethod = 2; 19 | 20 | 21 | % features 22 | Np=length(p); 23 | Nq=length(q); 24 | 25 | [rp,cp]=size(feat.p); 26 | [rq,cq]=size(feat.q); 27 | 28 | 29 | feat1=zeros(2,cp); 30 | feat2=zeros(2,cq); 31 | 32 | numFeat = min(rp, rq); 33 | 34 | % scale all features within a range of [0,1] 35 | for fcnt = 1:numFeat 36 | ft1 = feat.p(fcnt, :); 37 | ft2 = feat.q(fcnt, :); 38 | 39 | fmax = max(max(ft1), max(ft2)); 40 | fmin = min(min(ft1), min(ft2)); 41 | dist =(fmax - fmin); 42 | if(dist == 0) 43 | feat1(fcnt,:)=ft1 - fmin; 44 | feat2(fcnt,:)=ft2-fmin; 45 | else 46 | %make feat in [0,1] 47 | feat1(fcnt,:)=(ft1 - fmin)/dist; 48 | feat2(fcnt,:)=(ft2-fmin)/dist; 49 | 50 | end 51 | end 52 | 53 | 54 | % init the cuda device 55 | % number of random points per iteration step 56 | numRand = 100; 57 | 58 | % if the number of random points is larger then the number of points, 59 | % compare each single point 60 | numPoints1 = Np; 61 | numPoints2 = Nq; 62 | 63 | %% scale the clouds onto a maximum edge size of 100 units 64 | mP = max(abs(max(p,[],2) - min(p,[],2))); 65 | mQ = max(abs(max(q,[],2) - min(q,[],2))); 66 | 67 | % scaleFactor 68 | sclFct = 100/max(mP,mQ); 69 | 70 | p = p.*sclFct; 71 | q = q.*sclFct; 72 | 73 | %% init gpu kernel 74 | kernel = parallel.gpu.CUDAKernel('KernelMatching.ptx', 'KernelMatching.cu', 'MatchingStep'); 75 | 76 | numCmp = round(max(1, numPoints1 * numRand/100)); % procent 77 | numThrd = round(max(1, numPoints2 * numRand/100)); % procent 78 | 79 | numThrd = min(kernel.MaxThreadsPerBlock, numThrd); 80 | numCmp = min(numThrd, numCmp); 81 | 82 | kernel.ThreadBlockSize = [kernel.MaxThreadsPerBlock,1,1]; 83 | kernel.GridSize = [ceil(numThrd/kernel.MaxThreadsPerBlock),1]; 84 | 85 | % forces computed on GPU 86 | gF = gpuArray( single( zeros(1, numThrd*3) )); 87 | % torques computed on GPU 88 | gT = gpuArray( single( zeros(1, numThrd*3) )); 89 | % the moment of inertia 90 | gI = gpuArray( single( zeros(1, numThrd) )); 91 | 92 | %% init point clouds 93 | p1 = gpuArray( single( reshape(p, 1, []) )); 94 | p2 = gpuArray( single( reshape(q, 1, []) )); 95 | 96 | 97 | centroid2 = mean(q, 2); 98 | centroid1 = mean(p, 2); 99 | 100 | 101 | kdtree=KDTreeSearcher(feat2'); 102 | [idx,~]=knnsearch(kdtree,feat1'); 103 | 104 | 105 | 106 | % transfer features to GPU memory 107 | gf1 = gpuArray( single( reshape(feat1, 1, []) ) ); 108 | gf2 = gpuArray( single( reshape(feat2, 1, []) ) ); 109 | 110 | 111 | %% Iteration parameter 112 | 113 | % transformation matrix, init with centroid to centroid trafo 114 | M = TransMat((centroid1-centroid2)); 115 | M = single(M); 116 | 117 | 118 | % temperature parameter for the adaptive simulated annealing 119 | temperature = 0.5; 120 | 121 | % vector to collect the kinetic energy per step (evaluation ?) ; 122 | oA = 0;%over Angle 123 | oT = 0;%over Translation 124 | 125 | 126 | if(1) 127 | plotQ = [q; ones(1,length(q))]; 128 | end 129 | 130 | 131 | AcceptRate = 0.5; 132 | EvalsMax=200;%100 in default or larger iterations for better results 133 | 134 | %% start iteation 135 | Ekin=1; 136 | for iter=1:1:EvalsMax 137 | iter 138 | %% get the forces 139 | % randomly choosen indicies of the reference cloud 140 | tmpIdx1=unique(randi(numPoints1, numCmp, 1)); 141 | i1=gpuArray(int32(tmpIdx1)); 142 | 143 | tmpIdx2=idx(tmpIdx1,:); 144 | 145 | i2=gpuArray(int32(tmpIdx2)); 146 | 147 | GM = gpuArray( reshape(M', 1, []) ); 148 | 149 | cent = M * [centroid2; 1]; 150 | c2 = gpuArray( cent(1:3) ); 151 | 152 | [gF, gT, gI] = feval(kernel,... 153 | p1, p2,... 154 | i1, i2,... 155 | gf1, gf2,... 156 | numFeat, c2, GM,... 157 | gF, gT, gI,... 158 | matchingMethod,... 159 | length(i2), length(i1)); 160 | 161 | 162 | %% resulting force, copy back to RAM (gather) for speed up 163 | 164 | f = reshape(gather(gF), 3, []); 165 | F = sum(f, 2); 166 | 167 | % resulting torques 168 | t = reshape(gather(gT), 3, []); 169 | Torque = sum(t, 2); 170 | 171 | % resulting moment of inertia 172 | I = sum(gather(gI)); 173 | 174 | %% get the transformations 175 | % angular accelration 176 | alpha = Torque ./ I; 177 | 178 | angle = norm(alpha)/2; 179 | 180 | if(angle == 0) 181 | rotAxis = [0 0 0]'; 182 | else 183 | rotAxis = alpha / norm(angle); 184 | end 185 | 186 | % linear acceleration 187 | a = F ./ numThrd; 188 | 189 | magnitude = norm(a)/2; 190 | magnitude = max(1,magnitude); 191 | transDir = a / norm(a); 192 | 193 | 194 | %% simulated annealing 195 | 196 | % current kinetic energy 197 | 198 | Etrans = (numThrd/2) * magnitude^2;%1/2*M*v^2 199 | Erot = (I/2) * angle^2;%1/2*I*w^2 200 | 201 | 202 | 203 | [temperature,AcceptRate, transition, Ekin] = AdaptSimulatedAnnealing(iter,EvalsMax,AcceptRate,temperature,... 204 | Ekin,... 205 | Etrans,... 206 | Erot,coolDown); 207 | 208 | 209 | if (transition) 210 | angle = oA; 211 | magnitude = oT; 212 | 213 | Ekin(end) = Ekin(end-1); 214 | else 215 | oA = angle; 216 | oT = magnitude; 217 | 218 | end 219 | 220 | 221 | %% compute the new tranformation matrix 222 | ss = transDir * magnitude * temperature; 223 | rr = deg2rad( rad2deg( angle ) * temperature); 224 | 225 | 226 | 227 | %% compute the transformation matrix 228 | T = TransMat(ss); 229 | CM = TransMat(-centroid2); 230 | CP = TransMat( centroid2); 231 | R = eye(4); 232 | 233 | if(rr ~= 0) 234 | R = AxisAngle(rotAxis, rr); 235 | end 236 | 237 | 238 | tmpMat = M * CP * R * T * CM;% transformation composite 239 | if(~isnan(tmpMat)) 240 | M = tmpMat; 241 | end 242 | 243 | %%{ 244 | %% visualisation 245 | %if(args.Visualize) 246 | if(1) 247 | 248 | figure(999); 249 | 250 | clf 251 | hold on; 252 | axis equal; 253 | view(-21,12); 254 | 255 | tmpP2 = M * plotQ(:, 1:length(plotQ)) ; 256 | 257 | scale = 4; 258 | 259 | if( 1 ) 260 | plot3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end),'bo'); 261 | plot3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end),'r+'); 262 | else 263 | scatter3(p(1,1:scale:end), p(2,1:scale:end), p(3,1:scale:end), 30, feat.p(1,1:scale:end), 'o'); 264 | scatter3(tmpP2(1,1:scale:end), tmpP2(2,1:scale:end), tmpP2(3,1:scale:end), 30, feat.q(1,1:scale:end), '*'); 265 | end 266 | axis off; 267 | set(gcf,'color','w'); 268 | pause(0.01); 269 | end 270 | end 271 | 272 | %% back scaling to orign size 273 | S = [1/sclFct 0 0 0; ... 274 | 0 1/sclFct 0 0; ... 275 | 0 0 1/sclFct 0; ... 276 | 0 0 0 1]; 277 | 278 | tS = S * M(:,4); 279 | 280 | outM = [M(:,1:3), tS]; 281 | 282 | T = outM; 283 | 284 | %% clear graphics memory 285 | clear q1 q2 i1 i2 M GM gF gT gL; 286 | end -------------------------------------------------------------------------------- /GraphReg/cuda/CudaMath.cuh: -------------------------------------------------------------------------------- 1 | //////////////////////////////////////////////////////////////////////////////////////////////////// 2 | // file: CudaMath.cuh 3 | // 4 | // summary: cuda mathematic operations 5 | //////////////////////////////////////////////////////////////////////////////////////////////////// 6 | 7 | //#ifndef __CUDA_MATH_CUH__ 8 | //#define __CUDA_MATH_CUH__ 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | /// Defines an epsilon for computational accuracy. 17 | #define CM_EPS 1e-6 18 | 19 | /// Defines PI. 20 | #define CM_PI 3.1415926535897932384626433832795028841971693993751f 21 | 22 | /// Size of the float 4x4 matrix. 23 | #define SIZE_OF_FLOAT_MAT4 sizeof(float) * 16 24 | 25 | 26 | //////////////////////////////////////////////////////////////////////////////////////////////////// 27 | /// Creates a 4x4 identity. 28 | /// 29 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 30 | /// 31 | /// [in,out] If non-null, the float* to process. 32 | /// 33 | /// true if it succeeds, false if it fails. 34 | //////////////////////////////////////////////////////////////////////////////////////////////////// 35 | template 36 | __device__ __forceinline__ bool mat4CreateIdentity(T* M) { 37 | 38 | 39 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4) 40 | return false;*/ 41 | 42 | M[0] = 1.0; M[1] = 0.0; M[2] = 0.0; M[3] = 0.0; 43 | M[4] = 0.0; M[5] = 1.0; M[6] = 0.0; M[7] = 0.0; 44 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0; 45 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0; 46 | 47 | return true; 48 | } 49 | 50 | //////////////////////////////////////////////////////////////////////////////////////////////////// 51 | /// Makes a exact copy of a 4x4 matrix. 52 | /// 53 | /// Philipp Jauer, 2014-12-15. 54 | /// 55 | /// [in,out] The copy destination. 56 | /// [in,out] The copy source. 57 | /// 58 | /// true if it succeeds, false if it fails. 59 | //////////////////////////////////////////////////////////////////////////////////////////////////// 60 | template 61 | __device__ __forceinline__ bool mat4Copy(T* Dst, T* Src) { 62 | 63 | for (int i = 0; i < 16; i++) 64 | Dst[i] = Src[i]; 65 | 66 | return true; 67 | } 68 | 69 | //////////////////////////////////////////////////////////////////////////////////////////////////// 70 | /// Creates a 4x4 matrix rotation about the x-axis. 71 | /// 72 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 73 | /// 74 | /// [in,out] If non-null, the float* to process. 75 | /// The angle in radians. 76 | /// 77 | /// true if it succeeds, false if it fails. 78 | //////////////////////////////////////////////////////////////////////////////////////////////////// 79 | __device__ __forceinline__ bool mat4CreateRotX(float* M, float angle) { 80 | 81 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4) 82 | return false;*/ 83 | 84 | float ct = cos(angle); 85 | float st = sin(angle); 86 | 87 | M[0] = 1.0; M[1] = 0.0; M[2] = 0.0; M[3] = 0.0; 88 | M[4] = 0.0; M[5] = ct; M[6] = -st; M[7] = 0.0; 89 | M[8] = 0.0; M[9] = st; M[10] = ct; M[11] = 0.0; 90 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0; 91 | 92 | return true; 93 | } 94 | 95 | //////////////////////////////////////////////////////////////////////////////////////////////////// 96 | /// Creates a 4x4 matrix rotation about the y-axis. 97 | /// 98 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 99 | /// 100 | /// [in,out] If non-null, the float* to process. 101 | /// The angle. 102 | /// 103 | /// true if it succeeds, false if it fails. 104 | //////////////////////////////////////////////////////////////////////////////////////////////////// 105 | __device__ __forceinline__ bool mat4CreateRotY(float* M, float angle) { 106 | 107 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4) 108 | return false;*/ 109 | 110 | float ct = cos(angle); 111 | float st = sin(angle); 112 | 113 | M[0] = ct; M[1] = 0.0; M[2] = st; M[3] = 0.0; 114 | M[4] = 0.0; M[5] = 1.0; M[6] = 0.0; M[7] = 0.0; 115 | M[8] = -st; M[9] = 0.0; M[10] = ct; M[11] = 0.0; 116 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0; 117 | 118 | return true; 119 | } 120 | 121 | //////////////////////////////////////////////////////////////////////////////////////////////////// 122 | /// Creates a 4x4 matrix rotation about the z-axis. 123 | /// 124 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 125 | /// 126 | /// [in,out] If non-null, the float* to process. 127 | /// The angle in radians. 128 | /// 129 | /// true if it succeeds, false if it fails. 130 | //////////////////////////////////////////////////////////////////////////////////////////////////// 131 | __device__ __forceinline__ bool mat4CreateRotZ(float* M, float angle){ 132 | 133 | /*if (sizeof(M) != SIZE_OF_FLOAT_MAT4) 134 | return false;*/ 135 | 136 | float ct = cos(angle); 137 | float st = sin(angle); 138 | 139 | M[0] = ct; M[1] = -st; M[2] = 0.0; M[3] = 0.0; 140 | M[4] = st; M[5] = ct; M[6] = 0.0; M[7] = 0.0; 141 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0; 142 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0; 143 | 144 | return true; 145 | } 146 | 147 | //////////////////////////////////////////////////////////////////////////////////////////////////// 148 | /// Multiplies two 4x4 matrices A and B, A*B = C. 149 | /// 150 | /// Philipp Jauer, 2014-10-23. 151 | /// 152 | /// [out] Resulting matrix C. 153 | /// [in] Left Matrix A. 154 | /// [in] Right Matrix B. 155 | /// 156 | /// true if it succeeds, false if it fails. 157 | //////////////////////////////////////////////////////////////////////////////////////////////////// 158 | __device__ __forceinline__ bool mat4Mult(float* C, float* A, float* B) { 159 | 160 | /*if (sizeof(C) != SIZE_OF_FLOAT_MAT4 || sizeof(A) != SIZE_OF_FLOAT_MAT4 || sizeof(B) != SIZE_OF_FLOAT_MAT4) 161 | return false;*/ 162 | 163 | for (int i = 0; i < 4; i++) { 164 | for (int j = 0; j < 4; j++) { 165 | float sum = 0.0; 166 | for (int k = 0; k < 4; k++) 167 | { 168 | sum += A[i * 4 + k] * B[k * 4 + j]; 169 | } 170 | C[i * 4 + j] = sum; 171 | } 172 | } 173 | 174 | return true; 175 | } 176 | 177 | //////////////////////////////////////////////////////////////////////////////////////////////////// 178 | /// 4x4 matrix inverse. 179 | /// 180 | /// I. Kuhlemann, P. Jauer, 2014-10-24. 181 | /// 182 | /// [out] Result of the inversion. 183 | /// [in] Matrix to invert. 184 | /// 185 | /// true if it succeeds, false if it fails. 186 | //////////////////////////////////////////////////////////////////////////////////////////////////// 187 | __device__ __forceinline__ bool mat4Inv( float* A) 188 | { 189 | //if (sizeof(M) != SIZE_OF_FLOAT_MAT4 || sizeof(A) != SIZE_OF_FLOAT_MAT4) 190 | // return false; 191 | 192 | 193 | double inv[16], det; 194 | int i; 195 | 196 | inv[0] = A[5] * A[10] * A[15] - 197 | A[5] * A[11] * A[14] - 198 | A[9] * A[6] * A[15] + 199 | A[9] * A[7] * A[14] + 200 | A[13] * A[6] * A[11] - 201 | A[13] * A[7] * A[10]; 202 | 203 | inv[4] = -A[4] * A[10] * A[15] + 204 | A[4] * A[11] * A[14] + 205 | A[8] * A[6] * A[15] - 206 | A[8] * A[7] * A[14] - 207 | A[12] * A[6] * A[11] + 208 | A[12] * A[7] * A[10]; 209 | 210 | inv[8] = A[4] * A[9] * A[15] - 211 | A[4] * A[11] * A[13] - 212 | A[8] * A[5] * A[15] + 213 | A[8] * A[7] * A[13] + 214 | A[12] * A[5] * A[11] - 215 | A[12] * A[7] * A[9]; 216 | 217 | inv[12] = -A[4] * A[9] * A[14] + 218 | A[4] * A[10] * A[13] + 219 | A[8] * A[5] * A[14] - 220 | A[8] * A[6] * A[13] - 221 | A[12] * A[5] * A[10] + 222 | A[12] * A[6] * A[9]; 223 | 224 | inv[1] = -A[1] * A[10] * A[15] + 225 | A[1] * A[11] * A[14] + 226 | A[9] * A[2] * A[15] - 227 | A[9] * A[3] * A[14] - 228 | A[13] * A[2] * A[11] + 229 | A[13] * A[3] * A[10]; 230 | 231 | inv[5] = A[0] * A[10] * A[15] - 232 | A[0] * A[11] * A[14] - 233 | A[8] * A[2] * A[15] + 234 | A[8] * A[3] * A[14] + 235 | A[12] * A[2] * A[11] - 236 | A[12] * A[3] * A[10]; 237 | 238 | inv[9] = -A[0] * A[9] * A[15] + 239 | A[0] * A[11] * A[13] + 240 | A[8] * A[1] * A[15] - 241 | A[8] * A[3] * A[13] - 242 | A[12] * A[1] * A[11] + 243 | A[12] * A[3] * A[9]; 244 | 245 | inv[13] = A[0] * A[9] * A[14] - 246 | A[0] * A[10] * A[13] - 247 | A[8] * A[1] * A[14] + 248 | A[8] * A[2] * A[13] + 249 | A[12] * A[1] * A[10] - 250 | A[12] * A[2] * A[9]; 251 | 252 | inv[2] = A[1] * A[6] * A[15] - 253 | A[1] * A[7] * A[14] - 254 | A[5] * A[2] * A[15] + 255 | A[5] * A[3] * A[14] + 256 | A[13] * A[2] * A[7] - 257 | A[13] * A[3] * A[6]; 258 | 259 | inv[6] = -A[0] * A[6] * A[15] + 260 | A[0] * A[7] * A[14] + 261 | A[4] * A[2] * A[15] - 262 | A[4] * A[3] * A[14] - 263 | A[12] * A[2] * A[7] + 264 | A[12] * A[3] * A[6]; 265 | 266 | inv[10] = A[0] * A[5] * A[15] - 267 | A[0] * A[7] * A[13] - 268 | A[4] * A[1] * A[15] + 269 | A[4] * A[3] * A[13] + 270 | A[12] * A[1] * A[7] - 271 | A[12] * A[3] * A[5]; 272 | 273 | inv[14] = -A[0] * A[5] * A[14] + 274 | A[0] * A[6] * A[13] + 275 | A[4] * A[1] * A[14] - 276 | A[4] * A[2] * A[13] - 277 | A[12] * A[1] * A[6] + 278 | A[12] * A[2] * A[5]; 279 | 280 | inv[3] = -A[1] * A[6] * A[11] + 281 | A[1] * A[7] * A[10] + 282 | A[5] * A[2] * A[11] - 283 | A[5] * A[3] * A[10] - 284 | A[9] * A[2] * A[7] + 285 | A[9] * A[3] * A[6]; 286 | 287 | inv[7] = A[0] * A[6] * A[11] - 288 | A[0] * A[7] * A[10] - 289 | A[4] * A[2] * A[11] + 290 | A[4] * A[3] * A[10] + 291 | A[8] * A[2] * A[7] - 292 | A[8] * A[3] * A[6]; 293 | 294 | inv[11] = -A[0] * A[5] * A[11] + 295 | A[0] * A[7] * A[9] + 296 | A[4] * A[1] * A[11] - 297 | A[4] * A[3] * A[9] - 298 | A[8] * A[1] * A[7] + 299 | A[8] * A[3] * A[5]; 300 | 301 | inv[15] = A[0] * A[5] * A[10] - 302 | A[0] * A[6] * A[9] - 303 | A[4] * A[1] * A[10] + 304 | A[4] * A[2] * A[9] + 305 | A[8] * A[1] * A[6] - 306 | A[8] * A[2] * A[5]; 307 | 308 | det = A[0] * inv[0] + A[1] * inv[4] + A[2] * inv[8] + A[3] * inv[12]; 309 | 310 | // if nearly zero 311 | if (det < CM_EPS) 312 | return false; 313 | 314 | det = 1.0 / det; 315 | 316 | for (i = 0; i < 16; i++) 317 | A[i] = inv[i] * det; 318 | 319 | return true; 320 | } 321 | 322 | //////////////////////////////////////////////////////////////////////////////////////////////////// 323 | /// Normalizes the given vector of size 2. 324 | /// 325 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 326 | /// 327 | /// [in,out] If non-null, the vector. 328 | /// 329 | /// true if it succeeds, false if it fails. 330 | //////////////////////////////////////////////////////////////////////////////////////////////////// 331 | __device__ __forceinline__ bool normalizeVec2(float* vector) { 332 | 333 | //int size = sizeof(vector) / sizeof(float); 334 | 335 | float n = sqrtf(vector[0] * vector[0] 336 | + vector[1] * vector[1]); 337 | vector[0] /= n; 338 | vector[1] /= n; 339 | 340 | return true; 341 | } 342 | 343 | //////////////////////////////////////////////////////////////////////////////////////////////////// 344 | /// Normalize vector of size 3. 345 | /// 346 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 347 | /// 348 | /// [in,out] If non-null, the vector. 349 | /// 350 | /// true if it succeeds, false if it fails. 351 | //////////////////////////////////////////////////////////////////////////////////////////////////// 352 | __device__ __forceinline__ bool normalizeVec3(float* vector) { 353 | 354 | float n = sqrtf(vector[0] * vector[0] 355 | + vector[1] * vector[1] 356 | + vector[2] * vector[2]); 357 | vector[0] /= n; 358 | vector[1] /= n; 359 | vector[2] /= n; 360 | 361 | return true; 362 | } 363 | 364 | //////////////////////////////////////////////////////////////////////////////////////////////////// 365 | /// Normalize vector of size 4. 366 | /// 367 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 368 | /// 369 | /// [in,out] If non-null, the vector. 370 | /// 371 | /// true if it succeeds, false if it fails. 372 | //////////////////////////////////////////////////////////////////////////////////////////////////// 373 | __device__ __forceinline__ bool normalizeVec4(float* vector) { 374 | float n = sqrtf(vector[0] * vector[0] 375 | + vector[1] * vector[1] 376 | + vector[2] * vector[2] 377 | + vector[3] * vector[3]); 378 | 379 | vector[0] /= n; 380 | vector[1] /= n; 381 | vector[2] /= n; 382 | vector[3] /= n; 383 | 384 | return true; 385 | } 386 | 387 | //////////////////////////////////////////////////////////////////////////////////////////////////// 388 | /// Cross product of a vector of size 3. 389 | /// 390 | /// I. Kuhlemann, P. Jauer, 2014-10-27. 391 | /// 392 | /// [in,out] If non-null, the float* to process. 393 | /// [in,out] If non-null, the float* to process. 394 | /// [in,out] If non-null, the float* to process. 395 | /// 396 | /// true if it succeeds, false if it fails. 397 | //////////////////////////////////////////////////////////////////////////////////////////////////// 398 | template 399 | __device__ __forceinline__ bool crossVec3(T* c, T* a, T* b) { 400 | 401 | c[0] = a[1] * b[2] - a[2] * b[1]; 402 | c[1] = a[2] * b[0] - a[0] * b[2]; 403 | c[2] = a[0] * b[1] - a[1] * b[0]; 404 | 405 | return true; 406 | } 407 | 408 | // TODO:: Hier unbedingt nochmal nachdenken und optimieren 409 | __device__ __forceinline__ bool mat4ExtractRotation(float* M, float* A, float* B) { 410 | 411 | float x[3]; 412 | x[0] = A[3] - B[3]; 413 | x[1] = A[7] - B[7]; 414 | x[2] = A[11] - B[11]; 415 | 416 | normalizeVec3(x); 417 | 418 | int cnt = 0; 419 | for (int i = 0; i < 3; i++) 420 | { 421 | if (fabsf(x[i]) > CM_EPS) 422 | cnt++; 423 | } 424 | 425 | switch (cnt) 426 | { 427 | case 1: 428 | if (fabsf(x[0]) > CM_EPS){ 429 | 430 | float beta = (x[0] < 0) ? -CM_PI/2.0 : CM_PI/2.0; 431 | 432 | float ct1 = cos(beta); 433 | float st1 = sin(beta); 434 | float ct2 = cos(CM_PI); 435 | float st2 = sin(CM_PI); 436 | 437 | float T1[16] = {ct2, -st2, 0.0, 0.0, 438 | st2, ct2, 0.0, 0.0, 439 | 0.0, 0.0, 1.0, 0.0, 440 | 0.0, 0.0, 0.0, 1.0 }; 441 | 442 | float T2[16] = { ct1, 0.0, st1, 0.0, 443 | 0.0, 1.0, 0.0, 0.0, 444 | -st1, 0.0, ct1, 0.0, 445 | 0.0, 0.0, 0.0, 1.0 }; 446 | 447 | mat4Mult(M, T1, T2); 448 | 449 | } else if (fabsf(x[0]) > CM_EPS) { 450 | 451 | float gamma = (x[1] < 0) ? CM_PI / 2.0 : -CM_PI / 2.0; 452 | 453 | float ct1 = cos(CM_PI/2); 454 | float st1 = sin(CM_PI/2); 455 | float ct2 = cos(gamma); 456 | float st2 = sin(gamma); 457 | 458 | float T1[16] = { ct2, -st2, 0.0, 0.0, 459 | st2, ct2, 0.0, 0.0, 460 | 0.0, 0.0, 1.0, 0.0, 461 | 0.0, 0.0, 0.0, 1.0 }; 462 | 463 | float T2[16] = { ct1, 0.0, st1, 0.0, 464 | 0.0, 1.0, 0.0, 0.0, 465 | -st1, 0.0, ct1, 0.0, 466 | 0.0, 0.0, 0.0, 1.0 }; 467 | 468 | mat4Mult(M, T1, T2); 469 | 470 | } else { 471 | 472 | float ct2 = cos(CM_PI); 473 | float st2 = sin(CM_PI); 474 | 475 | M[0] = ct2; M[1] = -st2; M[2] = 0.0; M[3] = 0.0; 476 | M[4] = st2; M[5] = ct2; M[6] = 0.0; M[7] = 0.0; 477 | M[8] = 0.0; M[9] = 0.0; M[10] = 1.0; M[11] = 0.0; 478 | M[12] = 0.0; M[13] = 0.0; M[14] = 0.0; M[15] = 1.0; 479 | } 480 | break; 481 | case 2: 482 | if (fabsf(x[0]) <= CM_EPS){ 483 | // vector u 484 | float u[3]; 485 | u[0] = x[2]; 486 | u[2] = 0.0; 487 | u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1]; 488 | 489 | normalizeVec3(u); 490 | 491 | // vector v 492 | float v[3]; 493 | crossVec3(v, x, u); 494 | 495 | float ct1 = cos(-CM_PI / 2); 496 | float st1 = sin(-CM_PI / 2); 497 | float ct2 = cos(CM_PI); 498 | float st2 = sin(CM_PI); 499 | 500 | //TODO:: hier 3x3 wegen schnelligkeit 501 | float T1[16] = { x[0], u[0], v[0], 0.0, 502 | x[1], u[1], v[1], 0.0, 503 | x[2], u[2], v[2], 0.0, 504 | 0.0, 0.0, 0.0, 1.0 }; 505 | 506 | float T2[16] = { ct1, 0.0, st1, 0.0, 507 | 0.0, 1.0, 0.0, 0.0, 508 | -st1, 0.0, ct1, 0.0, 509 | 0.0, 0.0, 0.0, 1.0 }; 510 | 511 | float T3[16]; 512 | mat4Mult(T3, T1, T2); 513 | 514 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0; 515 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0; 516 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0; 517 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0; 518 | 519 | mat4Mult(M, T3, T1); 520 | } 521 | else if (fabsf(x[1]) <= CM_EPS) { 522 | // vector u 523 | float u[3]; 524 | u[1] = fabsf(x[2]); 525 | u[2] = 0.0; 526 | u[0] = (-(x[1] * u[1]) - (x[2] * u[2])) / x[0]; 527 | 528 | normalizeVec3(u); 529 | 530 | // vector v 531 | float v[3]; 532 | crossVec3(v, x, u); 533 | 534 | float ct1 = cos(-CM_PI / 2); 535 | float st1 = sin(-CM_PI / 2); 536 | float ct2 = cos(CM_PI); 537 | float st2 = sin(CM_PI); 538 | 539 | //TODO:: hier 3x3 wegen schnelligkeit 540 | float T1[16] = { x[0], u[0], v[0], 0.0, 541 | x[1], u[1], v[1], 0.0, 542 | x[2], u[2], v[2], 0.0, 543 | 0.0, 0.0, 0.0, 1.0 }; 544 | 545 | float T2[16] = { ct1, 0.0, st1, 0.0, 546 | 0.0, 1.0, 0.0, 0.0, 547 | -st1, 0.0, ct1, 0.0, 548 | 0.0, 0.0, 0.0, 1.0 }; 549 | 550 | float T3[16]; 551 | mat4Mult(T3, T1, T2); 552 | 553 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0; 554 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0; 555 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0; 556 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0; 557 | 558 | mat4Mult(M, T3, T1); 559 | } 560 | else { 561 | // vector u 562 | float u[3]; 563 | u[0] = 0.0; 564 | u[2] = x[1]; 565 | //u[1] = (-(x[2] * u[2]) - (x[1] * u[2])) / x[1]; 566 | u[1] = 0.0; 567 | 568 | normalizeVec3(u); 569 | 570 | // vector v 571 | float v[3]; 572 | crossVec3(v, x, u); 573 | 574 | float ct1 = cos(-CM_PI / 2); 575 | float st1 = sin(-CM_PI / 2); 576 | 577 | //TODO:: hier 3x3 wegen schnelligkeit 578 | float T1[16] = { x[0], u[0], v[0], 0.0, 579 | x[1], u[1], v[1], 0.0, 580 | x[2], u[2], v[2], 0.0, 581 | 0.0, 0.0, 0.0, 1.0 }; 582 | 583 | float T2[16] = { ct1, 0.0, st1, 0.0, 584 | 0.0, 1.0, 0.0, 0.0, 585 | -st1, 0.0, ct1, 0.0, 586 | 0.0, 0.0, 0.0, 1.0 }; 587 | 588 | float T3[16]; 589 | mat4Mult(T3, T1, T2); 590 | 591 | T1[0] = ct1; T1[1] = -st1; T1[2] = 0.0; T1[3] = 0.0; 592 | T1[4] = st1; T1[5] = ct1; T1[6] = 0.0; T1[7] = 0.0; 593 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0; 594 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0; 595 | 596 | mat4Mult(M, T3, T1); 597 | } 598 | break; 599 | case 3: 600 | { 601 | // vector u 602 | float u[3]; 603 | u[0] = x[2]; 604 | u[2] = 0.0; 605 | u[1] = (-(x[0] * u[0]) - (x[2] * u[2])) / x[1]; 606 | 607 | normalizeVec3(u); 608 | 609 | // vector v 610 | float v[3]; 611 | crossVec3(v, x, u); 612 | 613 | float ct1 = cos(-CM_PI / 2); 614 | float st1 = sin(-CM_PI / 2); 615 | float ct2 = cos(CM_PI); 616 | float st2 = sin(CM_PI); 617 | 618 | //TODO:: hier 3x3 wegen schnelligkeit 619 | float T1[16] = {x[0], u[0], v[0], 0.0, 620 | x[1], u[1], v[1], 0.0, 621 | x[2], u[2], v[2], 0.0, 622 | 0.0, 0.0, 0.0, 1.0}; 623 | 624 | float T2[16] = {ct1, 0.0, st1, 0.0, 625 | 0.0, 1.0, 0.0, 0.0, 626 | -st1, 0.0, ct1, 0.0, 627 | 0.0, 0.0, 0.0, 1.0 }; 628 | 629 | float T3[16]; 630 | mat4Mult(T3, T1, T2); 631 | 632 | T1[0] = ct2; T1[1] = -st2; T1[2] = 0.0; T1[3] = 0.0; 633 | T1[4] = st2; T1[5] = ct2; T1[6] = 0.0; T1[7] = 0.0; 634 | T1[8] = 0.0; T1[9] = 0.0; T1[10] = 1.0; T1[11] = 0.0; 635 | T1[12] = 0.0; T1[13] = 0.0; T1[14] = 0.0; T1[15] = 1.0; 636 | 637 | mat4Mult(M, T3, T1); 638 | 639 | break; 640 | } 641 | default: 642 | // fuck off state 643 | break; 644 | } 645 | 646 | return true; 647 | } 648 | 649 | //#endif // __CUDA_MATH_CUH --------------------------------------------------------------------------------