├── Build_Epsilon.m ├── Build_KNN.m ├── Constraint_Dijkstra.m ├── Constraint_Dijkstra1.m ├── Demo.m ├── Dollarsign.m ├── Mixedshapes.m ├── Path_Based_Clustering.m ├── README.md ├── Synthetic Data ├── ConeandPlane.m ├── Cross.m ├── Cross_3lins.m ├── Dollarsign.m ├── Funnyshape.m ├── Mixedshapes.m ├── Roll.m ├── RoseandCircle.m ├── RoseandRose.m ├── Tschirnhausen.m ├── TwoCurve.m ├── TwoPlane.m ├── TwoSphere.m ├── Two_arcs.m ├── dollarsignSample.m ├── five_affine_subspaces.mat ├── hybrid.mat ├── three_linear_planes.mat └── two_spirals.mat ├── TwoSphere.m └── kmeansplus.m /Build_Epsilon.m: -------------------------------------------------------------------------------- 1 | function [edge_matrix, weights]=Build_Epsilon(D,ep) 2 | % Bulding K-nearest-neighbour graph 3 | % ON Entry: 4 | % D Input Data 5 | % ep Parameter ep( radius of ball for epsilon graph) 6 | % On exit: 7 | % edge_matrix Find k nearest neighbours of each node 8 | % weight Compute the weights of conected edges 9 | % Amir Babaeian 10 | % Department of Mathematics 11 | % UC San Diego 12 | % USA 13 | % 14 | % July 25 2014: Original version. 15 | %%% computing epsilon graph 16 | n=size(D,1); 17 | for i=1:n 18 | edge_matrix{i}=[]; 19 | weights{i}=[]; 20 | 21 | for j=1:n 22 | dist=norm(D(i,:)-D(j,:)); 23 | if dist<=ep && dist>=.5*ep && j~=i 24 | edge_matrix{i}=[edge_matrix{i} j]; % find the neighbours inside the ball ep ball 25 | weights{i}=[weights{i} dist]; % and outside the .5ep ball 26 | 27 | end 28 | end 29 | end 30 | 31 | 32 | end -------------------------------------------------------------------------------- /Build_KNN.m: -------------------------------------------------------------------------------- 1 | function [edge_matrix, weights]=Build_KNN(D,k) 2 | % Bulding K-nearest-neighbour graph 3 | % ON Entry: 4 | % D Input Data 5 | % k Parameter K for K nearest neighbour graph 6 | % On exit: 7 | % edge_matrix Find k nearest neighbours of each node 8 | % weight Gives weights of conected edges 9 | % Amir Babaeian. 10 | % Department of Mathematics 11 | % UC San Diego 12 | % USA 13 | % 14 | % July 25 2014: Original version. 15 | [n1,d1] = knnsearch(D,D,'K',k+1,'Distance','euclidean'); 16 | edge_matrix=n1(:,2:end); % matrix of conected edges to each node 17 | weights=d1(:,2:end); % matrix of distance between conected nodes 18 | end -------------------------------------------------------------------------------- /Constraint_Dijkstra.m: -------------------------------------------------------------------------------- 1 | function [ Binary_matrix,path] = Constraint_Dijkstra( D,edge_matrix,weights,sn,ang) 2 | % Computing Sonstrained version of shortest path 3 | % ON Entry: 4 | % edge_matrix each cell gives the weight of all the nodes conected 5 | % to a given node 6 | % weights weight of edges 7 | % sn landmarks points 8 | % ang angle constrained 9 | % D data points 10 | % On Exit: 11 | % path path 12 | % Binary_matrix each row represent a binary vector where each 13 | % element of vector defines whether the node is connected to landmrk or not 14 | % 15 | % Amir Babaeian. 16 | % Department of Mathematics 17 | % UC San Diego 18 | % USA 19 | % 20 | % July 25 2014: Original version. 21 | n=size(D,1); 22 | mm=size(sn,1); 23 | Binary_matrix=zeros(n,mm); 24 | for ii=1:mm 25 | s=sn(ii); 26 | distance=inf(1,n); %%% distance matrix of nodes 27 | t=s; 28 | cost(1:n)=inf; 29 | parents(1:n)=inf; 30 | m=0; 31 | temporary=zeros(1,n); 32 | ss=s; 33 | w=weights; 34 | for i=1:n 35 | path{ii,i}=s; 36 | end 37 | 38 | for j=1:n 39 | distance(edge_matrix{t})=w{t}; 40 | 41 | for i=1:n 42 | if distance(i)+m p, 17 | m(i,1) = 0; 18 | 19 | % surface is along x=0, y=[0,1], z=[-3,3] 20 | X(i,:) = [0 rand() 6*rand()-3]; 21 | 22 | Y(i,1) = 13 + X(i,3) + randn(); % will be from 10 to 16 +/- 2 23 | else 24 | m(i,1) = 1; 25 | 26 | angle = 1.5*rand()*pi; 27 | 28 | x = cos(angle); 29 | y = rand(); 30 | z = sin(angle); 31 | 32 | % choose between top and bottom of S 33 | if rand() > 0.5, 34 | X(i,:) = [x y z+1]; 35 | Y(i,1) = angle; % Y is from 0 to 3*pi 36 | else, 37 | X(i,:) = [-x y -z-1]; 38 | Y(i,1) = 3*pi - angle; 39 | end 40 | 41 | end 42 | end 43 | 44 | X(find(m),3) = X(find(m),3) + randn(length(find(m)),1)*noise; 45 | 46 | Y = Y + randn(n,1)*0.1; 47 | 48 | % randomly shuffle the points, so we can easily subsample later 49 | randidx = randperm(n); 50 | X = X(randidx,:); 51 | Y = Y(randidx,:); 52 | m = m(randidx,:); 53 | 54 | 55 | % [group,path]=Path_Based_Cluster_LandMarks( D ,10, 70 , 15); -------------------------------------------------------------------------------- /Mixedshapes.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Mixedshapes( ~ ) 2 | 3 | 4 | %%cone 5 | n1=1200; 6 | x=-2+2*rand(n1,1); 7 | y=rand(n1,1); 8 | z=x.^2-2*y.^2+2*x+3.*y; 9 | D1=[x y z]; 10 | 11 | 12 | 13 | n1=400; 14 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 15 | x=.8*sin(teta); 16 | y=.5*ones(n1,1); 17 | z=.8*cos(teta); 18 | D2=[x y z]; 19 | 20 | n1=800; 21 | x=-1+2*rand(n1,1); 22 | y=-1+2*rand(n1,1); 23 | z=zeros(n1,1); 24 | D3=[x y z]; 25 | 26 | 27 | % %Plane 28 | % n2=2000; 29 | % D2=zeros(n2,3); 30 | % D2(:,1)=-2+4.*rand(n2,1); 31 | % D2(:,2)=-2+4.*rand(n2,1); 32 | % D2(:,3)=1+.2*D2(:,1); 33 | 34 | 35 | D=[D1;D2;D3]; 36 | 37 | 38 | end 39 | 40 | -------------------------------------------------------------------------------- /Path_Based_Clustering.m: -------------------------------------------------------------------------------- 1 | function lables = Path_Based_Clustering( D, ep, l,ang, cl) 2 | % Path based clustering algorithm 3 | % ON Entry: 4 | % D n*p data(N should be the number of data points and 5 | % p is the number of variables) 6 | % ep Radius of epsilon graph 7 | % l Number of landmarks 8 | % ang Angel constraint used in shortest path algorithm 9 | % c Number of clusters 10 | % On Exit: 11 | % labels Lables that computed using result of clustering 12 | % algorithm 13 | % 14 | % Amir Babaeian. 15 | % Department of Mathematics 16 | % UC San Diego 17 | % USA 18 | % 19 | % July 25 2014: Original version. 20 | 21 | n=size(D,1); 22 | %%%%%%%%%%%%%%%%%%% Build in epsilon graph %%%%%%%%%%%%%% 23 | %[edge_matrix, weights]=Build_Epsilon(D,ep); % epsilon graph 24 | [edge_matrix, weights]=Build_KNN(D,ep); % KNN graph 25 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 26 | sn=randperm(n,l); % choose landmarks randomly 27 | %%%%%%%%%%%%%% applying constrained shortest path algorithm %%%%%%%%%%%%%%% 28 | %[Binary_matrix,path] = Constraint_Dijkstra(D,edge_matrix,weights,sn,ang); 29 | [Binary_matrix,path] = Constraint_Dijkstra1(D,edge_matrix,weights,sn,ang); 30 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 31 | [C,ia,ic] = unique(Binary_matrix,'rows'); 32 | %%%%%%%%%%%%%%%%%%%%%% clustering %%%%%%%%%%%%%%%%%%%%%%%% 33 | % [K,E]=kmeansplus(C',cl); % Apply K-Means++ on embeded data 34 | % K=K'; 35 | %%%%%%%%%%%%%%%%%%%%% Inestead of K-Means we can use hierarchical clustering 36 | % with complete linkage 37 | 38 | BD=pdist(C,'euclidean'); % Euclidean distance of correlation matrix 39 | Z = linkage(BD,'complete'); 40 | K = cluster(Z,'maxclust',cl); 41 | 42 | for i=1:size(K,1) 43 | ic(ic==i)=K(i); 44 | end 45 | lables=ic; % Give the labels of clustering algorithm 46 | end 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ##Source code for papers titled: 2 | **Nonlinear subspace clustering using curvature constrained distances. Pattern Recognition Letters, 68, 118-125.** 3 | 4 | 5 | Please acknowledge and cite the related papers. 6 | 7 | ``` 8 | Amir Babaeian 9 | Email: ababaeian@ucsd.edu 10 | 11 | ``` 12 | 13 | 14 | ##Related articles: 15 | ANGLE CONSTRAINED PATH FOR CLUSTERING OF MULTIPLE MANIFOLDS, International conference on image processing(ICIP 2015) 16 | 17 | Multiple Manifold Clustering Using Curvature Constrained Path. PloS one, 10(9), e0137986. 18 | 19 | ![alt text](http://i68.tinypic.com/2zylg5i.png "Journal Logo") 20 | [Journal link] (http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0137986) 21 | 22 | Thank to Professor Ery Arias-Castro for his advise regarding this project. 23 | 24 | ##How to use the code: 25 | 26 | In order to use the code you should run the demo file as follow: 27 | 28 | Example: 29 | ```matlab 30 | % labels = Path_Based_Clustering(data, k, no_landmarks, angle_constraint, no_clusters); 31 | labels = Path_Based_Clustering(D, 60, 10, 15, 2); 32 | % Labels are the output of clustering algorithm. 33 | ``` 34 | 35 | | ON Entry | Description | 36 | | ------------- | ----------- | 37 | | data | N*P data (N should be the number of data points and P is the number of variables)| 38 | | k | Radius of epsilon graph or no neighbours | 39 | | no_landmarks | Number of landmarks | 40 | | angle_constraint | Angel constraint used in shortest path algorithm| 41 | | no_clusters | Number of clusters | 42 | 43 | |On Exit | Description | 44 | | ------------- | ----------- | 45 | | labels | Labels that computed using result of clustering algorithm | 46 | 47 | 48 | -------------------------------------------------------------------------------- /Synthetic Data/ConeandPlane.m: -------------------------------------------------------------------------------- 1 | function [ D ] = ConeandPlane( ~ ) 2 | 3 | 4 | %%cone 5 | n1=8000; 6 | teta=(2*pi)*rand(n1,1); % Using spherical coordinates 7 | r=2*rand(n1,1); 8 | x=r.*sin(teta); 9 | y=r.*cos(teta); 10 | z=r; 11 | D1=[x y z]; 12 | 13 | 14 | 15 | 16 | %Plane 17 | n2=4000; 18 | D2=zeros(n2,3); 19 | D2(:,1)=-2+4.*rand(n2,1); 20 | D2(:,2)=-2+4.*rand(n2,1); 21 | D2(:,3)=1+.2*D2(:,1); 22 | 23 | 24 | D=[D1;D2]; 25 | 26 | 27 | end 28 | 29 | -------------------------------------------------------------------------------- /Synthetic Data/Cross.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Cross( ~ ) 2 | n1=2000; 3 | D1=zeros(n1,2); 4 | X1=-.5+1*rand(n1,1); 5 | D1(:,1)=X1; 6 | % Y=-X.^2+1; 7 | Y1=zeros(n1,1); 8 | noise=-1+2*rand(n1,1); % generate uniform noise on the interval [-1 1] 9 | D1(:,2)=Y1+.0*noise; % add noise to the data 10 | % cluster 1 11 | n2=6500; 12 | D2=zeros(n2,2); 13 | X2=zeros(n2,1); 14 | noise=-1+2*rand(n2,1); % generate uniform noise on the interval [-1 1] 15 | D2(:,1)=X2+.0*noise; % add noise to the data 16 | Y2=-.5+1*rand(n2,1); 17 | D2(:,2)=Y2; 18 | % Y=-X.^2+1; 19 | D=[D1;D2]; 20 | 21 | end 22 | % [group,path] = Path_Based_Cluster( D , 70 , 6 ); -------------------------------------------------------------------------------- /Synthetic Data/Cross_3lins.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Cross_3lins( ~ ) 2 | n1=2000; 3 | D1=zeros(n1,2); 4 | X1=-1+2*rand(n1,1); 5 | D1(:,1)=X1; 6 | % Y=-X.^2+1; 7 | Y1=zeros(n1,1); 8 | noise=-1+2*rand(n1,1); % generate uniform noise on the interval [-1 1] 9 | D1(:,2)=Y1+.02*noise; % add noise to the data 10 | % cluster 1 11 | n2=2000; 12 | D2=zeros(n2,2); 13 | X2=zeros(n2,1); 14 | noise=-1+2*rand(n2,1); % generate uniform noise on the interval [-1 1] 15 | D2(:,1)=X2+.02*noise; % add noise to the data 16 | Y2=-1+2*rand(n2,1); 17 | D2(:,2)=Y2; 18 | % Y=-X.^2+1; 19 | 20 | n3=2000; 21 | D3=zeros(n3,2); 22 | X3=-1+2*rand(n3,1); 23 | noise=-1+2*rand(n2,1); % generate uniform noise on the interval [-1 1] 24 | D3(:,1)=X3; 25 | Y3=X3; 26 | D3(:,2)=Y3+.02*noise; 27 | 28 | D=[D1;D2;D3]; 29 | 30 | end 31 | % [group,path] = Path_Based_Cluster( D , 70 , 6 ); -------------------------------------------------------------------------------- /Synthetic Data/Dollarsign.m: -------------------------------------------------------------------------------- 1 | function [X] = Dollarsign(~) 2 | % X: generated data set 3 | % m : label 4 | n=16000; 5 | noise=.03; 6 | 7 | % choose S part over | part with prob based on area of each 8 | % S is 3*pi, | is 6 9 | p = (3*pi)/(6+3*pi); 10 | 11 | X = zeros(n,3); 12 | Y = zeros(n,1); 13 | m = zeros(n,1); 14 | 15 | for i = 1:n, 16 | if rand() > p, 17 | m(i,1) = 0; 18 | 19 | % surface is along x=0, y=[0,1], z=[-3,3] 20 | X(i,:) = [0 rand() 6*rand()-3]; 21 | 22 | Y(i,1) = 13 + X(i,3) + randn(); % will be from 10 to 16 +/- 2 23 | else 24 | m(i,1) = 1; 25 | 26 | angle = 1.5*rand()*pi; 27 | 28 | x = cos(angle); 29 | y = rand(); 30 | z = sin(angle); 31 | 32 | % choose between top and bottom of S 33 | if rand() > 0.5, 34 | X(i,:) = [x y z+1]; 35 | Y(i,1) = angle; % Y is from 0 to 3*pi 36 | else, 37 | X(i,:) = [-x y -z-1]; 38 | Y(i,1) = 3*pi - angle; 39 | end 40 | 41 | end 42 | end 43 | 44 | X(find(m),3) = X(find(m),3) + randn(length(find(m)),1)*noise; 45 | 46 | Y = Y + randn(n,1)*0.1; 47 | 48 | % randomly shuffle the points, so we can easily subsample later 49 | randidx = randperm(n); 50 | X = X(randidx,:); 51 | Y = Y(randidx,:); 52 | m = m(randidx,:); 53 | 54 | 55 | % [group,path]=Path_Based_Cluster_LandMarks( D ,10, 70 , 15); -------------------------------------------------------------------------------- /Synthetic Data/Funnyshape.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Funnyshape( ~ ) 2 | 3 | n1=1000; 4 | x=-1+2*rand(n1,1); 5 | y=-1+2*rand(n1,1); 6 | z=x.^2-y.^2; 7 | D1=[x y z]; 8 | 9 | n2=1000; 10 | u=rand(n2,1); 11 | v=rand(n2,1); 12 | r=.5; 13 | phi=2*pi*u; 14 | teta=acos(2*v-1); 15 | z=r*cos(teta); 16 | x=sqrt(r^2-z.^2).*cos(phi); 17 | y=sqrt(r^2-z.^2).*sin(phi); 18 | D2=[x y z]; 19 | D=[D1;D2]; 20 | 21 | %%%Generate uniformly random in polar cordinate 22 | teta=0:5*pi/1000000:5*pi; 23 | teta=teta'; 24 | x=cos(teta/5).*sin(teta); 25 | y=cos(teta/5).*cos(teta); 26 | D1=[x y]; 27 | 28 | n1=2000; 29 | k=1000000*rand(n1,1); 30 | k=floor(k); 31 | D=D1(k,:); 32 | 33 | end 34 | 35 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 36 | -------------------------------------------------------------------------------- /Synthetic Data/Mixedshapes.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Mixedshapes( ~ ) 2 | 3 | 4 | %%cone 5 | n1=1200; 6 | x=-2+2*rand(n1,1); 7 | y=rand(n1,1); 8 | z=x.^2-2*y.^2+2*x+3.*y; 9 | D1=[x y z]; 10 | 11 | 12 | 13 | n1=400; 14 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 15 | x=.8*sin(teta); 16 | y=.5*ones(n1,1); 17 | z=.8*cos(teta); 18 | D2=[x y z]; 19 | 20 | n1=800; 21 | x=-1+2*rand(n1,1); 22 | y=-1+2*rand(n1,1); 23 | z=zeros(n1,1); 24 | D3=[x y z]; 25 | 26 | 27 | % %Plane 28 | % n2=2000; 29 | % D2=zeros(n2,3); 30 | % D2(:,1)=-2+4.*rand(n2,1); 31 | % D2(:,2)=-2+4.*rand(n2,1); 32 | % D2(:,3)=1+.2*D2(:,1); 33 | 34 | 35 | D=[D1;D2;D3]; 36 | 37 | 38 | end 39 | 40 | -------------------------------------------------------------------------------- /Synthetic Data/Roll.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Roll( ~ ) 2 | 3 | n1=8000; 4 | D1=zeros(n1,3); 5 | D1(:,1)=-.4+.8.*rand(n1,1); 6 | Xt =.8.*rand(n1,1); 7 | D1(:,2)=cos(4*pi*Xt).*Xt; 8 | noise=-1+2.*rand(n1,1); % generate uniform noise on the interval [-1 1] 9 | D1(:,3)=-sin(4*pi*Xt).*Xt+.0*noise; 10 | 11 | n2=4000; 12 | D2=zeros(n2,3); 13 | D2(:,1)=-.4+.8.*rand(n2,1); 14 | D2(:,2)=-1+2.*rand(n2,1); 15 | noise=-1+2.*rand(n2,1); % generate uniform noise on the interval [-1 1] 16 | D2(:,3)=zeros(n2,1)+.0*noise; 17 | D=[D1;D2]; 18 | 19 | end 20 | % [group,path]=Path_Based_Cluster_LandMarks( D ,10, 70 , 15); 21 | -------------------------------------------------------------------------------- /Synthetic Data/RoseandCircle.m: -------------------------------------------------------------------------------- 1 | function [ D ] = RoseandCircle( ~ ) 2 | 3 | n1=1500; 4 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 5 | x=cos(teta/.5).*sin(teta); 6 | y=cos(teta/.5).*cos(teta); 7 | D1=[x y]; 8 | 9 | 10 | 11 | n1=500; 12 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 13 | x=.5*sin(teta); 14 | y=.5*cos(teta); 15 | D2=[x y]; 16 | 17 | n1=500; 18 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 19 | x=.5*sin(teta)+.5; 20 | y=.5*cos(teta)+.5; 21 | D3=[x y]; 22 | 23 | % n1=2500; 24 | % teta=(2*pi)*randn(n1,1); % Using spherical coordinates 25 | % x=cos(3*teta).*sin(teta); 26 | % y=cos(3*teta).*cos(teta); 27 | % D2=[x y]; 28 | 29 | D=[D1;D2;D3]; 30 | 31 | end 32 | 33 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 34 | -------------------------------------------------------------------------------- /Synthetic Data/RoseandRose.m: -------------------------------------------------------------------------------- 1 | function [ D ] = RoseandRose( ~ ) 2 | 3 | n1=2000; 4 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 5 | x=cos(teta/5).*sin(teta); 6 | y=cos(teta/5).*cos(teta); 7 | D1=[x y]; 8 | 9 | 10 | 11 | % n1=1000; 12 | % teta=(2*pi)*randn(n1,1); % Using spherical coordinates 13 | % x=.5*sin(teta); 14 | % y=.6*cos(teta); 15 | % D2=[x y]; 16 | 17 | n1=2000; 18 | teta=(2*pi)*randn(n1,1); % Using spherical coordinates 19 | x=cos(3*teta).*sin(teta); 20 | y=cos(3*teta).*cos(teta); 21 | D2=[x y]; 22 | 23 | D=[D1;D2]; 24 | 25 | end 26 | 27 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 28 | -------------------------------------------------------------------------------- /Synthetic Data/Tschirnhausen.m: -------------------------------------------------------------------------------- 1 | function [ D ] = Tschirnhausen( ~ ) 2 | 3 | n1=6000; 4 | t=-2+4*rand(n1,1); % Using spherical coordinates 5 | 6 | x=t.^3-3*t; 7 | y=-t.^2; 8 | D=[x y]; 9 | 10 | 11 | 12 | 13 | % n1=2500; 14 | % teta=(2*pi)*randn(n1,1); % Using spherical coordinates 15 | % x=cos(3*teta).*sin(teta); 16 | % y=cos(3*teta).*cos(teta); 17 | % D2=[x y]; 18 | 19 | % D=[D1;D2;D3]; 20 | 21 | end 22 | 23 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 24 | -------------------------------------------------------------------------------- /Synthetic Data/TwoCurve.m: -------------------------------------------------------------------------------- 1 | function [ D ] = TwoCurve( ~ ) 2 | n1=1000; 3 | D1=zeros(n1,2); 4 | X1=.68+.07*rand(n1,1); 5 | D1(:,1)=X1; 6 | Y1=-X1.^2+1; 7 | noise=-1+2*rand(n1,1); % generate uniform noise on the interval [-1 1] 8 | D1(:,2)=Y1+.00*noise; % add noise to the data 9 | % cluster 1 10 | n2=1000; 11 | D2=zeros(n2,2); 12 | X2=.68+.07*rand(n2,1); 13 | D2(:,1)=X2; 14 | Y2=X2.^2; 15 | noise=-1+2*rand(n2,1); % generate uniform noise on the interval [-1 1] 16 | D2(:,2)=Y2+.00*noise; % add noise to the data 17 | D=[D1;D2]; 18 | 19 | end 20 | %%[group,path] = Path_Based_Cluster( D , 70 , 7 ); -------------------------------------------------------------------------------- /Synthetic Data/TwoPlane.m: -------------------------------------------------------------------------------- 1 | function [ D ] = TwoPlane( ~ ) 2 | 3 | n1=8000; 4 | D1=zeros(n1,3); 5 | D1(:,1)=-.4+.8.*rand(n1,1); 6 | D1(:,2)=-1+2.*rand(n1,1); 7 | % D1(:,3)=.8+3.5*D1(:,1); 8 | D1(:,3)=1-5*D1(:,1); 9 | 10 | 11 | n2=8000; 12 | D2=zeros(n2,3); 13 | D2(:,1)=-.4+.8.*rand(n2,1); 14 | D2(:,2)=-1+2.*rand(n2,1); 15 | D2(:,3)=1; 16 | D=[D1;D2]; 17 | 18 | end 19 | 20 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 21 | -------------------------------------------------------------------------------- /Synthetic Data/TwoSphere.m: -------------------------------------------------------------------------------- 1 | function [ D ] = TwoSphere( ~ ) 2 | 3 | 4 | n1=8000; 5 | u=rand(n1,1); 6 | v=rand(n1,1); 7 | r=1; 8 | phi=2*pi*u; 9 | teta=acos(2*v-1); 10 | z=r*cos(teta); 11 | x=sqrt(r^2-z.^2).*cos(phi); 12 | y=sqrt(r^2-z.^2).*sin(phi); 13 | D1=[x y z]; 14 | 15 | u=rand(n1,1); 16 | v=rand(n1,1); 17 | r=1; 18 | phi=2*pi*u; 19 | teta=acos(2*v-1); 20 | z=r*cos(teta); 21 | x=sqrt(r^2-z.^2).*cos(phi); 22 | y=sqrt(r^2-z.^2).*sin(phi)+1; 23 | D2=[x y z]; 24 | 25 | D=[D1;D2]; 26 | 27 | % n1=50; 28 | % D1=zeros(n1*n1,3); 29 | % r=.1; 30 | % for i=1:n1 31 | % phi=pi*rand(n1,1); 32 | % teta=(pi)*randn(n1,1); % Using spherical coordinates 33 | % x=r*sin(phi(i))*cos(teta); 34 | % y=r*sin(phi(i))*sin(teta); 35 | % z=r*repmat(cos(phi(i)), n1,1); 36 | % D1((i-1)*n1+1:i*n1,:)=[x y z]; 37 | % end 38 | % 39 | % 40 | % n1=50; 41 | % D2=zeros(n1*n1,3); 42 | % r=.1; 43 | % for i=1:n1 44 | % phi=pi*rand(n1,1); 45 | % teta=(pi)*randn(n1,1); % Using spherical coordinates 46 | % x=.1+r*sin(phi(i))*cos(teta); 47 | % y=r*sin(phi(i))*sin(teta); 48 | % z=r*repmat(cos(phi(i)), n1,1); 49 | % D2((i-1)*n1+1:i*n1,:)=[x y z]; 50 | % end 51 | % 52 | % 53 | % D=[D1;D2]; 54 | 55 | end 56 | 57 | % [group,path]=Path_Based_Cluster_LandMarks( D ,20 , 50 , 15 , 2 ); 58 | 59 | -------------------------------------------------------------------------------- /Synthetic Data/Two_arcs.m: -------------------------------------------------------------------------------- 1 | function [ D, tt] = Two_arcs( n ) 2 | 3 | n1=n/2; 4 | 5 | tt1=(pi/2)*rand(n1,1)+pi/4; % Using spherical coordinates 6 | 7 | x1=cos(tt1); 8 | y1=sin(tt1); 9 | D1=[x1 y1]; 10 | 11 | tt2=(pi/2)*rand(n1,1)-pi/4; 12 | x2=cos(tt2)-1; 13 | y2=sin(tt2)+1; 14 | D2=[x2 y2]; 15 | D=[D1;D2]; 16 | tt=[tt1;tt2]; 17 | 18 | % n1=2500; 19 | % teta=(2*pi)*randn(n1,1); % Using spherical coordinates 20 | % x=cos(3*teta).*sin(teta); 21 | % y=cos(3*teta).*cos(teta); 22 | % D2=[x y]; 23 | % D=[D1;D2;D3]; 24 | 25 | end 26 | 27 | % [group,path]=Path_Based_Cluster_LandMarks( D ,30 , 70 , 15 , 2 ); 28 | -------------------------------------------------------------------------------- /Synthetic Data/dollarsignSample.m: -------------------------------------------------------------------------------- 1 | function [X,Y,m] = dollarsignSample(n, noise) 2 | % X: generated data set 3 | % m : label 4 | if nargin < 2, 5 | noise = 0; 6 | end 7 | 8 | % choose S part over | part with prob based on area of each 9 | % S is 3*pi, | is 6 10 | p = (3*pi)/(6+3*pi); 11 | 12 | X = zeros(n,3); 13 | Y = zeros(n,1); 14 | m = zeros(n,1); 15 | 16 | for i = 1:n, 17 | if rand() > p, 18 | m(i,1) = 0; 19 | 20 | % surface is along x=0, y=[0,1], z=[-3,3] 21 | X(i,:) = [0 rand() 6*rand()-3]; 22 | 23 | Y(i,1) = 13 + X(i,3) + randn(); % will be from 10 to 16 +/- 2 24 | else, 25 | m(i,1) = 1; 26 | 27 | angle = 1.5*rand()*pi; 28 | 29 | x = cos(angle); 30 | y = rand(); 31 | z = sin(angle); 32 | 33 | % choose between top and bottom of S 34 | if rand() > 0.5, 35 | X(i,:) = [x y z+1]; 36 | Y(i,1) = angle; % Y is from 0 to 3*pi 37 | else, 38 | X(i,:) = [-x y -z-1]; 39 | Y(i,1) = 3*pi - angle; 40 | end 41 | 42 | end 43 | end 44 | 45 | X(find(m),3) = X(find(m),3) + randn(length(find(m)),1)*noise; 46 | 47 | Y = Y + randn(n,1)*0.1; 48 | 49 | % randomly shuffle the points, so we can easily subsample later 50 | randidx = randperm(n); 51 | X = X(randidx,:); 52 | Y = Y(randidx,:); 53 | m = m(randidx,:); 54 | -------------------------------------------------------------------------------- /Synthetic Data/five_affine_subspaces.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amirbabaeian/manifold-clustering-algorithm/99f5e30c396091f3a7bd76cce1638834b1f89303/Synthetic Data/five_affine_subspaces.mat -------------------------------------------------------------------------------- /Synthetic Data/hybrid.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amirbabaeian/manifold-clustering-algorithm/99f5e30c396091f3a7bd76cce1638834b1f89303/Synthetic Data/hybrid.mat -------------------------------------------------------------------------------- /Synthetic Data/three_linear_planes.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amirbabaeian/manifold-clustering-algorithm/99f5e30c396091f3a7bd76cce1638834b1f89303/Synthetic Data/three_linear_planes.mat -------------------------------------------------------------------------------- /Synthetic Data/two_spirals.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amirbabaeian/manifold-clustering-algorithm/99f5e30c396091f3a7bd76cce1638834b1f89303/Synthetic Data/two_spirals.mat -------------------------------------------------------------------------------- /TwoSphere.m: -------------------------------------------------------------------------------- 1 | function [ D ] = TwoSphere( ~ ) 2 | 3 | 4 | n1=8000; 5 | u=rand(n1,1); 6 | v=rand(n1,1); 7 | r=1; 8 | phi=2*pi*u; 9 | teta=acos(2*v-1); 10 | z=r*cos(teta); 11 | x=sqrt(r^2-z.^2).*cos(phi); 12 | y=sqrt(r^2-z.^2).*sin(phi); 13 | D1=[x y z]; 14 | 15 | u=rand(n1,1); 16 | v=rand(n1,1); 17 | r=1; 18 | phi=2*pi*u; 19 | teta=acos(2*v-1); 20 | z=r*cos(teta); 21 | x=sqrt(r^2-z.^2).*cos(phi); 22 | y=sqrt(r^2-z.^2).*sin(phi)+1; 23 | D2=[x y z]; 24 | 25 | D=[D1;D2]; 26 | 27 | % n1=50; 28 | % D1=zeros(n1*n1,3); 29 | % r=.1; 30 | % for i=1:n1 31 | % phi=pi*rand(n1,1); 32 | % teta=(pi)*randn(n1,1); % Using spherical coordinates 33 | % x=r*sin(phi(i))*cos(teta); 34 | % y=r*sin(phi(i))*sin(teta); 35 | % z=r*repmat(cos(phi(i)), n1,1); 36 | % D1((i-1)*n1+1:i*n1,:)=[x y z]; 37 | % end 38 | % 39 | % 40 | % n1=50; 41 | % D2=zeros(n1*n1,3); 42 | % r=.1; 43 | % for i=1:n1 44 | % phi=pi*rand(n1,1); 45 | % teta=(pi)*randn(n1,1); % Using spherical coordinates 46 | % x=.1+r*sin(phi(i))*cos(teta); 47 | % y=r*sin(phi(i))*sin(teta); 48 | % z=r*repmat(cos(phi(i)), n1,1); 49 | % D2((i-1)*n1+1:i*n1,:)=[x y z]; 50 | % end 51 | % 52 | % 53 | % D=[D1;D2]; 54 | 55 | end 56 | 57 | % [group,path]=Path_Based_Cluster_LandMarks( D ,20 , 50 , 15 , 2 ); 58 | 59 | -------------------------------------------------------------------------------- /kmeansplus.m: -------------------------------------------------------------------------------- 1 | function [L,C] = kmeansplus(X,k) 2 | %KMEANS Cluster multivariate data using the k-means++ algorithm. 3 | % [L,C] = kmeans(X,k) produces a 1-by-size(X,2) vector L with one class 4 | % label per column in X and a size(X,1)-by-k matrix C containing the 5 | % centers corresponding to each class. 6 | 7 | % Version: 07/08/11 8 | % Authors: Laurent Sorber (Laurent.Sorber@cs.kuleuven.be) 9 | % 10 | % References: 11 | % [1] J. B. MacQueen, "Some Methods for Classification and Analysis of 12 | % MultiVariate Observations", in Proc. of the fifth Berkeley 13 | % Symposium on Mathematical Statistics and Probability, L. M. L. Cam 14 | % and J. Neyman, eds., vol. 1, UC Press, 1967, pp. 281-297. 15 | % [2] D. Arthur and S. Vassilvitskii, "k-means++: The Advantages of 16 | % Careful Seeding", Technical Report 2006-13, Stanford InfoLab, 2006. 17 | 18 | L = []; 19 | L1 = 0; 20 | 21 | while length(unique(L)) ~= k 22 | 23 | C = X(:,1+round(rand*(size(X,2)-1))); 24 | L = ones(1,size(X,2)); 25 | for i = 2:k 26 | D = X-C(:,L); 27 | D = cumsum(sqrt(dot(D,D))); 28 | if D(end) == 0, C(:,i:k) = X(:,ones(1,k-i+1)); return; end 29 | C(:,i) = X(:,find(rand < D/D(end),1)); 30 | [tmp,L] = max(bsxfun(@minus,2*real(C'*X),dot(C,C).')); 31 | end 32 | 33 | while any(L ~= L1) 34 | L1 = L; 35 | for i = 1:k, l = L==i; C(:,i) = sum(X(:,l),2)/sum(l); end 36 | [tmp,L] = max(bsxfun(@minus,2*real(C'*X),dot(C,C).'),[],1); 37 | end 38 | 39 | end --------------------------------------------------------------------------------