# Repository layout:
# ├── Clusters.png
# ├── DistanceMatrixBeforeSorting.png
# ├── EigenvectorOnData.png
# ├── FiedlerVector.png
# ├── FiedlerVectorLaplacian.py
# ├── InputData.png
# ├── LICENSE
# ├── PointDistance.png
# ├── PytorchConnectivityGraph.png
# ├── PytorchFiedlerVector.png
# ├── PytorchInputData.png
# ├── Pytorchclusters.png
# ├── README.md
# ├── Sorted_matrix.png
# ├── demo.py
# └── diffusion_map.py
#
# /Clusters.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/Clusters.png
#
# /DistanceMatrixBeforeSorting.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/DistanceMatrixBeforeSorting.png
#
# /EigenvectorOnData.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/EigenvectorOnData.png
#
# /FiedlerVector.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/FiedlerVector.png
#
# /FiedlerVectorLaplacian.py:
"""Split two Gaussian point clouds with the Fiedler vector of a graph Laplacian.

The points are connected when they are closer than a distance threshold, the
symmetric normalised Laplacian of that graph is decomposed with an SVD, and the
sign of the singular vector belonging to the second smallest singular value
assigns every point to one of two clusters.

The Matlab reference implementation this script was ported from:

% -------------------------------------------------------------
% Matlab code
% -------------------------------------------------------------
% graph partition using the eigenvector corresponding to the second
% smallest eigenvalue
t=[randn(500,2)+repmat([-2,-2],500,1) ;randn(500,2)+repmat([2,2],500,1)];
scatter(t(:,1),t(:,2))
W=squareform(pdist(t));
A=W<3; % create adjacency matrix (set connected nodes equal to one)
D = sum(A,1);
L = diag(D)-A;
Lsym = diag(D.^-0.5)*L*diag(D.^-0.5);
[u,s,v] = svd(Lsym);

figure; plot(u(:, (end-1)))
F = u(:, (end-1));
plot(F);title('Second smallest non-zero eigenvalue eigenvector');
scatter(t(F<0,1),t(F<0,2),'bo','filled');hold on
scatter(t(F>0,1),t(F>0,2),'go','filled');
"""
# Pytorch equivalent code
import torch
from torch.autograd import Variable


import numpy as np


def distance_matrix(mat):
    """Return the pairwise Euclidean distances between the rows of ``mat``.

    Args:
        mat: 2-D tensor with one point per row.

    Returns:
        Square tensor whose entry ``[i, j]`` is the distance between row ``i``
        and row ``j``; the diagonal is exactly zero.
    """
    # Broadcasting (1, n, d) against (n, 1, d) yields every pairwise difference.
    diff = mat.unsqueeze(0) - mat.unsqueeze(1)
    return (diff ** 2).sum(2) ** 0.5


def _graph_laplacians(adjacency):
    """Return ``(L, Lsym)`` for a float adjacency matrix.

    ``L = D - A`` and ``Lsym = D^-1/2 L D^-1/2`` where ``D`` holds the row sums
    of ``A``.  Every row sum must be non-zero; in this script each node is
    adjacent to itself (distance 0 is below the threshold), so that holds.
    """
    degree = adjacency.sum(1)
    laplacian = degree.diag() - adjacency
    inv_sqrt = degree.pow(-0.5)
    # Scaling rows and columns is the same product as multiplying by two dense
    # diagonal matrices, without the two n x n matrix multiplications.
    lsym = inv_sqrt.unsqueeze(1) * laplacian * inv_sqrt.unsqueeze(0)
    return laplacian, lsym


def _wait_for_enter(message):
    """Block until the user presses Enter, on both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2 name
    except NameError:
        reader = input
    reader(message)


def main():
    """Generate the data, run the spectral bipartition and show every figure."""
    # matplotlib is imported here so that importing this module for its helper
    # functions neither requires a display backend nor opens any window.
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    import matplotlib.colors as colors

    color_map = plt.get_cmap('jet')

    # Generate Clusters
    mat = torch.cat([torch.randn(500, 2) + torch.Tensor([-2, -3]),
                     torch.randn(500, 2) + torch.Tensor([2, 1])])
    points = mat.numpy()
    plt.scatter(points[:, 0], points[:, 1])
    plt.show(block=False)

    ##-------------------------------------------
    # Compute distance matrix and then the Laplacian
    ##-------------------------------------------
    d = distance_matrix(mat)
    da = d < 2  # adjacency: points closer than 2 are connected
    plt.figure()
    plt.imshow(da.numpy())
    plt.show(block=False)

    L, Lsym = _graph_laplacians(da.float())
    plt.figure()
    plt.title("Laplacian")
    plt.imshow(L.numpy())
    plt.show(block=False)

    plt.figure()
    plt.imshow(Lsym.numpy())
    plt.title("Symmetric Laplacian")
    plt.show(block=False)

    u, s, v = torch.svd(Lsym)

    # plot fiedler vector
    # torch.svd orders singular values in descending order, so column -2 is
    # the one paired with the second smallest value.
    fiedler = u[:, -2].numpy()
    plt.figure()
    plt.title('Fiedler vector')
    plt.plot(fiedler)
    plt.show(block=False)

    norm = colors.Normalize(vmin=-1, vmax=1)
    scalarMap = cm.ScalarMappable(norm=norm, cmap=color_map)

    # One scatter call per cluster instead of one per point.
    negative = fiedler < 0
    plt.figure()
    plt.title('clusters')
    plt.scatter(points[negative, 0], points[negative, 1],
                color=scalarMap.to_rgba(-1), marker='o')
    plt.scatter(points[~negative, 0], points[~negative, 1],
                color=scalarMap.to_rgba(1), marker='*')
    plt.show(block=False)

    _wait_for_enter("Press Enter to exit..")
    plt.close('all')


if __name__ == "__main__":
    main()

# /InputData.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/InputData.png
#
# /LICENSE:
# MIT License
#
# Copyright (c) 2017 Dimitris Kastaniotis
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PointDistance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/PointDistance.png -------------------------------------------------------------------------------- /PytorchConnectivityGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/PytorchConnectivityGraph.png -------------------------------------------------------------------------------- /PytorchFiedlerVector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/PytorchFiedlerVector.png -------------------------------------------------------------------------------- /PytorchInputData.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/PytorchInputData.png -------------------------------------------------------------------------------- /Pytorchclusters.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/Pytorchclusters.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch-Spectral-clustering 2 | [Under development]- Implementation of various methods for dimensionality reduction and spectral clustering with PyTorch and Matlab equivalent code. 3 |
4 | Sample Images from PyTorch code 5 |
6 |

7 | Input Data 8 | Fiedler Vector 9 | Clusters 10 |

11 | 12 |
13 | Drawing the second eigenvector on data (diffusion map) 14 | Diffusion Map- Second Eigenvector on data 15 |
16 | Drawing the point-wise diffusion distances 17 | Diffusion Map- point-wise distances 18 | 19 |
20 | Sorting matrix 21 |
22 |

23 | Unsorted PairWiseDistance Matrix 24 | Sorted Distance Matrix 25 | 26 |

27 | 28 |

29 | 30 |
31 | ## Goal 32 | Use PyTorch for general-purpose computations by implementing some very elegant methods for dimensionality reduction and graph spectral clustering. 33 |
34 | 35 | ## Description 36 | In this repo, I am using PyTorch in order to implement various methods for dimensionality reduction and spectral clustering. 37 | At the moment, I have added Diffusion Maps [1] and I am working on the methods presented in the following list (as well as some others that I will add in the future). 38 |
39 | 40 | Apart from some examples based on 2-D Gaussian-distributed clusters, I will also add examples with face, food, ImageNet categories, etc. 41 |
42 | 43 | 44 | ## Prerequisites 45 | In order to run these examples you need to have PyTorch installed on your system. I worked with Anaconda2 and PyTorch:
46 | 47 | pytorch 0.2.0 py27hc03bea1_4cu80 [cuda80] soumith 48 |
49 | (You can verify your PyTorch installation by running the following command.) 50 | 51 | conda list | grep pytorch 52 | 53 | Feel free to contact me for suggestions, comments, etc. 54 | 55 | ### References 56 | - [1] Diffusion maps, RR Coifman, S Lafon, Applied and computational harmonic analysis 21 (1), 5-30
57 | - [2] Jianbo Shi and Jitendra Malik (1997): "Normalized Cuts and Image Segmentation", IEEE Conference on Computer Vision and Pattern Recognition, pp 731–737
# /README.md (references, continued):
# - [3] Andrew Y. Ng, Michael I. Jordan, and Yair Weiss. 2001. On spectral clustering: analysis and an algorithm. In Proceedings of the 14th International Conference on Neural Information Processing Systems: Natural and Synthetic (NIPS'01), T. G. Dietterich, S. Becker, and Z. Ghahramani (Eds.). MIT Press, Cambridge, MA, USA, 849-856.
# - [4] ...
#
# /Sorted_matrix.png:
# https://raw.githubusercontent.com/dimkastan/PyTorch-Spectral-clustering/6f08aaf511d9ee55def55b04d8a555b37318fd0e/Sorted_matrix.png
#
# /demo.py:
"""
Implementing various dimensionality reduction methods with PyTorch Tensors


Under development. Please use with caution.

"""
import torch
from torch.autograd import Variable


import numpy as np


def distance_matrix(mat):
    """Return the pairwise Euclidean distances between the rows of ``mat``.

    Args:
        mat: 2-D tensor with one point per row.

    Returns:
        Square tensor whose entry ``[i, j]`` is the distance between row ``i``
        and row ``j``; the diagonal is exactly zero.
    """
    # Broadcasting (1, n, d) against (n, 1, d) yields every pairwise difference.
    diff = mat.unsqueeze(0) - mat.unsqueeze(1)
    return (diff ** 2).sum(2) ** 0.5


def similarity_matrix(mat):
    """Return ``sqrt(exp(-distance))`` for every pair of rows of ``mat``."""
    d = distance_matrix(mat)
    return torch.exp(-d).sqrt()


def _wait_for_enter(message):
    """Block until the user presses Enter, on both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2 name
    except NameError:
        reader = input
    reader(message)


def run_distance_demo():
    """Spectral analysis of the raw distance matrix of two Gaussian clusters."""
    # matplotlib is imported here so that importing this module for its helper
    # functions neither requires a display backend nor opens any window.
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm

    # Generate Clusters
    mat = torch.cat([torch.randn(500, 2) + torch.Tensor([-2, -3]),
                     torch.randn(500, 2) + torch.Tensor([2, 1])])

    # Shuffle so the cluster structure is not visible before sorting.
    mat = mat[torch.randperm(mat.size(0))]
    points = mat.numpy()
    plt.scatter(points[:, 0], points[:, 1])
    plt.show(block=False)

    ##-------------------------------------------
    # Spectral analysis on distance matrix
    ##-------------------------------------------
    d = distance_matrix(mat)

    plt.figure()
    plt.imshow(d.numpy())
    plt.title('Distance Matrix-Before Ordering')
    plt.show(block=False)

    u, s, v = torch.svd(d)

    colors = cm.rainbow(np.linspace(0, 1, mat.size(0)))
    val, ind = torch.sort(u[:, 1])

    # Columns 1 and 2 of u, with rows ordered by column 1; one scatter call
    # with a per-point colour array replaces a scatter call per point.
    sorted_u = u[ind, :].numpy()
    plt.figure()
    plt.scatter(sorted_u[:, 1], sorted_u[:, 2], c=colors)
    plt.title('Eigenvector-Mapping')
    plt.show(block=False)

    # Titles are set before show() so they are part of the first draw.
    plt.figure()
    plt.imshow(d[ind][:, ind].numpy())
    plt.title('Sorted Matrix')
    plt.show(block=False)

    plt.figure()
    plt.plot(val.numpy())
    plt.title("Sorted Eigenvector")
    plt.show(block=False)

    _wait_for_enter("Press Enter to exit..")
    plt.close('all')


if __name__ == "__main__":
    run_distance_demo()


# /diffusion_map.py:
"""
Implementing various dimensionality reduction methods with PyTorch Tensors

Here I am using PyTorch to implement Diffusion Map method.

[1] Diffusion maps, RR Coifman, S Lafon, Applied and computational harmonic analysis 21 (1), 5-30



Under development. Please use with caution.

"""
import torch
from torch.autograd import Variable


import numpy as np


def distance_matrix(mat):
    """Return the pairwise Euclidean distances between the rows of ``mat``.

    Args:
        mat: 2-D tensor with one point per row.

    Returns:
        Square tensor whose entry ``[i, j]`` is the distance between row ``i``
        and row ``j``; the diagonal is exactly zero.
    """
    diff = mat.unsqueeze(0) - mat.unsqueeze(1)
    return (diff ** 2).sum(2) ** 0.5


def diffusion_distance(mat, sigma=8.0, alpha=1.0):
    """Compute the diffusion-map decomposition of the rows of ``mat``.

    Args:
        mat: 2-D tensor with one point per row.
        sigma: Width of the Gaussian kernel ``exp(-(dist / sigma) ** 2)``.
        alpha: Exponent of the density normalisation
            (alpha = 1 Laplace Beltrami, 0.5 Fokker Planck diffusion).

    Returns:
        Tuple ``(K1, u, s)``: the density-normalised kernel, the left singular
        vectors of its symmetrically normalised form divided row-wise by their
        first component, and the singular values in descending order.
    """
    D = distance_matrix(mat)
    K = torch.exp(-(torch.pow(torch.div(D, sigma), 2)))  # Kernel
    p = K.sum(1)
    # The 1e-9 terms guard the divisions against a zero denominator.
    K1 = K / (torch.pow(p.unsqueeze(1) * p, alpha) + 1e-9)
    row_scale = torch.sqrt(K1.sum(1))
    A = K1 / (1e-9 + row_scale.unsqueeze(1) * row_scale)
    u, s, _ = torch.svd(A)
    # Dividing by the first singular vector makes the first column ~1.
    u = u / (1e-9 + u[:, 0].unsqueeze(1))
    return K1, u, s


def _wait_for_enter(message):
    """Block until the user presses Enter, on both Python 2 and Python 3."""
    try:
        reader = raw_input  # Python 2 name
    except NameError:
        reader = input
    reader(message)


def _pick_reference_index(n_points):
    """Draw a random row index in ``[0, n_points - 1]``.

    The half-normal draw can round to ``n_points`` or more, so the result is
    clamped to the last valid index.
    """
    raw = torch.round(torch.abs(torch.randn(1) / 2.0) * n_points)
    return min(int(raw[0]), n_points - 1)


def _scatter_by_value(plt, points, values, norm, cmap):
    """Scatter ``points`` coloured by ``values`` through ``norm`` and ``cmap``."""
    plt.scatter(points[:, 0], points[:, 1], c=values, cmap=cmap, norm=norm)


def run_diffusion_map():
    """Run the diffusion-map demo and show every figure."""
    # matplotlib is imported here so that importing this module for its helper
    # functions neither requires a display backend nor opens any window.
    import matplotlib.pyplot as plt
    import matplotlib.colors as colors
    import matplotlib.cm as cm

    color_map = plt.get_cmap('jet')

    # Generate Clusters
    mat = torch.cat([torch.randn(500, 2) + torch.Tensor([-2, -3]),
                     torch.randn(500, 2) + torch.Tensor([2, 1])])
    points = mat.numpy()

    # mat = mat[torch.randperm(mat.size(0))]
    plt.figure()
    plt.scatter(points[:, 0], points[:, 1])
    plt.show(block=False)
    plt.pause(1)

    ##-------------------------------------------
    # Diffusion map
    ##-------------------------------------------
    d, u, s = diffusion_distance(mat, 4.0, 0.5)
    # A fresh figure: an explicit figure number 1 would reuse the figure that
    # already holds the input scatter and draw the image on top of it.
    plt.figure()
    plt.imshow(d.numpy(), cmap=color_map)
    plt.title('Distance Matrix-Before Ordering')
    plt.show(block=False)

    color_vals = cm.rainbow(np.linspace(0, 1, mat.size(0)))
    val, ind = torch.sort(u[:, 1])

    sorted_u = u[ind, :].numpy()
    plt.figure()
    plt.scatter(sorted_u[:, 1], sorted_u[:, 2], c=color_vals)
    plt.title('Eigenvector-Mapping')
    plt.show(block=False)
    plt.pause(0.1)

    plt.figure()
    plt.imshow(d[ind][:, ind].numpy(), cmap=color_map)
    plt.title('Sorted Matrix')
    plt.show(block=False)
    plt.pause(0.1)

    plt.figure()
    plt.plot(val.numpy())
    plt.title("Sorted Eigenvector")
    plt.show(block=False)
    plt.pause(0.1)

    # Time 0: singular values raised to the power 0 are all one, so the
    # embedding is just columns 1..3 of u.
    data = u[:, 1:4]
    d = distance_matrix(data)
    max_d = float(d.max())
    assert float(d.min()) == 0, "Error in distance matrix"

    values = u[:, 1]
    # The colour scale is fixed once from column 1 of u and reused for every
    # later figure, including the distance plots.
    norm = colors.Normalize(vmin=float(values.min()), vmax=float(values.max()))
    random_point = _pick_reference_index(len(mat))
    anchor = points[random_point]

    plt.figure()
    _scatter_by_value(plt, points, d[random_point].numpy(), norm, color_map)  # take the distance from one point
    plt.scatter(anchor[0], anchor[1], color=[0.0, 0.0, 0.0], marker="*")
    plt.title('distance from point at time:' + str(0))
    plt.show(block=False)

    for t in range(1, 10, 1):
        values = u[:, 1] * (s[1] ** t)
        plt.figure()
        _scatter_by_value(plt, points, values.numpy(), norm, color_map)
        plt.title("Second Eigenvector at time:" + str(t))
        plt.show(block=False)
        plt.pause(0.1)

        p = torch.pow(s, t)
        data = u[:, 1:3] * p[1:3]
        d = distance_matrix(data)
        plt.figure()
        plt.imshow(d[ind][:, ind].numpy(), cmap=color_map, vmin=0, vmax=max_d)
        plt.title('distance matrix at time:' + str(t))
        plt.show(block=False)

        # draw the distances from one point
        plt.figure()
        _scatter_by_value(plt, points, d[random_point].numpy(), norm, color_map)
        plt.scatter(anchor[0], anchor[1], color=[0.0, 0.0, 0.0], marker="*")
        plt.title('distance from point at time:' + str(t))
        plt.show(block=False)
        _wait_for_enter("Press Enter to continue..")

    _wait_for_enter("Press Enter to exit..")
    plt.close('all')


if __name__ == "__main__":
    run_diffusion_map()