├── README.md
├── example.ipynb
├── kernels.py
└── kpca.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Kernel-PCA
 2 | 
 3 | A Python implementation of Kernel Principal Component Analysis (KPCA). Kernels implemented:
 4 | 
 5 | - Linear
 6 | - Radial Basis Function
 7 | - Exponential
 8 | - Laplacian
 9 | - Anova
10 | - Polynomial
11 | - Sigmoid
 12 | - Rational quadratic (method `rotational_quadratic`)
13 | - Multiquadric
14 | - Power
15 | - Spherical
16 | - Circular
17 | 
18 | 
19 | 
20 | ## Requirements
21 | 
22 | * numpy
23 | * matplotlib
24 | * seaborn
25 | 
26 | ## Run
27 | 
28 | ~~~python
29 | from kpca import KPCA
30 | from kernels import kernel
 31 | X = np.array([[2,3,4], [1,2,3]]) # d x n data matrix: one row per feature, one column per sample (requires `import numpy as np`)
32 | k = kernel(sigma=0.0009, d_anova=3).anova
33 | kpca = KPCA(X, k, 3)
34 | scores = kpca.project().T
35 | ~~~
36 | 
37 | 


--------------------------------------------------------------------------------
/kernels.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | import numpy as np
  5 | 
  6 | class kernel:
  7 |     
  8 |     def __init__(self, gamma = 1, sigma = 1, d_anova = 1, d_poly = 2, d_power = 1, alpha = 1, c = 0):
  9 |         self.gamma = gamma
 10 |         self.sigma = sigma
 11 |         self.d_anova = d_anova
 12 |         self.alpha = alpha
 13 |         self.c = c
 14 |         self.d_poly = d_poly
 15 |         self.d_power = d_power
 16 |         
 17 |     def linear(self, x, y):
 18 |         """
 19 |         k(x, y) = <x, y> + c
 20 |         Hiperparámetros: c
 21 |         """
 22 |         return x.T@y + self.c
 23 |     
 24 |     def rbf(self, x, y):
 25 |         """
 26 |         k(x, y) = exp(- gamma * ||x-y||^2)
 27 |         Hiperparámetros: gamma
 28 |         """
 29 |         return np.exp(- self.gamma * (np.linalg.norm(x-y)**2))
 30 |     
 31 |     def exp(self, x, y):
 32 |         """
 33 |         k(x, y) = exp(- ||x-y|| / (2 * sigma^2) )
 34 |         Hiperparámetros: sigma
 35 |         """
 36 |         return np.exp(- (1/ (2*self.sigma**2)) * np.linalg.norm(x-y))
 37 |     
 38 |     def laplacian(self, x, y):
 39 |         """
 40 |         k(x, y) = exp(- ||x-y|| / sigma )
 41 |         Hiperparámetros: sigma
 42 |         """
 43 |         return np.exp(- (1/self.sigma) * np.linalg.norm(x-y))
 44 |     
 45 |     def anova(self, x, y):
 46 |         """
 47 |         k(x, y) = sum( exp(- sigma * ((x_i - y_i)^2))^d_anova )
 48 |         Hiperparámetros: sigma, d_anova
 49 |         """
 50 |         suma = 0
 51 |         for i in range(0, len(x)):
 52 |             term_1 = - self.sigma * ( (x[i] - y[i] )**2 )
 53 |             suma += np.exp(term_1) ** self.d_anova
 54 |         return suma
 55 |     
 56 |     def polynomial(self, x, y):
 57 |         """
 58 |         k(x, y) = (alpha * <x, y> + c)^d
 59 |         Hiperparámetros: alpha, c, d_poly
 60 |         """
 61 |         return (self.alpha * (x.T@y) + self.c)**self.d_poly
 62 |     
 63 |     def sigmoid(self, x, y):
 64 |         """
 65 |         k(x, y) = tanh( alpha * <x, y> + c)
 66 |         Hiperparámetros: alpha, c
 67 |         """
 68 |         return np.tanh(self.alpha * (x.T@y) + self.c)
 69 |     
 70 |     def rotational_quadratic(self, x, y):
 71 |         """
 72 |         k(x, y) = 1 - (||x-y||^2 / ||x-y||^2 + c)
 73 |         Hiperparámetros: c
 74 |         """
 75 |         dist = np.linalg.norm(x-y)
 76 |         return 1 - (dist**2 / (dist**2 + self.c))
 77 |     
 78 |     def multiquadric(self, x, y):
 79 |         """
 80 |         k(x, y) = sqrt(||x-y||^2 + c^2)
 81 |         Hiperparámetros: c
 82 |         """
 83 |         return np.sqrt(np.linalg.norm(x-y)**2 + self.c**2)
 84 |     
 85 |     def power(self, x, y):
 86 |         """
 87 |         k(x, y) = -||x-y||^d
 88 |         Hiperparámetros: d_power
 89 |         """
 90 |         return - np.linalg.norm(x-y)**self.d_power
 91 |     
 92 |     def spherical(self, x, y):
 93 |         dist = np.linalg.norm(x-y)
 94 |         if dist > self.sigma:
 95 |             return 0
 96 |         return 1 - (3/2)*(dist/self.sigma)+(1/2)*((dist/self.sigma)**3)
 97 |     
 98 |     def circular(self, x, y):
 99 |         dist = np.linalg.norm(x-y)
100 |         if dist > self.sigma:
101 |             return 0
102 |         return (2/np.pi)*np.arccos(- dist/self.sigma)-(2/np.pi)*(dist/self.sigma)*np.sqrt(1 - (dist/self.sigma)**2)


--------------------------------------------------------------------------------
/kpca.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | import numpy as np
  5 | import matplotlib.pyplot as plt
  6 | from mpl_toolkits.mplot3d import Axes3D
  7 | import seaborn as sns
  8 | import warnings
  9 | 
 10 | class KPCA:
 11 |     def __init__(self, X, kernel, d):
 12 |         """
 13 |         KPCA object
 14 |         Parameters
 15 |         ----------
 16 |         
 17 |         X: dxn matrix
 18 |         kernel: kernel function from kernel class
 19 |         d: number of principal components to be chosen
 20 |         """
 21 |         self.X = X
 22 |         self.kernel = kernel 
 23 |         self.d = d
 24 |     
 25 |     def _is_pos_semidef(self, x):
 26 |         return np.all(x >= 0)
 27 | 
 28 |     def __kernel_matrix(self):
 29 |         """
 30 |         Compute kernel matrix
 31 |         Output:
 32 |         
 33 |         K: nxn matrix
 34 |         """
 35 |         K = []
 36 |         r, c = self.X.shape
 37 |         for fil in range(c):
 38 |             k_aux = []
 39 |             for col in range(c):
 40 |                 k_aux.append(self.kernel(self.X[:, fil], self.X[:, col]))
 41 |             K.append(k_aux)
 42 |         K = np.array(K)
 43 |         # Centering K
 44 |         ones = np.ones(K.shape)/c
 45 |         K = K - ones@K - K@ones + ones@K@ones
 46 |         return K
 47 |     
 48 |     def __descomp(self):
 49 |         """
 50 |         Decomposition of K
 51 |         Output:
 52 |         
 53 |         tuplas_eig: List of ordered tuples by singular 
 54 |                     values; (singular_value, eigenvector)
 55 |         """
 56 |         self.K = self.__kernel_matrix()
 57 |         eigval, eigvec = np.linalg.eig(self.K)
 58 |         if not self._is_pos_semidef(eigval):
 59 |             warnings.warn("La matriz K no es semidefinida positiva")
 60 |         # Normalize eigenvectors and compute singular values of K
 61 |         tuplas_eig = [(np.sqrt(eigval[i]), eigvec[:,i]/np.sqrt(eigval[i]) ) for i in range(len(eigval))]
 62 |         tuplas_eig.sort(key=lambda x: x[0], reverse=True)
 63 |         return tuplas_eig
 64 |     
 65 |     def project(self):
 66 |         """
 67 |         Compute scores
 68 |         Output:
 69 |         
 70 |         scores: T = sigma * V_d^t
 71 |         """
 72 |         self.tuplas_eig = self.__descomp()
 73 |         tuplas_eig_dim = self.tuplas_eig[:self.d]
 74 |         self.sigma = np.diag([i[0] for i in tuplas_eig_dim])
 75 |         self.v = np.array([list(j[1]) for j in tuplas_eig_dim]).T
 76 |         self.sigma = np.real_if_close(self.sigma, tol=1)
 77 |         self.v = np.real_if_close(self.v, tol=1)
 78 |         self.scores = self.sigma @ self.v.T
 79 |         return self.scores
 80 |     
 81 |     def plot_singular_values(self, grid = True):
 82 |         eig_plot = [np.real_if_close(e, tol=1) for (e, _) in self.tuplas_eig if e > 0.01]
 83 |         plt.style.use('seaborn-whitegrid')
 84 |         fig = plt.figure(figsize=(15,7.5))
 85 |         plt.plot(list(range(1, len(eig_plot) + 1)), eig_plot)
 86 |         plt.grid(grid)
 87 |         plt.title('Valores singulares de la matriz $K$ distintos de 0')
 88 |         plt.ylabel('$\sigma^2$')
 89 |         plt.show()
 90 |         
 91 |     def plot_scores_2d(self, colors, grid = True, dim_1 = 1, dim_2 = 2):
 92 |         if self.d < 2:
 93 |             warnings.warn("No hay suficientes componentes prinicpales")
 94 |             return
 95 |         
 96 |         plt.style.use('seaborn-whitegrid')
 97 |         fig = plt.figure(figsize=(15,10))
 98 |         plt.axhline(c = 'black', alpha = 0.2)
 99 |         plt.axvline(c = 'black', alpha = 0.2)
100 |         plt.scatter(self.scores[dim_1 - 1,:], self.scores[dim_2 - 1,:], c = colors)
101 |         plt.grid(grid)
102 |         plt.title('KPCA Space')
103 |         plt.xlabel('${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_1))
104 |         plt.ylabel('${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_2))
105 |         plt.show()
106 |         
107 |     def plot_scores_3d(self, colors, grid = True, dim_1 = 1, dim_2 = 2, dim_3 = 3):
108 |         if self.d < 3:
109 |             warnings.warn("No hay suficientes componentes prinicpales")
110 |             return
111 |         
112 |         plt.style.use('seaborn-whitegrid')
113 |         fig = plt.figure(figsize=(15,10))
114 |         ax = fig.add_subplot(111, projection="3d")
115 |         ax.scatter(self.scores[dim_1 - 1,:], self.scores[dim_2 - 1,:], self.scores[dim_3 - 1,:], c = colors)
116 |         plt.grid(grid)
117 |         ax.axis('on')
118 |         plt.title('KPCA Space')
119 |         ax.set_xlabel('${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_1))
120 |         ax.set_ylabel('${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_2))
121 |         ax.set_zlabel('${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_3))
122 |         plt.show()
123 |         
124 |     def plot_density(self, labels, dim=1, grid = False):
125 |         plt.style.use('seaborn-whitegrid')
126 |         fig = plt.figure(figsize=(15,5))
127 |         for ele in np.unique(labels):
128 |             sns.distplot(self.scores[dim - 1,:][np.where(labels == ele)], hist = False, 
129 |                          kde = True, kde_kws = {'linewidth': 3}, label = ele)
130 |         plt.grid(grid)
131 |         plt.legend()
132 |         plt.title('Distribuciones en la ${}^a$ componente principal'.format(dim))
133 |         plt.show()


--------------------------------------------------------------------------------