class kernel:
    """Collection of kernel functions k(x, y) sharing one set of hyperparameters.

    Every public method takes two NumPy vectors of equal shape and returns the
    kernel value between them, so a bound method (e.g. ``kernel(gamma=2).rbf``)
    can be handed to code that expects a plain ``k(x, y)`` callable.

    Parameters
    ----------
    gamma : width parameter of the RBF kernel.
    sigma : scale parameter of the exponential, Laplacian, ANOVA, spherical
        and circular kernels.
    d_anova : exponent of the ANOVA kernel.
    d_poly : degree of the polynomial kernel.
    d_power : exponent of the power kernel.
    alpha : slope applied to the inner product in the polynomial and sigmoid
        kernels.
    c : additive constant of the linear, polynomial, sigmoid, rational
        quadratic and multiquadric kernels.
    """

    def __init__(self, gamma=1, sigma=1, d_anova=1, d_poly=2, d_power=1, alpha=1, c=0):
        self.gamma = gamma
        self.sigma = sigma
        self.d_anova = d_anova
        self.alpha = alpha
        self.c = c
        self.d_poly = d_poly
        self.d_power = d_power

    def linear(self, x, y):
        """
        k(x, y) = <x, y> + c
        Hyperparameters: c
        """
        return x.T @ y + self.c

    def rbf(self, x, y):
        """
        k(x, y) = exp(- gamma * ||x-y||^2)
        Hyperparameters: gamma
        """
        return np.exp(-self.gamma * (np.linalg.norm(x - y) ** 2))

    def exp(self, x, y):
        """
        k(x, y) = exp(- ||x-y|| / (2 * sigma^2))
        Hyperparameters: sigma
        """
        return np.exp(-(1 / (2 * self.sigma ** 2)) * np.linalg.norm(x - y))

    def laplacian(self, x, y):
        """
        k(x, y) = exp(- ||x-y|| / sigma)
        Hyperparameters: sigma
        """
        return np.exp(-(1 / self.sigma) * np.linalg.norm(x - y))

    def anova(self, x, y):
        """
        k(x, y) = sum_i( exp(- sigma * (x_i - y_i)^2)^d_anova )
        Hyperparameters: sigma, d_anova
        """
        terms = np.exp(-self.sigma * (x - y) ** 2) ** self.d_anova
        # Reduce over the first axis only, which is what iterating over
        # range(len(x)) and accumulating does.
        return np.sum(terms, axis=0)

    def polynomial(self, x, y):
        """
        k(x, y) = (alpha * <x, y> + c)^d_poly
        Hyperparameters: alpha, c, d_poly
        """
        return (self.alpha * (x.T @ y) + self.c) ** self.d_poly

    def sigmoid(self, x, y):
        """
        k(x, y) = tanh(alpha * <x, y> + c)
        Hyperparameters: alpha, c
        """
        return np.tanh(self.alpha * (x.T @ y) + self.c)

    def rotational_quadratic(self, x, y):
        """
        k(x, y) = 1 - ||x-y||^2 / (||x-y||^2 + c)
        Hyperparameters: c

        For x == y with c == 0 the quotient is 0/0; the value 1 is returned
        in that case, which is what k(x, x) evaluates to for every c > 0.
        """
        sq_dist = np.linalg.norm(x - y) ** 2
        if sq_dist == 0 and self.c == 0:
            # Avoid NaN on the diagonal of a kernel matrix when c is left at
            # its default of 0.
            return 1.0
        return 1 - (sq_dist / (sq_dist + self.c))

    def multiquadric(self, x, y):
        """
        k(x, y) = sqrt(||x-y||^2 + c^2)
        Hyperparameters: c
        """
        return np.sqrt(np.linalg.norm(x - y) ** 2 + self.c ** 2)

    def power(self, x, y):
        """
        k(x, y) = -||x-y||^d_power
        Hyperparameters: d_power
        """
        return -np.linalg.norm(x - y) ** self.d_power

    def spherical(self, x, y):
        """
        k(x, y) = 1 - (3/2) * r + (1/2) * r^3   with r = ||x-y|| / sigma,
        and 0 when ||x-y|| > sigma.
        Hyperparameters: sigma
        """
        dist = np.linalg.norm(x - y)
        if dist > self.sigma:
            return 0
        ratio = dist / self.sigma
        return 1 - (3 / 2) * ratio + (1 / 2) * (ratio ** 3)

    def circular(self, x, y):
        """
        k(x, y) = (2/pi) * arccos(-r) - (2/pi) * r * sqrt(1 - r^2)
        with r = ||x-y|| / sigma, and 0 when ||x-y|| > sigma.
        Hyperparameters: sigma
        """
        dist = np.linalg.norm(x - y)
        if dist > self.sigma:
            return 0
        ratio = dist / self.sigma
        return (2 / np.pi) * np.arccos(-ratio) - (2 / np.pi) * ratio * np.sqrt(1 - ratio ** 2)
class KPCA:
    """Kernel Principal Component Analysis of a data matrix.

    The kernel matrix of the columns of ``X`` is built, centred in feature
    space and eigendecomposed; the scores of the training points on the
    leading components are ``sqrt(eigenvalue) * unit_eigenvector``.
    """

    # Style-sheet names to try, newest first: matplotlib 3.6 renamed the
    # seaborn styles to "seaborn-v0_8-*" and later releases dropped the old
    # names, so a hard-coded 'seaborn-whitegrid' raises there.
    _STYLE_CANDIDATES = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')

    def __init__(self, X, kernel, d):
        """
        KPCA object
        Parameters
        ----------

        X: dxn matrix (one observation per column)
        kernel: kernel function k(x, y), e.g. a bound method of the kernel class;
                it is assumed to be symmetric, k(x, y) == k(y, x)
        d: number of principal components to be chosen
        """
        self.X = X
        self.kernel = kernel
        self.d = d

    def _is_pos_semidef(self, x, tol=0.0):
        """Return True when every entry of ``x`` is >= -tol."""
        return np.all(x >= -tol)

    def _apply_style(self):
        """Activate the first available whitegrid style; do nothing if none is."""
        for style_name in self._STYLE_CANDIDATES:
            if style_name in plt.style.available:
                plt.style.use(style_name)
                return

    def _ensure_projected(self):
        """Run project() when the scores have not been computed yet."""
        if not hasattr(self, 'scores'):
            self.project()

    def __kernel_matrix(self):
        """
        Compute the centred kernel matrix
        Output:

        K: nxn matrix
        """
        data = np.asarray(self.X)
        n_points = data.shape[1]
        K = np.array(
            [[self.kernel(data[:, row], data[:, col]) for col in range(n_points)]
             for row in range(n_points)],
            dtype=float,
        )
        # Centering K in feature space: K - 1K - K1 + 1K1, with 1 = ones/n
        ones = np.full((n_points, n_points), 1.0 / n_points)
        return K - ones @ K - K @ ones + ones @ K @ ones

    def __descomp(self):
        """
        Decomposition of K
        Output:

        tuplas_eig: List of tuples (singular_value, eigenvector) ordered by
                    decreasing singular value. singular_value is the square
                    root of the eigenvalue of K (eigenvalues below zero are
                    treated as zero) and eigenvector has unit norm.
        """
        self.K = self.__kernel_matrix()
        # K is symmetric, so eigh applies: it returns real eigenvalues and
        # orthonormal eigenvectors, whereas eig may return complex output.
        eigval, eigvec = np.linalg.eigh(self.K)
        # The centred K always has an eigenvalue that is zero up to rounding;
        # only warn about values that are negative beyond that noise level.
        noise = eigval.size * np.finfo(float).eps * np.max(np.abs(eigval), initial=0.0)
        if not self._is_pos_semidef(eigval, noise):
            warnings.warn("La matriz K no es semidefinida positiva")
        # Clip before the square root so no NaN can reach the sort below.
        singular = np.sqrt(np.clip(eigval, 0.0, None))
        tuplas_eig = [(singular[i], eigvec[:, i]) for i in range(eigval.size)]
        tuplas_eig.sort(key=lambda pair: pair[0], reverse=True)
        return tuplas_eig

    def project(self):
        """
        Compute scores
        Output:

        scores: T = sigma * V_d^t, a (d x n) matrix whose row k holds the
                coordinates of the n points on the k-th principal component.
                sigma is the diagonal matrix of the d largest singular values
                and V_d the matrix of the matching unit eigenvectors.
        """
        self.tuplas_eig = self.__descomp()
        leading = self.tuplas_eig[:self.d]
        self.sigma = np.diag([value for value, _ in leading])
        self.v = np.array([vector for _, vector in leading]).T
        self.scores = self.sigma @ self.v.T
        return self.scores

    def plot_singular_values(self, grid = True):
        """Plot the singular values greater than 0.01, in decreasing order."""
        self._ensure_projected()
        eig_plot = [value for (value, _) in self.tuplas_eig if value > 0.01]
        self._apply_style()
        plt.figure(figsize=(15,7.5))
        plt.plot(list(range(1, len(eig_plot) + 1)), eig_plot)
        plt.grid(grid)
        plt.title('Valores singulares de la matriz $K$ distintos de 0')
        # Raw string: '\s' is not a valid escape sequence.
        plt.ylabel(r'$\sigma^2$')
        plt.show()

    def plot_scores_2d(self, colors, grid = True, dim_1 = 1, dim_2 = 2):
        """Scatter the scores on two components (1-based indices dim_1, dim_2).

        colors is passed to matplotlib as the per-point colour argument.
        Warns and returns without plotting when fewer than 2 components
        were requested.
        """
        if self.d < 2:
            warnings.warn("No hay suficientes componentes prinicpales")
            return

        self._ensure_projected()
        self._apply_style()
        plt.figure(figsize=(15,10))
        plt.axhline(c = 'black', alpha = 0.2)
        plt.axvline(c = 'black', alpha = 0.2)
        plt.scatter(self.scores[dim_1 - 1,:], self.scores[dim_2 - 1,:], c = colors)
        plt.grid(grid)
        plt.title('KPCA Space')
        # Raw strings: '\p' is not a valid escape sequence.
        plt.xlabel(r'${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_1))
        plt.ylabel(r'${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_2))
        plt.show()

    def plot_scores_3d(self, colors, grid = True, dim_1 = 1, dim_2 = 2, dim_3 = 3):
        """Scatter the scores on three components (1-based indices).

        colors is passed to matplotlib as the per-point colour argument.
        Warns and returns without plotting when fewer than 3 components
        were requested.
        """
        if self.d < 3:
            warnings.warn("No hay suficientes componentes prinicpales")
            return

        self._ensure_projected()
        self._apply_style()
        fig = plt.figure(figsize=(15,10))
        ax = fig.add_subplot(111, projection="3d")
        ax.scatter(self.scores[dim_1 - 1,:], self.scores[dim_2 - 1,:], self.scores[dim_3 - 1,:], c = colors)
        plt.grid(grid)
        ax.axis('on')
        plt.title('KPCA Space')
        ax.set_xlabel(r'${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_1))
        ax.set_ylabel(r'${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_2))
        ax.set_zlabel(r'${}^a$ componente principal en el espacio $\phi(X)$'.format(dim_3))
        plt.show()

    def plot_density(self, labels, dim=1, grid = False):
        """Plot one kernel density estimate per label of the scores on component dim.

        labels holds one label per point; dim is the 1-based component index.
        """
        self._ensure_projected()
        self._apply_style()
        plt.figure(figsize=(15,5))
        # Element-wise comparison below needs an array, not a plain list.
        labels = np.asarray(labels)
        component = self.scores[dim - 1,:]
        for ele in np.unique(labels):
            # kdeplot draws the same curve as the deprecated
            # distplot(hist=False, kde=True, kde_kws={'linewidth': 3}).
            sns.kdeplot(component[np.where(labels == ele)], linewidth = 3, label = ele)
        plt.grid(grid)
        plt.legend()
        plt.title('Distribuciones en la ${}^a$ componente principal'.format(dim))
        plt.show()