├── setup.cfg ├── MANIFEST.in ├── kernel_eca ├── __init__.py └── kernel_eca.py ├── .travis.yml ├── .gitignore ├── README.md ├── LICENSE ├── setup.py └── Example.ipynb /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.py 3 | include *.cfg 4 | recursive-include kernel_eca *.py 5 | include LICENSE 6 | -------------------------------------------------------------------------------- /kernel_eca/__init__.py: -------------------------------------------------------------------------------- 1 | """Python implementation of Kernel Entropy Component Analysis""" 2 | from __future__ import absolute_import 3 | 4 | from .kernel_eca import KernelECA 5 | 6 | __version__ = '0.3.1dev' -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | # Setting sudo to false opts in to Travis-CI container-based builds. 4 | sudo: false 5 | 6 | python: 7 | - 2.7 8 | - 3.4 9 | - 3.5 10 | 11 | env: 12 | - CONDA_DEPS="numpy scipy nose scikit-learn" 13 | 14 | install: 15 | - conda create -n testenv --yes python=$TRAVIS_PYTHON_VERSION 16 | - source activate testenv 17 | - conda install --yes $CONDA_DEPS 18 | - python setup.py install 19 | 20 | before_install: 21 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 22 | - chmod +x miniconda.sh 23 | - ./miniconda.sh -b 24 | - export PATH=/home/travis/miniconda3/bin:$PATH 25 | # miniconda is not always up-to-date with conda. 26 | - conda update --yes conda 27 | 28 | script: nosetests 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | MANIFEST 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | 61 | # emacs 62 | *~ 63 | .#* 64 | 65 | 66 | # notebook 67 | .ipynb_checkpoints -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kernel Entropy Component Analysis in Python 2 | 3 | This is a Python implementation of Kernel Entropy Component Analysis (KECA), compatible with scikit-learn. 4 | 5 | [![version status](http://img.shields.io/pypi/v/kernel_eca.svg?style=flat)](https://pypi.python.org/pypi/kernel_eca) 6 | [![downloads](http://img.shields.io/pypi/dm/kernel_eca.svg?style=flat)](https://pypi.python.org/pypi/kernel_eca) 7 | [![build status](http://img.shields.io/travis/tsterbak/kernel_eca/master.svg?style=flat)](https://travis-ci.org/tsterbak/kernel_eca) 8 | [![license](http://img.shields.io/badge/license-BSD-blue.svg?style=flat)](https://github.com/tsterbak/kernel_eca/blob/master/LICENSE) 9 | 10 | ## Installation and Use 11 | 12 | This package is pure Python, and depends only on [numpy](http://numpy.org/), [scipy](http://scipy.org/), and [scikit-learn](http://scikit-learn.org/). 13 | The released version can be installed via pip: 14 | 15 | $ pip install kernel_eca 16 | 17 | A basic example of its use can be found in [Example.ipynb](Example.ipynb). 18 | 19 | ## Requirements 20 | ``` 21 | numpy >= 1.10.1 22 | sklearn >= 0.16.1 23 | scipy >= 0.15.1 24 | ``` 25 | 26 | ## More Informations 27 | The implementation is based on 28 | ``` 29 | R. Jenssen, "Kernel Entropy Component Analysis," 30 | IEEE Transactions on Pattern Analysis and Machine 31 | Intelligence, 32(5), 847-860, 2010. 32 | ``` 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of kernel_eca nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import re 4 | 5 | from distutils.core import setup 6 | 7 | 8 | def read(path, encoding='utf-8'): 9 | path = os.path.join(os.path.dirname(__file__), path) 10 | with io.open(path, encoding=encoding) as fp: 11 | return fp.read() 12 | 13 | 14 | def version(path): 15 | """Obtain the packge version from a python file e.g. pkg/__init__.py 16 | See . 17 | """ 18 | version_file = read(path) 19 | version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""", 20 | version_file, re.M) 21 | if version_match: 22 | return version_match.group(1) 23 | raise RuntimeError("Unable to find version string.") 24 | 25 | 26 | DESCRIPTION = "Python implementation of Kernel entropy component analysis" 27 | LONG_DESCRIPTION = """ 28 | Kernel entropy component analysis 29 | ================================= 30 | This is a scikit-learn compatible implementation of Kernel entropy component analysis. 31 | For more information, see the github project page: 32 | http://github.com/ 33 | """ 34 | NAME = "kernel_eca" 35 | AUTHOR = "Tobias Sterbak" 36 | AUTHOR_EMAIL = "sterbak-it@outlook.com" 37 | MAINTAINER = "Tobias Sterbak" 38 | MAINTAINER_EMAIL = "sterbak-it@outlook.com" 39 | URL = 'http://github.com/tsterbak/kernel_eca' 40 | DOWNLOAD_URL = 'http://github.com/tsterbak/kernel_eca' 41 | LICENSE = 'BSD' 42 | 43 | VERSION = version('kernel_eca/__init__.py') 44 | 45 | setup(name=NAME, 46 | version=VERSION, 47 | description=DESCRIPTION, 48 | long_description=LONG_DESCRIPTION, 49 | author=AUTHOR, 50 | author_email=AUTHOR_EMAIL, 51 | maintainer=MAINTAINER, 52 | maintainer_email=MAINTAINER_EMAIL, 53 | url=URL, 54 | download_url=DOWNLOAD_URL, 55 | license=LICENSE, 56 | packages=['kernel_eca', 57 | ], 58 | classifiers=[ 59 | 'Development Status :: 4 - Beta', 60 | 'Environment :: Console', 61 | 'Intended Audience :: Science/Research', 62 | 'License :: OSI Approved :: BSD License', 63 | 'Natural Language :: English', 64 | 'Programming Language :: Python :: 2.7', 65 | 'Programming Language :: Python :: 3.4', 66 | 'Programming Language :: Python :: 3.5'], 67 | ) -------------------------------------------------------------------------------- /Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Kernel Entropy Component Analysis in Python\n", 8 | "``kernel_eca`` is a Python implementation of the Kernel Entropy Component Analysis, build to be compatible with scikit-learn. It can be installed with pip.\n", 9 | "```\n", 10 | "pip install kernel_eca\n", 11 | "```\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [ 21 | { 22 | "ename": "ImportError", 23 | "evalue": "No module named 'kernel_eca'", 24 | "output_type": "error", 25 | "traceback": [ 26 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 27 | "\u001b[1;31mImportError\u001b[0m Traceback (most recent call last)", 28 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mkernel_eca\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mKernelECA\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdecomposition\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkernel_pca\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mKernelPCA\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 29 | "\u001b[1;31mImportError\u001b[0m: No module named 'kernel_eca'" 30 | ] 31 | } 32 | ], 33 | "source": [ 34 | "%matplotlib inline\n", 35 | "import numpy as np\n", 36 | "import matplotlib.pyplot as plt\n", 37 | "\n", 38 | "from kernel_eca import KernelECA\n", 39 | "from sklearn.decomposition.kernel_pca import KernelPCA\n", 40 | "\n", 41 | "from sklearn.datasets import make_circles\n", 42 | "X,y = make_circles(n_samples=300, noise=0.05, factor=0.3, random_state=571)\n", 43 | "\n", 44 | "kpca = KernelPCA(n_components=2, kernel= \"rbf\",gamma=10)\n", 45 | "X_kpca = kpca.fit_transform(X)\n", 46 | "\n", 47 | "keca = KernelECA(n_components=2, kernel= \"rbf\",gamma=10)\n", 48 | "X_keca = keca.fit_transform(X)\n", 49 | "\n", 50 | "plt.figure()\n", 51 | "plt.subplot(2, 2, 1, aspect='equal')\n", 52 | "plt.title(\"Original space\")\n", 53 | "reds = y == 0\n", 54 | "blues = y == 1\n", 55 | "\n", 56 | "plt.plot(X[reds, 0], X[reds, 1], \"ro\")\n", 57 | "plt.plot(X[blues, 0], X[blues, 1], \"bo\")\n", 58 | "plt.xlabel(\"$x_1$\")\n", 59 | "plt.ylabel(\"$x_2$\")\n", 60 | "\n", 61 | "X1, X2 = np.meshgrid(np.linspace(-1.5, 1.5, 50), np.linspace(-1.5, 1.5, 50))\n", 62 | "X_grid = np.array([np.ravel(X1), np.ravel(X2)]).T\n", 63 | "# projection on the first principal component (in the phi space)\n", 64 | "Z_grid = kpca.transform(X_grid)[:, 0].reshape(X1.shape)\n", 65 | "plt.contour(X1, X2, Z_grid, colors='grey', linewidths=1, origin='lower')\n", 66 | "\n", 67 | "plt.subplot(2, 2, 2, aspect='equal')\n", 68 | "plt.plot(X_kpca[reds, 0], X_kpca[reds, 1], \"ro\")\n", 69 | "plt.plot(X_kpca[blues, 0], X_kpca[blues, 1], \"bo\")\n", 70 | "plt.title(\"Projection by KPCA\")\n", 71 | "plt.xlabel(\"1st principal component\")\n", 72 | "plt.ylabel(\"2nd component\")\n", 73 | "\n", 74 | "plt.subplot(2, 2, 3, aspect='equal')\n", 75 | "plt.plot(X_keca[reds, 0], X_keca[reds, 1], \"ro\")\n", 76 | "plt.plot(X_keca[blues, 0], X_keca[blues, 1], \"bo\")\n", 77 | "plt.title(\"Projection by KECA\")\n", 78 | "plt.xlabel(\"1st principal component in space induced by $\\phi$\")\n", 79 | "plt.ylabel(\"2nd component\")\n", 80 | "\n", 81 | "plt.subplot(2, 2, 4, aspect='equal')\n", 82 | "plt.plot(X[reds, 0], X[reds, 1], \"ro\")\n", 83 | "plt.plot(X[blues, 0], X[blues, 1], \"bo\")\n", 84 | "plt.title(\"Original space after inverse transform\")\n", 85 | "plt.xlabel(\"$x_1$\")\n", 86 | "plt.ylabel(\"$x_2$\")\n", 87 | "\n", 88 | "plt.subplots_adjust(0.02, 0.10, 0.98, 0.94, 0.04, 0.35)\n", 89 | "plt.show()" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [] 100 | } 101 | ], 102 | "metadata": { 103 | "kernelspec": { 104 | "display_name": "Python 3", 105 | "language": "python", 106 | "name": "python3" 107 | } 108 | }, 109 | "nbformat": 4, 110 | "nbformat_minor": 0 111 | } 112 | -------------------------------------------------------------------------------- /kernel_eca/kernel_eca.py: -------------------------------------------------------------------------------- 1 | """Kernel Entropy Components Analysis""" 2 | 3 | # Author: Tobias Sterbak 4 | # License: BSD 3 clause 5 | 6 | import numpy as np 7 | from scipy import linalg 8 | 9 | from sklearn.utils import check_random_state 10 | from sklearn.utils.arpack import eigsh 11 | from sklearn.utils.validation import check_is_fitted 12 | from sklearn.base import BaseEstimator, TransformerMixin 13 | from sklearn.preprocessing import KernelCenterer 14 | from sklearn.metrics.pairwise import pairwise_kernels 15 | 16 | class KernelECA(BaseEstimator, TransformerMixin): 17 | """Kernel Entropy component analysis (KECA) 18 | 19 | Non-linear dimensionality reduction through the use of kernels (see 20 | :ref:`metrics`). 21 | 22 | Parameters 23 | ---------- 24 | n_components: int or None 25 | Number of components. If None, all non-zero components are kept. 26 | 27 | kernel: "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed" 28 | Kernel. 29 | Default: "linear" 30 | 31 | degree : int, default=3 32 | Degree for poly kernels. Ignored by other kernels. 33 | 34 | gamma : float, optional 35 | Kernel coefficient for rbf and poly kernels. Default: 1/n_features. 36 | Ignored by other kernels. 37 | 38 | coef0 : float, optional 39 | Independent term in poly and sigmoid kernels. 40 | Ignored by other kernels. 41 | 42 | kernel_params : mapping of string to any, optional 43 | Parameters (keyword arguments) and values for kernel passed as 44 | callable object. Ignored by other kernels. 45 | 46 | 47 | eigen_solver: string ['auto'|'dense'|'arpack'] 48 | Select eigensolver to use. If n_components is much less than 49 | the number of training samples, arpack may be more efficient 50 | than the dense eigensolver. 51 | 52 | tol: float 53 | convergence tolerance for arpack. 54 | Default: 0 (optimal value will be chosen by arpack) 55 | 56 | max_iter : int 57 | maximum number of iterations for arpack 58 | Default: None (optimal value will be chosen by arpack) 59 | 60 | random_state : int seed, RandomState instance, or None, default : None 61 | A pseudo random number generator used for the initialization of the 62 | residuals when eigen_solver == 'arpack'. 63 | 64 | Attributes 65 | ---------- 66 | 67 | lambdas_ : 68 | Eigenvalues of the centered kernel matrix 69 | 70 | alphas_ : 71 | Eigenvectors of the centered kernel matrix 72 | 73 | dual_coef_ : 74 | Inverse transform matrix 75 | 76 | X_transformed_fit_ : 77 | Projection of the fitted data on the kernel entropy components 78 | 79 | References 80 | ---------- 81 | Kernel ECA based on: 82 | (c) Robert Jenssen, University of Tromso, Norway, 2010 83 | R. Jenssen, "Kernel Entropy Component Analysis," 84 | IEEE Trans. Patt. Anal. Mach. Intel., 32(5), 847-860, 2010. 85 | 86 | """ 87 | 88 | def __init__(self, n_components=None, kernel="linear", 89 | gamma=None, degree=3, coef0=1, kernel_params=None, eigen_solver='auto', 90 | tol=0, max_iter=None, random_state=None,center=False): 91 | self.n_components = n_components 92 | self._kernel = kernel 93 | self.kernel_params = kernel_params 94 | self.gamma = gamma 95 | self.degree = degree 96 | self.coef0 = coef0 97 | self.eigen_solver = eigen_solver 98 | self.tol = tol 99 | self.max_iter = max_iter 100 | self.random_state = random_state 101 | self._centerer = KernelCenterer() 102 | self.center = center 103 | 104 | @property 105 | def _pairwise(self): 106 | return self.kernel == "precomputed" 107 | 108 | def _get_kernel(self, X, Y=None): 109 | if callable(self._kernel): 110 | params = self.kernel_params or {} 111 | else: 112 | params = {"gamma": self.gamma, 113 | "degree": self.degree, 114 | "coef0": self.coef0} 115 | return pairwise_kernels(X, Y, metric=self._kernel, 116 | filter_params=True, **params) 117 | 118 | def _fit_transform(self, K): 119 | """ Fit's using kernel K""" 120 | # center kernel 121 | if self.center == True: 122 | K = self._centerer.fit_transform(K) 123 | 124 | X_transformed = self.kernelECA(K=K) 125 | self.X_transformed = X_transformed 126 | return K 127 | 128 | def fit(self, X, y=None): 129 | """Fit the model from data in X. 130 | 131 | Parameters 132 | ---------- 133 | X: array-like, shape (n_samples, n_features) 134 | Training vector, where n_samples in the number of samples 135 | and n_features is the number of features. 136 | 137 | Returns 138 | ------- 139 | self : object 140 | Returns the instance itself. 141 | """ 142 | K = self._get_kernel(X) 143 | self._fit_transform(K) 144 | 145 | self.X_fit_ = X 146 | return self 147 | 148 | def fit_transform(self, X, y=None, **params): 149 | """Fit the model from data in X and transform X. 150 | 151 | Parameters 152 | ---------- 153 | X: array-like, shape (n_samples, n_features) 154 | Training vector, where n_samples in the number of samples 155 | and n_features is the number of features. 156 | 157 | Returns 158 | ------- 159 | X_new: array-like, shape (n_samples, n_components) 160 | """ 161 | self.fit(X, **params) 162 | 163 | X_transformed= self.X_transformed 164 | 165 | return X_transformed 166 | 167 | def transform(self, X): 168 | """Transform X. 169 | 170 | Parameters 171 | ---------- 172 | X: array-like, shape (n_samples, n_features) 173 | 174 | Returns 175 | ------- 176 | X_new: array-like, shape (n_samples, n_components) 177 | """ 178 | check_is_fitted(self, 'X_fit_') 179 | 180 | K = self._centerer.transform(self._get_kernel(X, self.X_fit_)) 181 | return np.dot(K, self.alphas_ / np.sqrt(self.lambdas_)) 182 | 183 | def inverse_transform(self, X): 184 | raise NotImplementedError("Function inverse_transform is not implemented.") 185 | 186 | # here are the helper functions => to integrate in the code! 187 | def kernelECA(self,K): 188 | if self.n_components is None: 189 | n_components = K.shape[0] 190 | else: 191 | n_components = min(K.shape[0], self.n_components) 192 | 193 | # compute eigenvectors 194 | self.lambdas_, self.alphas_ = linalg.eigh(K) 195 | 196 | d = self.lambdas_ 197 | E = self.alphas_ 198 | # sort eigenvectors in descending order 199 | D,E = self.sort_eigenvalues(d,E) 200 | 201 | d = np.diag(D) 202 | sorted_entropy_index,entropy = self.ECA(D,E) 203 | Es = E[:,sorted_entropy_index] 204 | ds = d[sorted_entropy_index] 205 | 206 | Phi = np.zeros((K.shape[0],n_components)) 207 | for i in range(n_components): 208 | Phi[:,i] = np.sqrt(ds[i]) * Es[:,i] 209 | 210 | X_transformed = Phi 211 | 212 | return X_transformed 213 | 214 | def sort_eigenvalues(self,D,E): 215 | d = D 216 | indices = np.argsort(d)[::-1] 217 | 218 | d = d[indices] 219 | D = np.zeros((len(d),len(d))) 220 | for i in range(len(d)): 221 | D[i,i] = d[i] 222 | E = E[:,indices] 223 | 224 | return D,E 225 | 226 | def ECA(self,D,E): 227 | N = E.shape[0] 228 | entropy = np.multiply(np.diag(D).T , (np.dot(np.ones((1,N)),E))**2)[0] 229 | indices = np.argsort(entropy)[::-1] 230 | entropy = entropy[indices] 231 | return indices,entropy --------------------------------------------------------------------------------