├── .gitignore ├── .travis.yml ├── Example.ipynb ├── LICENSE ├── MANIFEST.in ├── README.md ├── lpproj ├── __init__.py ├── lpproj.py └── tests │ ├── __init__.py │ ├── test_eigh_robust.py │ └── test_lpp.py ├── setup.cfg └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | MANIFEST 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | 47 | # Translations 48 | *.mo 49 | *.pot 50 | 51 | # Django stuff: 52 | *.log 53 | 54 | # Sphinx documentation 55 | docs/_build/ 56 | 57 | # PyBuilder 58 | target/ 59 | 60 | 61 | # emacs 62 | *~ 63 | .#* 64 | 65 | 66 | # notebook 67 | .ipynb_checkpoints -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | # Setting sudo to false opts in to Travis-CI container-based builds. 
4 | sudo: false 5 | 6 | python: 7 | - 2.7 8 | - 3.4 9 | - 3.5 10 | 11 | env: 12 | - CONDA_DEPS="numpy scipy nose scikit-learn" 13 | 14 | install: 15 | - conda create -n testenv --yes python=$TRAVIS_PYTHON_VERSION 16 | - source activate testenv 17 | - conda install --yes $CONDA_DEPS 18 | - python setup.py install 19 | 20 | before_install: 21 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 22 | - chmod +x miniconda.sh 23 | - ./miniconda.sh -b 24 | - export PATH=/home/travis/miniconda3/bin:$PATH 25 | # miniconda is not always up-to-date with conda. 26 | - conda update --yes conda 27 | 28 | script: 29 | - nosetests --with-doctest lpproj 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Jake Vanderplas 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of lpproj nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.py 3 | include *.cfg 4 | include Example.ipynb 5 | recursive-include lpproj *.py 6 | include LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Locality Preserving Projections in Python 2 | 3 | This is a Python implementation of Locality Preserving Projections (LPP), compatible with scikit-learn. 4 | 5 | [![version status](http://img.shields.io/pypi/v/lpproj.svg?style=flat)](https://pypi.python.org/pypi/lpproj) 6 | [![downloads](http://img.shields.io/pypi/dm/lpproj.svg?style=flat)](https://pypi.python.org/pypi/lpproj) 7 | [![build status](http://img.shields.io/travis/jakevdp/lpproj/master.svg?style=flat)](https://travis-ci.org/jakevdp/lpproj) 8 | [![license](http://img.shields.io/badge/license-BSD-blue.svg?style=flat)](https://github.com/jakevdp/lpproj/blob/master/LICENSE) 9 | 10 | ## Installation and Use 11 | 12 | This package is pure Python, and depends only on [numpy](http://numpy.org/), [scipy](http://scipy.org/), and [scikit-learn](http://scikit-learn.org/). 
13 | The released version can be installed via pip: 14 | 15 | $ pip install lpproj 16 | 17 | A basic example of its use can be found in [Example.ipynb](Example.ipynb); the library follows scikit-learn's patterns for ``fit()``, ``transform()``, and ``fit_transform()``. The simplest example for a feature matrix ``X``: 18 | 19 | ```python 20 | from lpproj import LocalityPreservingProjection 21 | lpp = LocalityPreservingProjection(n_components=2) 22 | 23 | X_2D = lpp.fit_transform(X) 24 | ``` 25 | 26 | 27 | ## More Information 28 | 29 | For information on the LPP algorithm, see the paper, [Locality Preserving Projections (pdf)](http://papers.nips.cc/paper/2359-locality-preserving-projections.pdf). 30 | -------------------------------------------------------------------------------- /lpproj/__init__.py: -------------------------------------------------------------------------------- 1 | """Python implementation of Locality Preserving Projections""" 2 | from __future__ import absolute_import 3 | 4 | from .lpproj import LocalityPreservingProjection 5 | 6 | __version__ = '0.2.dev0' 7 | -------------------------------------------------------------------------------- /lpproj/lpproj.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import linalg 3 | 4 | from sklearn.neighbors import kneighbors_graph, NearestNeighbors 5 | from sklearn.utils import check_array 6 | from sklearn.base import BaseEstimator, TransformerMixin 7 | 8 | 9 | class LocalityPreservingProjection(BaseEstimator, TransformerMixin): 10 | """Locality Preserving Projection 11 | 12 | Parameters 13 | ---------- 14 | n_components : integer 15 | number of coordinates for the manifold 16 | 17 | n_neighbors : integer 18 | number of neighbors to consider for each point. 
def _compute_projection(self, X, W):
    """Compute the LPP projection matrix

    Solves the generalized eigenproblem
    ``X.T L X v = lambda X.T D X v`` for the ``n_components`` smallest
    eigenvalues, where ``D`` is the diagonal matrix of row sums of ``W``
    and ``L = D - W``.

    Parameters
    ----------
    X : array_like, (n_samples, n_features)
        The input data
    W : ndarray or sparse matrix, (n_samples, n_samples)
        The precomputed adjacency matrix

    Returns
    -------
    P : ndarray, (n_features, self.n_components)
        The matrix encoding the locality preserving projection
    """
    X = check_array(X)

    # Row sums of W are the diagonal of D.  Sparse matrices return a 2-D
    # (n_samples, 1) result from ``sum``, so flatten to a plain vector.
    degree = np.asarray(W.sum(1)).ravel()

    # D and L = D - W are only ever used as products with X, so apply them
    # implicitly rather than materialising two dense
    # (n_samples, n_samples) matrices:
    #   D X = degree[:, None] * X      and      L X = D X - W X
    # ``W.dot`` works for both ndarrays and scipy sparse matrices.
    DX = degree[:, None] * X
    LX = DX - W.dot(X)

    _, evecs = eigh_robust(np.dot(X.T, LX),
                           np.dot(X.T, DX),
                           eigvals=(0, self.n_components - 1))
    return evecs
def _eigh_standard(a, eigvals=None, **kwargs):
    """Solve a standard Hermitian eigenproblem on old and new SciPy alike.

    ``eigvals`` is an optional ``(lo, hi)`` index range.  Newer SciPy
    releases take it as ``subset_by_index``; older ones only know the
    ``eigvals`` keyword, so fall back to that spelling when the new keyword
    is rejected.  All other keyword arguments go straight to
    ``scipy.linalg.eigh``.
    """
    if eigvals is None:
        return linalg.eigh(a, **kwargs)
    try:
        return linalg.eigh(a, subset_by_index=eigvals, **kwargs)
    except TypeError as err:
        # Only swallow the "unexpected keyword argument" failure; any other
        # TypeError is a genuine problem with the inputs.
        if 'subset_by_index' not in str(err):
            raise
        return linalg.eigh(a, eigvals=eigvals, **kwargs)


def eigh_robust(a, b=None, eigvals=None, eigvals_only=False,
                overwrite_a=False, overwrite_b=False,
                turbo=True, check_finite=True):
    """Robustly solve the Hermitian generalized eigenvalue problem

    This function robustly solves the Hermitian generalized eigenvalue problem
    ``A v = lambda B v`` in the case that B is not strictly positive definite.
    When B is strictly positive-definite, the result is equivalent to
    scipy.linalg.eigh() within floating-point accuracy.

    The problem is reduced to a standard one: with the eigendecomposition
    ``B = U diag(S) U^H``, the matrix ``S^-1/2 U^H A U S^-1/2`` is
    diagonalised and its eigenvectors are mapped back through
    ``U S^-1/2``.  Non-positive entries of ``S`` are given zero weight.

    Parameters
    ----------
    a : (M, M) array_like
        A complex Hermitian or real symmetric matrix whose eigenvalues and
        eigenvectors will be computed.
    b : (M, M) array_like, optional
        A complex Hermitian or real symmetric matrix.
        If omitted, identity matrix is assumed.
    eigvals : tuple (lo, hi), optional
        Indexes of the smallest and largest (in ascending order) eigenvalues
        and corresponding eigenvectors to be returned: 0 <= lo <= hi <= M-1.
        If omitted, all eigenvalues and eigenvectors are returned.
    eigvals_only : bool, optional
        Whether to calculate only eigenvalues and no eigenvectors.
        (Default: both are calculated)
    turbo : bool, optional
        Accepted for backward compatibility and ignored.  It only ever
        applied to generalized problems handed directly to
        scipy.linalg.eigh, whereas this function solves standard problems
        only.
    overwrite_a : bool, optional
        Whether to overwrite data in `a` (may improve performance)
    overwrite_b : bool, optional
        Whether to overwrite data in `b` (may improve performance)
    check_finite : bool, optional
        Whether to check that the input matrices contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.

    Returns
    -------
    w : (N,) float ndarray
        The N (1<=N<=M) selected eigenvalues, in ascending order, each
        repeated according to its multiplicity.
    v : (M, N) ndarray
        (if eigvals_only == False)
        The eigenvectors corresponding to the eigenvalues in `w`, one per
        column.
    """
    # Check for easy case first:
    if b is None:
        return _eigh_standard(a, eigvals=eigvals, eigvals_only=eigvals_only,
                              overwrite_a=overwrite_a,
                              check_finite=check_finite)

    # Compute eigendecomposition of b (b plays the role of "a" in this call)
    S, U = linalg.eigh(b, check_finite=check_finite, overwrite_a=overwrite_b)

    # Combine a and b on left hand side via decomposition of b.
    # Non-positive eigenvalues of b become inf so that their inverse square
    # root is exactly zero, dropping those directions from the problem.
    S[S <= 0] = np.inf
    Sinv = 1. / np.sqrt(S)

    # U is unitary, so the congruence needs the conjugate transpose; for
    # real input this is identical to the plain transpose.
    W = Sinv[:, None] * np.dot(U.conj().T, np.dot(a, U)) * Sinv

    # W is a temporary owned by this function, so it may always be
    # overwritten regardless of the caller's overwrite_a setting.
    output = _eigh_standard(W, eigvals=eigvals, eigvals_only=eigvals_only,
                            overwrite_a=True, check_finite=check_finite)

    if eigvals_only:
        return output

    evals, evecs = output
    return evals, np.dot(U, Sinv[:, None] * evecs)
def test_eigh_robust_eigval_selection():
    """Check that ``eigvals=(lo, hi)`` selects the right eigenpairs."""
    rand = np.random.RandomState(42)
    A = sym_matrix(10, rand)
    B = sym_matrix(10, rand)

    # Reference: the full generalized decomposition, sliced per range below.
    # This avoids relying on scipy's own index-selection keyword, whose
    # spelling differs between SciPy releases.
    evals_all, evecs_all = linalg.eigh(A, B)

    # A plain loop rather than a nose-style ``yield`` generator, so the
    # assertions run under any test runner.
    for lo, hi in [(0, 9), (0, 2), (8, 9), (4, 6)]:
        evals, evecs = eigh_robust(A, B, eigvals=(lo, hi))
        assert_allclose_evecs(evecs_all[:, lo:hi + 1], evecs)
        assert_allclose(evals_all[lo:hi + 1], evals)
def test_estimator_checks():
    """Run scikit-learn's suite of basic estimator checks"""
    from sklearn.utils.estimator_checks import check_estimator
    # check_estimator expects an estimator *instance*; support for passing
    # the class itself was removed from scikit-learn.
    check_estimator(LocalityPreservingProjection())
def version(path):
    """Obtain the package version from a python file e.g. pkg/__init__.py

    The file is searched for a line beginning with
    ``__version__ = '<version>'`` (single or double quotes) and the quoted
    text is returned.

    Raises
    ------
    RuntimeError
        If no such line is present in the file.
    """
    pattern = r"""^__version__ = ['"]([^'"]*)['"]"""
    found = re.search(pattern, read(path), re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
'Natural Language :: English', 68 | 'Programming Language :: Python :: 2.7', 69 | 'Programming Language :: Python :: 3.4', 70 | 'Programming Language :: Python :: 3.5'], 71 | ) 72 | --------------------------------------------------------------------------------