├── LICENSE
├── README
├── plot_sparse_filtering.py
├── setup.py
└── sparse_filtering.py

/LICENSE:
--------------------------------------------------------------------------------
New BSD License

Copyright (c) 2014 Jan Hendrik Metzen
All rights reserved.


Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

a. Redistributions of source code must retain the above copyright notice,
   this list of conditions and the following disclaimer.
b. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
c. Neither the name of the software's developer nor the names of
   its contributors may be used to endorse or promote products
   derived from this software without specific prior written
   permission.


THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
DAMAGE.

--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
sparse-filtering
================

Unsupervised feature learning based on sparse filtering.

This implements the method described in `Jiquan Ngiam, Pang Wei Koh,
Zhenghao Chen, Sonia Bhaskar, Andrew Y. Ng:
Sparse Filtering. NIPS 2011: 1125-1133`
and is based on the Matlab code provided in the paper's supplementary
material.
--------------------------------------------------------------------------------
/plot_sparse_filtering.py:
--------------------------------------------------------------------------------
"""
===========================================
Sparse filtering on Olivetti faces
===========================================

Unsupervised learning of features for images from the Olivetti faces dataset
using the sparse filtering algorithm. Linear features for sub-patches of the
Olivetti faces are learned with sparse filtering. The algorithm does not try
to model the data's distribution but rather learns features which are sparsely
activated: for each image, only a small subset of features is activated
(population sparsity); each feature is activated on only a small subset of the
examples (lifetime sparsity); and all features are activated roughly equally
often (high dispersal). This sparsity is encoded as an objective function, and
L-BFGS is used to minimize this function.

Plotted are the weight matrices of the features (corresponding roughly to
Gabor filters) and feature activation histograms.
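
Concretely (see sparse_filtering.py), the features are passed through a
soft-absolute activation, normalized across examples so that every feature is
equally active overall, and then normalized per example onto the unit l2-ball;
the minimized objective is the sum of l1-norms of the normalized per-example
feature vectors, sum_i ||fhat(i)||_1.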
18 | """ 19 | print(__doc__) 20 | 21 | import numpy as np 22 | import pylab as pl 23 | 24 | from sparse_filtering import SparseFiltering 25 | 26 | from sklearn.feature_extraction.image import extract_patches_2d 27 | 28 | from sklearn.datasets import fetch_olivetti_faces 29 | 30 | patch_width = 16 # Learn features for patches of size patch_width*patch_width 31 | n_patches = 25 # Determines number of random patches extracted from each image 32 | n_features = 64 # How many features are learned 33 | maxfun = 200 # The maximal number of evaluations of the objective function 34 | iprint = 10 # after how many function evaluations is information printed 35 | # by L-BFGS. -1 for no information 36 | 37 | ############################################################################### 38 | # Load faces data, normalize faces, and convert 2d structures 39 | dataset = fetch_olivetti_faces(shuffle=True) 40 | faces = dataset.data 41 | 42 | n_samples, _ = faces.shape 43 | 44 | faces_centered = faces - faces.mean(axis=0) # global centering 45 | 46 | faces_centered -= \ 47 | faces_centered.mean(axis=1).reshape(n_samples, -1) # local centering 48 | 49 | faces_centered = \ 50 | faces_centered.reshape(n_samples, 64, 64) # Reshaping to 64*64 pixel 51 | 52 | print("Dataset consists of %d faces" % n_samples) 53 | 54 | ############################################################################### 55 | # Extract n_patches patches randomly from each image 56 | patches = [extract_patches_2d(faces_centered[i], (patch_width, patch_width), 57 | max_patches=n_patches, random_state=i) 58 | for i in range(n_samples)] 59 | patches = np.array(patches).reshape(-1, patch_width * patch_width) 60 | 61 | ############################################################################### 62 | estimator = \ 63 | SparseFiltering(n_features=n_features, maxfun=maxfun, iprint=iprint) 64 | features = estimator.fit_transform(patches) 65 | 66 | # ############################################################################# 67 | # Plot weights of features 68 | pl.figure(0, figsize=(12, 10)) 69 | pl.subplots_adjust(left=0.01, bottom=0.01, right=0.99, top=0.95, 70 | wspace=0.1, hspace=0.4) 71 | for i in range(estimator.w_.shape[0]): 72 | pl.subplot(int(np.sqrt(n_features)), int(np.sqrt(n_features)), i + 1) 73 | pl.pcolor(estimator.w_[i].reshape(patch_width, patch_width), 74 | cmap=pl.cm.gray) 75 | pl.xticks(()) 76 | pl.yticks(()) 77 | pl.title("Feature %4d" % i) 78 | 79 | # Plot feature histogram 80 | pl.figure(1) 81 | pl.hist(features) 82 | pl.title("Feature activation histogram") 83 | 84 | # Plot Lifetime Sparsity histogram 85 | # Lifetime Sparsity: Each feature should only be active for a few examples 86 | pl.figure(2) 87 | activated_features = (features > 0.1).mean(0) 88 | pl.hist(activated_features) 89 | pl.xlabel("Feature activation ratio over all examples") 90 | pl.title("Lifetime Sparsity Histogram") 91 | 92 | # Plot Population Sparsity histogram 93 | # Population Sparsity: Each example should be represented by only a few active 94 | # features 95 | pl.figure(3) 96 | activated_features = (features > 0.1).mean(1) 97 | pl.hist(activated_features) 98 | pl.xlabel("Ratio of active features in example") 99 | pl.title("Population Sparsity Histogram") 100 | 101 | pl.show() 102 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | 
setup(name='sparse_filtering',
      version='1.1',
      description='Unsupervised feature learning based on sparse filtering',
      author='Jan Hendrik Metzen',
      author_email='jhm@informatik.uni-bremen.de',
      url='https://github.com/jmetzen/sparse-filtering',
      py_modules=['sparse_filtering'])
--------------------------------------------------------------------------------
/sparse_filtering.py:
--------------------------------------------------------------------------------
"""Feature learning based on sparse filtering"""
# Author: Jan Hendrik Metzen
# License: BSD 3 clause

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

from sklearn.base import BaseEstimator


class SparseFiltering(BaseEstimator):
    r"""Sparse filtering

    Unsupervised learning of features using the sparse filtering algorithm.
    Features are linear in the inputs, i.e., f_j(x) = \sum_i w_{ij} x_i.
    This algorithm does not try to model the data's distribution but rather
    to learn features which are sparsely activated in the sense of

    * Population Sparsity: for each example, only a small subset of features
      is activated.
    * Lifetime Sparsity: each feature is activated on only a small subset of
      the examples.
    * High Dispersal: uniform activity distribution across the features.

    This is encoded as an objective function which maps the weight vector w
    to a scalar value that becomes smaller the sparser the features are.
    L-BFGS is used to minimize this objective function.

    Parameters
    ----------
    n_features : int
        Number of features to be learned.

    maxfun : int
        Maximum number of evaluations of the objective function in L-BFGS-B.
        Defaults to 500.

    iprint : int
        Verbosity of L-BFGS-B. Information regarding the objective function
        is printed every iprint function evaluations; no information is
        printed if set to -1. Defaults to -1.

    Attributes
    ----------
    `w_` : array, [n_features, n_inputs]
        Sparse components extracted from the data.

    Notes
    -----
    This implements the method described in `Jiquan Ngiam, Pang Wei Koh,
    Zhenghao Chen, Sonia Bhaskar, Andrew Y. Ng:
    Sparse Filtering. NIPS 2011: 1125-1133`
    and is based on the Matlab code provided in the paper's supplementary
    material.
    """

    def __init__(self, n_features, maxfun=500, iprint=-1):
        self.n_features = n_features
        self.iprint = iprint
        self.maxfun = maxfun

    def fit(self, X, y=None, **params):
        """Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self.w_ = self._fit(X, **params)
        return self

    def transform(self, X):
        """Apply the learned features to X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data to be transformed.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_features), where n_features
            is the number of learned features.
        """
        return self._transform(X)

    def fit_transform(self, X, y=None, **params):
        """Fit the model with X and apply the dimensionality reduction on X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
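
        y : Ignored
            Not used; present only for API consistency by convention.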

        Returns
        -------
        X_new : array-like, shape (n_samples, n_features), where n_features
            is the number of learned features.

        """
        self.w_ = self._fit(X, **params)
        return self._transform(X)

    def _fit(self, X):
        # Transpose the data in order to be consistent with the Matlab code
        X = np.array(X.T)
        # Subtract the mean from each image patch
        X -= X.mean(0)

        def l2grad(X, Y, N, D):
            # Backpropagate through the l2 normalization
            return D / N[:, None] - Y \
                * (D * X).sum(1)[:, None] / (N ** 2)[:, None]

        def objective_fct(w):
            # View the 1d weight vector as a 2d matrix
            W = w.reshape(self.n_features, X.shape[0])

            # Determine features resulting from the weight vector
            F, Fs, L2Fs, NFs, L2Fn, Fhat = self._determine_features(X, W)

            # Compute the sparsity of each feature over all examples, i.e.,
            # its l1-norm; the objective function is the sum over these
            # sparsities
            obj = np.apply_along_axis(np.linalg.norm, 1, Fhat, 1).sum()
            # Backpropagate through each feedforward step
            deltaW = l2grad(NFs.T, Fhat, L2Fn, np.ones_like(Fhat))
            deltaW = l2grad(Fs, NFs, L2Fs, deltaW.T)
            deltaW = (deltaW * (F / Fs)).dot(X.T)

            # Return the objective value and the gradient
            return obj, deltaW.flatten()

        # Choose initial weights randomly from [-1, 1)
        w0 = np.random.random(X.shape[0] * self.n_features) * 2 - 1
        # Use L-BFGS to find weights which correspond to a (local) minimum of
        # the objective function
        w, _, _ = fmin_l_bfgs_b(objective_fct, w0, iprint=self.iprint,
                                maxfun=self.maxfun)

        return w.reshape(self.n_features, X.shape[0])

    def _transform(self, X):
        # Transpose the data in order to be consistent with the Matlab code
        X = np.array(X.T)
        # Subtract the mean from each image patch
        X -= X.mean(0)

        W = self.w_.reshape(self.n_features, X.shape[0])

        # Determine features resulting from the weight vector
        # (ignore the internals required only for the gradient)
        _, _, _, _, _, Fhat = self._determine_features(X, W)

        return Fhat

    def _determine_features(self, X, W):
        # Compute unnormalized features by multiplying the weight matrix
        # with the data
        F = W.dot(X)  # linear activation
        Fs = np.sqrt(F ** 2 + 1e-8)  # soft-absolute activation
        # Normalize each feature to be equally active by dividing each
        # feature by its l2-norm across all examples
        L2Fs = np.apply_along_axis(np.linalg.norm, 1, Fs)
        NFs = Fs / L2Fs[:, None]
        # Normalize the features of each example so that they lie on the
        # unit l2-ball
        L2Fn = np.apply_along_axis(np.linalg.norm, 1, NFs.T)
        Fhat = NFs.T / L2Fn[:, None]

        return F, Fs, L2Fs, NFs, L2Fn, Fhat
--------------------------------------------------------------------------------
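
Usage sketch (illustrative, not part of the repository; the synthetic data,
sizes, and parameter values below are placeholder assumptions, since any
(n_samples, n_inputs) array can serve as input):

    import numpy as np

    from sparse_filtering import SparseFiltering

    # Synthetic stand-in for image patches: 1000 examples with 256 input
    # dimensions (e.g., flattened 16x16 patches)
    rng = np.random.RandomState(0)
    X = rng.randn(1000, 256)

    # Learn 32 sparsely activated linear features, capping the number of
    # objective function evaluations at 100
    estimator = SparseFiltering(n_features=32, maxfun=100)
    features = estimator.fit_transform(X)  # shape (1000, 32)

    # Each row of w_ holds the weight vector of one learned feature
    print(features.shape, estimator.w_.shape)  # (1000, 32) (32, 256)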