├── toy_example.png ├── README.md ├── loss.py ├── .gitignore ├── fista.py ├── LICENSE ├── test_pyowl.py ├── plot_toy_example.py └── pyowl.py /toy_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vene/pyowl/HEAD/toy_example.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyowl: Ordered Weighted L1 Regularization in Python 2 | 3 | ![OWL vs Lasso example](toy_example.png?raw=true "OWL vs Lasso example") 4 | 5 | The OWL norm generalizes L1, L_inf and OSCAR. In particular, OSCAR selects 6 | coefficients in groups with equal values, therefore handling highly 7 | correlated features in a robust way. 8 | 9 | Also known as Sorted L1 norm or SLOPE. 10 | 11 | This implementation manages to be very short thanks to the awesome scientific 12 | python ecosystem. 13 | -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | 6 | 7 | def squared_loss(y_true, y_pred, return_derivative=False): 8 | diff = y_pred - y_true 9 | obj = 0.5 * np.dot(diff, diff) 10 | if return_derivative: 11 | return obj, diff 12 | else: 13 | return obj 14 | 15 | 16 | def squared_hinge_loss(y_true, y_scores, return_derivative=False): 17 | # labels in (-1, 1) 18 | z = np.maximum(0, 1 - y_true * y_scores) 19 | obj = np.sum(z ** 2) 20 | 21 | if return_derivative: 22 | return obj, -2 * y_true * z 23 | else: 24 | return obj 25 | 26 | 27 | def get_loss(name): 28 | losses = {'squared': squared_loss, 29 | 'squared-hinge': squared_hinge_loss} 30 | return losses[name] 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /fista.py: -------------------------------------------------------------------------------- 1 | """ 2 | Efficient implementation of FISTA. 3 | """ 4 | 5 | # Author: Mathieu Blondel 6 | # License: BSD 3 clause 7 | # based on https://gist.github.com/mblondel/5105786d740693a6996bcb8e482c7083 8 | 9 | import numpy as np 10 | 11 | 12 | def fista(sfunc, nsfunc, x0, max_iter=500, max_linesearch=20, eta=2.0, tol=1e-3, 13 | verbose=0): 14 | 15 | y = x0.copy() 16 | x = y 17 | L = 1.0 18 | t = 1.0 19 | 20 | for it in range(max_iter): 21 | f_old, grad = sfunc(y, True) 22 | 23 | for ls in range(max_linesearch): 24 | y_proj = nsfunc(y - grad / L, L) 25 | diff = (y_proj - y).ravel() 26 | sqdist = np.dot(diff, diff) 27 | dist = np.sqrt(sqdist) 28 | 29 | F = sfunc(y_proj) 30 | Q = f_old + np.dot(diff, grad.ravel()) + 0.5 * L * sqdist 31 | 32 | if F <= Q: 33 | break 34 | 35 | L *= eta 36 | 37 | if ls == max_linesearch - 1 and verbose: 38 | print("Line search did not converge.") 39 | 40 | if verbose: 41 | print("%d. %f" % (it + 1, dist)) 42 | 43 | if dist <= tol: 44 | if verbose: 45 | print("Converged.") 46 | break 47 | 48 | x_next = y_proj 49 | t_next = (1 + np.sqrt(1 + 4 * t ** 2)) / 2. 50 | y = x_next + (t-1) / t_next * (x_next - x) 51 | t = t_next 52 | x = x_next 53 | 54 | return y_proj 55 | 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Vlad Niculae 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /test_pyowl.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: BSD 3 clause 3 | 4 | import numpy as np 5 | from numpy.testing import assert_array_almost_equal 6 | from pyowl import prox_owl 7 | 8 | rng = np.random.RandomState(0) 9 | 10 | # cf. scikit-learn-contrib/lightning impl/penalty.py 11 | def project_simplex(v, z=1): 12 | if np.sum(v) <= z: 13 | return v 14 | 15 | n_features = v.shape[0] 16 | u = np.sort(v)[::-1] 17 | cssv = np.cumsum(u) - z 18 | ind = np.arange(n_features) + 1 19 | cond = u - cssv / ind > 0 20 | rho = ind[cond][-1] 21 | theta = cssv[cond][-1] / float(rho) 22 | w = np.maximum(v - theta, 0) 23 | return w 24 | 25 | 26 | # cf. scikit-learn-contrib/lightning impl/penalty.py 27 | def project_l1_ball(v, z=1): 28 | return np.sign(v) * project_simplex(np.abs(v), z) 29 | 30 | 31 | def prox_linf(v, alpha): 32 | # cf. Proximal Algorithms, Parikh & Boyd, eq. 6.8 33 | # dual ball B is the L1 ball 34 | 35 | p = project_l1_ball(v / alpha) 36 | return v - alpha * p 37 | 38 | 39 | 40 | def test_prox_special_cases(): 41 | for _ in range(20): 42 | v = rng.randn(10) 43 | alpha = rng.uniform(0.001, 1) 44 | 45 | # l1 proximal operator 46 | z_expected = np.maximum(0, v - alpha) 47 | z_expected -= np.maximum(0, -v - alpha) 48 | z_obtained = prox_owl(v, alpha * np.ones_like(v)) 49 | 50 | assert_array_almost_equal(z_expected, z_obtained) 51 | 52 | # l_inf proximal operator 53 | z_expected = prox_linf(v, alpha) 54 | w = np.zeros_like(v) 55 | w[0] = alpha 56 | z_obtained = prox_owl(v, w) 57 | assert_array_almost_equal(z_expected, z_obtained) 58 | -------------------------------------------------------------------------------- /plot_toy_example.py: -------------------------------------------------------------------------------- 1 | """ OWL vs LASSO on a known correlated design. 2 | 3 | Reproduces figure 1 from Figueiredo and Nowak, 4 | Ordered Weighted L1 Regularized Regression with Strongly 5 | Correlated Covariates: Theoretical Aspects. 6 | http://www.jmlr.org/proceedings/papers/v51/figueiredo16.pdf 7 | """ 8 | 9 | # Author: Vlad Niculae 10 | # License: BSD 3 clause 11 | 12 | 13 | import numpy as np 14 | import matplotlib.pyplot as plt 15 | from sklearn.linear_model import Lasso 16 | from pyowl import OwlRegressor 17 | 18 | n_samples = 10 19 | n_features = 100 20 | 21 | coef = np.zeros(n_features) 22 | coef[20:30] = -1 23 | coef[60:70] = 1 24 | coef /= np.linalg.norm(coef) 25 | 26 | rng = np.random.RandomState(1) 27 | X = rng.randn(n_samples, n_features) 28 | X[:, 20:30] = X[:, 20] 29 | X[:, 60:70] = X[:, 20] 30 | X += 0.001 * rng.randn(n_samples, n_features) 31 | X /= np.linalg.norm(X, axis=0) 32 | y = np.dot(X, coef) 33 | 34 | plt.figure() 35 | 36 | # ground truth: 37 | plt.subplot(221) 38 | plt.stem(np.arange(n_features), coef) 39 | plt.title("True coefficients") 40 | 41 | alpha = 0.0001 42 | beta = 0.01 # only in OWL 43 | 44 | # scikit-learn LASSO 45 | plt.subplot(222) 46 | lasso_skl = Lasso(alpha=alpha / (2 * n_samples), fit_intercept=False) 47 | lasso_skl.fit(X, y) 48 | plt.stem(np.arange(n_features), lasso_skl.coef_) 49 | plt.title("LASSO coefficients (scikit-learn)") 50 | 51 | # pyowl lasso 52 | plt.subplot(223) 53 | lasso_owl = OwlRegressor(weights=np.ones(n_features) * alpha) 54 | lasso_owl.fit(X, y) 55 | plt.stem(np.arange(n_features), lasso_owl.coef_) 56 | plt.title("LASSO coefficients (pyowl)") 57 | 58 | # pyowl lasso 59 | plt.subplot(224) 60 | oscar_owl = OwlRegressor(weights=(alpha, beta)) 61 | oscar_owl.fit(X, y) 62 | plt.stem(np.arange(n_features), oscar_owl.coef_) 63 | plt.title("OSCAR coefficients (pyowl)") 64 | 65 | plt.tight_layout() 66 | plt.savefig("toy_example.png") 67 | 68 | -------------------------------------------------------------------------------- /pyowl.py: -------------------------------------------------------------------------------- 1 | # Author: Vlad Niculae 2 | # License: BSD 3 clause 3 | 4 | from __future__ import print_function 5 | from __future__ import division 6 | 7 | import numpy as np 8 | 9 | from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin 10 | from sklearn.utils.extmath import safe_sparse_dot 11 | from sklearn.isotonic import isotonic_regression 12 | from sklearn.preprocessing import LabelBinarizer 13 | 14 | from fista import fista 15 | from loss import get_loss 16 | 17 | 18 | def prox_owl(v, w): 19 | """Proximal operator of the OWL norm dot(w, reversed(sort(v))) 20 | 21 | Follows description and notation from: 22 | X. Zeng, M. Figueiredo, 23 | The ordered weighted L1 norm: Atomic formulation, dual norm, 24 | and projections. 25 | eprint http://arxiv.org/abs/1409.4271 26 | """ 27 | 28 | # wlog operate on absolute values 29 | v_abs = np.abs(v) 30 | ix = np.argsort(v_abs)[::-1] 31 | v_abs = v_abs[ix] 32 | # project to K+ (monotone non-negative decreasing cone) 33 | v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False) 34 | 35 | # undo the sorting 36 | inv_ix = np.zeros_like(ix) 37 | inv_ix[ix] = np.arange(len(v)) 38 | v_abs = v_abs[inv_ix] 39 | 40 | return np.sign(v) * v_abs 41 | 42 | 43 | def _oscar_weights(alpha, beta, size): 44 | w = np.arange(size - 1, -1, -1, dtype=np.double) 45 | w *= beta 46 | w += alpha 47 | return w 48 | 49 | 50 | def _fit_owl_fista(X, y, w, loss, max_iter=500, max_linesearch=20, eta=2.0, 51 | tol=1e-3, verbose=0): 52 | 53 | # least squares loss 54 | def sfunc(coef, grad=False): 55 | y_scores = safe_sparse_dot(X, coef) 56 | if grad: 57 | obj, lp = loss(y, y_scores, return_derivative=True) 58 | grad = safe_sparse_dot(X.T, lp) 59 | return obj, grad 60 | else: 61 | return loss(y, y_scores) 62 | 63 | def nsfunc(coef, L): 64 | return prox_owl(coef, w / L) 65 | 66 | coef = np.zeros(X.shape[1]) 67 | return fista(sfunc, nsfunc, coef, max_iter, max_linesearch, 68 | eta, tol, verbose) 69 | 70 | 71 | class _BaseOwl(BaseEstimator): 72 | """ 73 | 74 | Solves sum loss(y_pred, y) + sum_j weights_j |coef|_(j) 75 | where u_(j) is the jth largest component of the vector u. 76 | and weights is a monotonic nonincreasing vector. 77 | 78 | OWL is also known as: sorted L1 norm, SLOPE 79 | 80 | Parameters 81 | ---------- 82 | 83 | weights: array, shape (n_features,) or tuple, length 2 84 | Nonincreasing weights vector for the ordered weighted L1 penalty. 85 | If weights = (alpha, 0, 0, ..., 0), this amounts to a L_inf penalty. 86 | If weights = alpha * np.ones(n_features) it amounts to L1. 87 | If weights is a tuple = (alpha, beta), the OSCAR penalty is used:: 88 | alpha ||coef||_1 + beta sum_{i 0 178 | return self.lb_.inverse_transform(y_pred) 179 | 180 | 181 | if __name__ == '__main__': 182 | 183 | from sklearn.model_selection import train_test_split 184 | from sklearn.datasets import load_boston, load_breast_cancer 185 | 186 | print("OSCAR proximal operator on toy example:") 187 | v = np.array([1, 3, 2.9, 4, 0]) 188 | w_oscar = _oscar_weights(alpha=0.01, beta=1, size=5) 189 | print(prox_owl(v, w_oscar)) 190 | print() 191 | 192 | print("Regression") 193 | X, y = load_boston(return_X_y=True) 194 | X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) 195 | X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) 196 | clf = OwlRegressor(weights=(1, 100)) 197 | clf.fit(X_tr, y_tr) 198 | print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) 199 | print("Test score", clf.score(X_te, y_te)) 200 | print() 201 | 202 | print("Classification") 203 | X, y = load_breast_cancer(return_X_y=True) 204 | X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])]) 205 | X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0) 206 | clf = OwlClassifier(weights=(1, 100), loss='squared-hinge') 207 | clf.fit(X_tr, y_tr) 208 | print("Correlated coefs", clf.coef_[0], clf.coef_[-1]) 209 | print("Test score", clf.score(X_te, y_te)) 210 | --------------------------------------------------------------------------------