├── toy_example.png
├── README.md
├── loss.py
├── .gitignore
├── fista.py
├── LICENSE
├── test_pyowl.py
├── plot_toy_example.py
└── pyowl.py


/toy_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vene/pyowl/HEAD/toy_example.png


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pyowl: Ordered Weighted L1 Regularization in Python
 2 | 
 3 | ![OWL vs Lasso example](toy_example.png?raw=true "OWL vs Lasso example")
 4 | 
 5 | The OWL norm generalizes L1, L_inf and OSCAR. In particular, OSCAR selects
 6 | coefficients in groups with equal values, therefore handling highly
 7 | correlated features in a robust way.
 8 | 
 9 | Also known as Sorted L1 norm or SLOPE.
10 | 
11 | This implementation manages to be very short thanks to the awesome scientific
12 | python ecosystem.
13 | 


--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
 1 | # Author: Vlad Niculae <vlad@vene.ro>
 2 | # License: BSD 3 clause
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | def squared_loss(y_true, y_pred, return_derivative=False):
 8 |     diff = y_pred - y_true
 9 |     obj = 0.5 * np.dot(diff, diff)
10 |     if return_derivative:
11 |         return obj, diff
12 |     else:
13 |         return obj
14 | 
15 | 
def squared_hinge_loss(y_true, y_scores, return_derivative=False):
    """Sum over samples of ``max(0, 1 - y_true * y_scores) ** 2``.

    Parameters
    ----------
    y_true: array
        Labels; the original comment states they are expected to be
        -1 or 1.

    y_scores: array
        Decision scores.

    return_derivative: bool, default: False
        If true, also return the derivative of the objective with respect
        to ``y_scores``.

    Returns
    -------
    The objective, or the pair ``(objective, derivative)`` when
    ``return_derivative`` is true.
    """
    # hinge term: positive only where the margin y * score is below 1
    hinge = np.maximum(0, 1 - y_true * y_scores)
    value = np.sum(hinge ** 2)

    if not return_derivative:
        return value
    return value, -2 * y_true * hinge
25 | 
26 | 
def get_loss(name):
    """Return the loss function registered under ``name``.

    Registered names are 'squared' and 'squared-hinge'; looking up any
    other name raises ``KeyError``.
    """
    registry = {}
    registry['squared'] = squared_loss
    registry['squared-hinge'] = squared_hinge_loss
    return registry[name]
31 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/fista.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Efficient implementation of FISTA.
 3 | """
 4 | 
 5 | # Author: Mathieu Blondel
 6 | # License: BSD 3 clause
 7 | # based on https://gist.github.com/mblondel/5105786d740693a6996bcb8e482c7083
 8 | 
 9 | import numpy as np
10 | 
11 | 
def fista(sfunc, nsfunc, x0, max_iter=500, max_linesearch=20, eta=2.0, tol=1e-3,
          verbose=0):
    """Run FISTA with a backtracking line search.

    Parameters
    ----------
    sfunc: callable
        Smooth term. ``sfunc(x)`` returns its value at ``x`` and
        ``sfunc(x, True)`` returns the pair ``(value, gradient)``.

    nsfunc: callable
        Proximal step. ``nsfunc(x, L)`` receives the gradient-step point
        ``y - grad / L`` together with the current ``L`` and returns the
        next candidate iterate.

    x0: array
        Starting point. It is copied, never modified in place.

    max_iter: int, default: 500
        Maximum number of outer iterations.

    max_linesearch: int, default: 20
        Maximum number of trial steps per outer iteration; must be >= 1.

    eta: float, default: 2.0
        Factor by which ``L`` is multiplied after each rejected trial step.

    tol: float, default: 1e-3
        Stop as soon as the Euclidean distance between the extrapolated
        point and its proximal update is at most ``tol``.

    verbose: int, default: 0
        If nonzero, print progress messages.

    Returns
    -------
    The result of the last proximal step, or a copy of ``x0`` when
    ``max_iter`` allows no iteration at all.

    Raises
    ------
    ValueError
        If ``max_linesearch`` is smaller than 1.
    """
    if max_linesearch < 1:
        # without a single trial step there is no proximal update to return
        raise ValueError("max_linesearch must be at least 1.")

    y = x0.copy()
    x = y
    # defined up front so that max_iter=0 returns the starting point
    y_proj = y
    L = 1.0
    t = 1.0

    for it in range(max_iter):
        f_old, grad = sfunc(y, True)

        for _ in range(max_linesearch):
            y_proj = nsfunc(y - grad / L, L)
            diff = (y_proj - y).ravel()
            sqdist = np.dot(diff, diff)

            F = sfunc(y_proj)
            # quadratic upper model of the smooth term around y
            Q = f_old + np.dot(diff, grad.ravel()) + 0.5 * L * sqdist

            if F <= Q:
                break

            L *= eta
        else:
            # reached only when no trial step was accepted
            if verbose:
                print("Line search did not converge.")

        dist = np.sqrt(sqdist)

        if verbose:
            print("%d. %f" % (it + 1, dist))

        if dist <= tol:
            if verbose:
                print("Converged.")
            break

        # momentum / extrapolation step
        x_next = y_proj
        t_next = (1 + np.sqrt(1 + 4 * t ** 2)) / 2.
        y = x_next + (t - 1) / t_next * (x_next - x)
        t = t_next
        x = x_next

    return y_proj
55 | 
56 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017, Vlad Niculae
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/test_pyowl.py:
--------------------------------------------------------------------------------
 1 | # Author: Vlad Niculae <vlad@vene.ro>
 2 | # License: BSD 3 clause
 3 | 
 4 | import numpy as np
 5 | from numpy.testing import assert_array_almost_equal
 6 | from pyowl import prox_owl
 7 | 
 8 | rng = np.random.RandomState(0)
 9 | 
10 | # cf. scikit-learn-contrib/lightning impl/penalty.py
def project_simplex(v, z=1):
    """Sort-based projection used as a building block for the L1-ball case.

    If ``sum(v) <= z`` the input is returned unchanged (the same object).
    Otherwise a threshold ``theta`` is computed from the sorted entries and
    ``max(v - theta, 0)`` is returned.
    """
    if np.sum(v) <= z:
        return v

    descending = np.sort(v)[::-1]
    shifted_cumsum = np.cumsum(descending) - z
    ranks = np.arange(1, v.shape[0] + 1)
    # entries that stay positive after subtracting the running threshold
    active = descending - shifted_cumsum / ranks > 0
    last = np.flatnonzero(active)[-1]
    theta = shifted_cumsum[last] / float(ranks[last])
    return np.maximum(v - theta, 0)
24 | 
25 | 
26 | # cf. scikit-learn-contrib/lightning impl/penalty.py
def project_l1_ball(v, z=1):
    """Apply ``project_simplex`` to ``abs(v)`` and restore the signs of ``v``."""
    magnitudes = project_simplex(np.abs(v), z)
    return np.sign(v) * magnitudes
29 | 
30 | 
def prox_linf(v, alpha):
    """Reference proximal operator of ``alpha * ||.||_inf``.

    cf. Proximal Algorithms, Parikh & Boyd, eq. 6.8: the prox is the input
    minus ``alpha`` times the projection of ``v / alpha`` onto the dual
    ball, which for the L_inf norm is the L1 ball.
    """
    dual_projection = project_l1_ball(v / alpha)
    return v - alpha * dual_projection
37 | 
38 | 
39 | 
def test_prox_special_cases():
    """Compare prox_owl with closed forms for its L1 and L_inf special cases."""
    n_trials = 20
    for _ in range(n_trials):
        v = rng.randn(10)
        alpha = rng.uniform(0.001, 1)

        # constant weights: OWL reduces to L1, whose prox is soft-thresholding
        soft_thresholded = np.maximum(0, v - alpha) - np.maximum(0, -v - alpha)
        l1_weights = alpha * np.ones_like(v)
        assert_array_almost_equal(soft_thresholded, prox_owl(v, l1_weights))

        # a single nonzero leading weight: OWL reduces to L_inf
        linf_expected = prox_linf(v, alpha)
        linf_weights = np.zeros_like(v)
        linf_weights[0] = alpha
        assert_array_almost_equal(linf_expected, prox_owl(v, linf_weights))
58 | 


--------------------------------------------------------------------------------
/plot_toy_example.py:
--------------------------------------------------------------------------------
 1 | """ OWL vs LASSO on a known correlated design.
 2 | 
 3 | Reproduces figure 1 from Figueiredo and Nowak,
 4 | Ordered Weighted L1 Regularized Regression with Strongly
 5 | Correlated Covariates: Theoretical Aspects.
 6 | http://www.jmlr.org/proceedings/papers/v51/figueiredo16.pdf
 7 | """
 8 | 
 9 | # Author: Vlad Niculae <vlad@vene.ro>
10 | # License: BSD 3 clause
11 | 
12 | 
13 | import numpy as np
14 | import matplotlib.pyplot as plt
15 | from sklearn.linear_model import Lasso
16 | from pyowl import OwlRegressor
17 | 
# Problem size: far fewer samples than features.
n_samples = 10
n_features = 100

# Ground truth: two blocks of ten equal entries with opposite signs,
# rescaled so that the coefficient vector has unit Euclidean norm.
coef = np.zeros(n_features)
coef[20:30] = -1
coef[60:70] = 1
coef /= np.linalg.norm(coef)

rng = np.random.RandomState(1)
X = rng.randn(n_samples, n_features)
# NOTE(review): X[:, 20] is 1-d with n_samples entries, so assigning it to a
# (n_samples, 10) slice broadcasts it along the rows; this only runs because
# n_samples == 10. If the intent is to replicate column 20 into each column
# of the slice, a column vector (e.g. X[:, 20:21]) would be needed - confirm.
X[:, 20:30] = X[:, 20]
X[:, 60:70] = X[:, 20]
# small perturbation so the affected columns are not exactly identical
X += 0.001 * rng.randn(n_samples, n_features)
# normalize every column to unit Euclidean norm
X /= np.linalg.norm(X, axis=0)
# noiseless response
y = np.dot(X, coef)

plt.figure()

# ground truth:
plt.subplot(221)
plt.stem(np.arange(n_features), coef)
plt.title("True coefficients")

alpha = 0.0001
beta = 0.01  # only in OWL

# scikit-learn LASSO
# NOTE(review): scikit-learn's Lasso scales the squared loss by
# 1 / (2 * n_samples) while loss.squared_loss uses 0.5 * ||residual||^2;
# confirm that alpha / (2 * n_samples), rather than alpha / n_samples, is
# the intended equivalent penalty strength.
plt.subplot(222)
lasso_skl = Lasso(alpha=alpha / (2 * n_samples), fit_intercept=False)
lasso_skl.fit(X, y)
plt.stem(np.arange(n_features), lasso_skl.coef_)
plt.title("LASSO coefficients (scikit-learn)")

# pyowl lasso: constant weights make the OWL penalty an L1 penalty
plt.subplot(223)
lasso_owl = OwlRegressor(weights=np.ones(n_features) * alpha)
lasso_owl.fit(X, y)
plt.stem(np.arange(n_features), lasso_owl.coef_)
plt.title("LASSO coefficients (pyowl)")

# pyowl OSCAR: a (alpha, beta) tuple selects the OSCAR weights
plt.subplot(224)
oscar_owl = OwlRegressor(weights=(alpha, beta))
oscar_owl.fit(X, y)
plt.stem(np.arange(n_features), oscar_owl.coef_)
plt.title("OSCAR coefficients (pyowl)")

plt.tight_layout()
plt.savefig("toy_example.png")
67 | 
68 | 


--------------------------------------------------------------------------------
/pyowl.py:
--------------------------------------------------------------------------------
  1 | # Author: Vlad Niculae <vlad@vene.ro>
  2 | # License: BSD 3 clause
  3 | 
  4 | from __future__ import print_function
  5 | from __future__ import division
  6 | 
  7 | import numpy as np
  8 | 
  9 | from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
 10 | from sklearn.utils.extmath import safe_sparse_dot
 11 | from sklearn.isotonic import isotonic_regression
 12 | from sklearn.preprocessing import LabelBinarizer
 13 | 
 14 | from fista import fista
 15 | from loss import get_loss
 16 | 
 17 | 
 18 | def prox_owl(v, w):
 19 |     """Proximal operator of the OWL norm dot(w, reversed(sort(v)))
 20 | 
 21 |     Follows description and notation from:
 22 |     X. Zeng, M. Figueiredo,
 23 |     The ordered weighted L1 norm: Atomic formulation, dual norm,
 24 |     and projections.
 25 |     eprint http://arxiv.org/abs/1409.4271
 26 |     """
 27 | 
 28 |     # wlog operate on absolute values
 29 |     v_abs = np.abs(v)
 30 |     ix = np.argsort(v_abs)[::-1]
 31 |     v_abs = v_abs[ix]
 32 |     # project to K+ (monotone non-negative decreasing cone)
 33 |     v_abs = isotonic_regression(v_abs - w, y_min=0, increasing=False)
 34 | 
 35 |     # undo the sorting
 36 |     inv_ix = np.zeros_like(ix)
 37 |     inv_ix[ix] = np.arange(len(v))
 38 |     v_abs = v_abs[inv_ix]
 39 | 
 40 |     return np.sign(v) * v_abs
 41 | 
 42 | 
 43 | def _oscar_weights(alpha, beta, size):
 44 |     w = np.arange(size - 1, -1, -1, dtype=np.double)
 45 |     w *= beta
 46 |     w += alpha
 47 |     return w
 48 | 
 49 | 
 50 | def _fit_owl_fista(X, y, w, loss, max_iter=500, max_linesearch=20, eta=2.0,
 51 |                    tol=1e-3, verbose=0):
 52 | 
 53 |     # least squares loss
 54 |     def sfunc(coef, grad=False):
 55 |         y_scores = safe_sparse_dot(X, coef)
 56 |         if grad:
 57 |             obj, lp = loss(y, y_scores, return_derivative=True)
 58 |             grad = safe_sparse_dot(X.T, lp)
 59 |             return obj, grad
 60 |         else:
 61 |             return loss(y, y_scores)
 62 | 
 63 |     def nsfunc(coef, L):
 64 |         return prox_owl(coef, w / L)
 65 | 
 66 |     coef = np.zeros(X.shape[1])
 67 |     return fista(sfunc, nsfunc, coef, max_iter, max_linesearch,
 68 |                  eta, tol, verbose)
 69 | 
 70 | 
 71 | class _BaseOwl(BaseEstimator):
 72 |     """
 73 | 
 74 |     Solves sum loss(y_pred, y) + sum_j weights_j |coef|_(j)
 75 |            where u_(j) is the jth largest component of the vector u.
 76 |            and weights is a monotonic nonincreasing vector.
 77 | 
 78 |     OWL is also known as: sorted L1 norm, SLOPE
 79 | 
 80 |     Parameters
 81 |     ----------
 82 | 
 83 |     weights: array, shape (n_features,) or tuple, length 2
 84 |         Nonincreasing weights vector for the ordered weighted L1 penalty.
 85 |         If weights = (alpha, 0, 0, ..., 0), this amounts to a L_inf penalty.
 86 |         If weights = alpha * np.ones(n_features) it amounts to L1.
 87 |         If weights is a tuple = (alpha, beta), the OSCAR penalty is used::
 88 |             alpha ||coef||_1 + beta sum_{i<j} max{|x_i|, |x_j|)
 89 |         by computing the corresponding `weights` vector as::
 90 |             weights_i = alpha + beta(n_features - i - 1)
 91 | 
 92 |     loss: string, default: "squared"
 93 |         Loss function to use, see loss.py to add your own.
 94 | 
 95 |     max_iter: int, default: 500
 96 |         Maximum FISTA iterations.
 97 | 
 98 |     max_linesearch: int, default: 20
 99 |         Maximum number of FISTA backtracking line search steps.
100 | 
101 |     eta: float, default: 2
102 |         Amount by which to increase step size in FISTA bactracking line search.
103 | 
104 |     tol: float, default: 1e-3
105 |         Tolerance for the convergence criterion.
106 | 
107 |     verbose: int, default 0:
108 |         Degree of verbosity to print from the solver.
109 | 
110 |     References
111 |     ----------
112 |         X. Zeng, M. Figueiredo,
113 |         The ordered weighted L1 norm: Atomic formulation, dual norm,
114 |         and projections.
115 |         eprint http://arxiv.org/abs/1409.4271
116 |     """
117 | 
118 |     def __init__(self, weights, loss='squared', max_iter=500,
119 |                  max_linesearch=20, eta=2.0, tol=1e-3, verbose=0):
120 |         self.weights = weights
121 |         self.loss = loss
122 |         self.max_iter = max_iter
123 |         self.max_linesearch = max_linesearch
124 |         self.eta = eta
125 |         self.tol = tol
126 |         self.verbose = verbose
127 | 
128 |     def fit(self, X, y):
129 | 
130 |         n_features = X.shape[1]
131 | 
132 |         loss = self.get_loss()
133 |         weights = self.weights
134 |         if isinstance(weights, tuple) and len(weights) == 2:
135 |             alpha, beta = self.weights
136 |             weights = _oscar_weights(alpha, beta, n_features)
137 | 
138 |         self.coef_ = _fit_owl_fista(X, y, weights, loss, self.max_iter,
139 |                                     self.max_linesearch, self.eta, self.tol,
140 |                                     self.verbose)
141 |         return self
142 | 
143 |     def _decision_function(self, X):
144 |         return safe_sparse_dot(X, self.coef_)
145 | 
146 | 
class OwlRegressor(_BaseOwl, RegressorMixin):
    """Ordered Weighted L1--penalized (OWL) regression solved by FISTA"""
    __doc__ += _BaseOwl.__doc__

    def get_loss(self):
        """Return the squared loss; any other ``loss`` setting is rejected."""
        if self.loss == 'squared':
            return get_loss(self.loss)

        raise NotImplementedError('Only regression loss implemented '
                                  'at the moment is squared.')

    def predict(self, X):
        """Predict targets as the linear scores of ``X``."""
        predictions = self._decision_function(X)
        return predictions
160 | 
161 | 
class OwlClassifier(_BaseOwl, ClassifierMixin):
    """Ordered Weighted L1--penalized (OWL) classification solved by FISTA"""
    __doc__ += _BaseOwl.__doc__

    def get_loss(self):
        """Return the loss function named by ``self.loss``."""
        loss_name = self.loss
        return get_loss(loss_name)

    def fit(self, X, y):
        """Binarize the labels with -1 as negative label, then fit."""
        self.lb_ = LabelBinarizer(neg_label=-1)
        signed_labels = self.lb_.fit_transform(y).ravel()
        return super(OwlClassifier, self).fit(X, signed_labels)

    def decision_function(self, X):
        """Return the linear scores of ``X``."""
        scores = self._decision_function(X)
        return scores

    def predict(self, X):
        """Map positive scores back to labels via the fitted binarizer."""
        is_positive = self.decision_function(X) > 0
        return self.lb_.inverse_transform(is_positive)
179 | 
180 | 
if __name__ == '__main__':
    # Small demo: the OSCAR prox on a toy vector, then a regression and a
    # classification fit, each with one extra, strongly anti-correlated
    # copy of the first feature appended to the data.

    from sklearn.model_selection import train_test_split
    # NOTE(review): load_boston is, as far as I know, no longer shipped with
    # recent scikit-learn releases (removed in 1.2) - confirm which versions
    # this demo is expected to run on.
    from sklearn.datasets import load_boston, load_breast_cancer

    print("OSCAR proximal operator on toy example:")
    v = np.array([1, 3, 2.9, 4, 0])
    w_oscar = _oscar_weights(alpha=0.01, beta=1, size=5)
    print(prox_owl(v, w_oscar))
    print()

    print("Regression")
    X, y = load_boston(return_X_y=True)
    # append a noisy negated copy of feature 0 (unseeded global RNG, so the
    # printed numbers vary between runs)
    X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])])
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
    # (alpha, beta) tuple selects the OSCAR weights
    clf = OwlRegressor(weights=(1, 100))
    clf.fit(X_tr, y_tr)
    # coefficients of feature 0 and of its appended negated copy
    print("Correlated coefs", clf.coef_[0], clf.coef_[-1])
    print("Test score", clf.score(X_te, y_te))
    print()

    print("Classification")
    X, y = load_breast_cancer(return_X_y=True)
    # same construction as above: noisy negated copy of feature 0
    X = np.column_stack([X, -X[:, 0] + 0.01 * np.random.randn(X.shape[0])])
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
    clf = OwlClassifier(weights=(1, 100), loss='squared-hinge')
    clf.fit(X_tr, y_tr)
    print("Correlated coefs", clf.coef_[0], clf.coef_[-1])
    print("Test score", clf.score(X_te, y_te))
210 | 


--------------------------------------------------------------------------------