├── .coveragerc
├── .github
│   └── workflows
│       └── test.yml
├── .gitignore
├── .travis.yml
├── .zenodo.json
├── AUTHORS
├── LICENSE
├── MANIFEST.in
├── README.rst
├── ci
│   ├── doc.sh
│   ├── pylintrc
│   └── test.sh
├── cloudbuild.yml
├── copt
│   ├── __init__.py
│   ├── constraint.py
│   ├── data
│   │   └── img1.csv
│   ├── datasets.py
│   ├── frank_wolfe.py
│   ├── loss.py
│   ├── penalty.py
│   ├── proximal_gradient.py
│   ├── randomized.py
│   ├── splitting.py
│   ├── tv_prox.py
│   ├── utils.py
│   └── utils_pytorch.py
├── doc
│   ├── Makefile
│   ├── _static
│   │   └── css
│   │       └── custom.css
│   ├── citing.rst
│   ├── conf.py
│   ├── index.rst
│   ├── logo.png
│   ├── loss_functions.rst
│   ├── make.bat
│   ├── paper
│   │   ├── biblio.bib
│   │   ├── index.tex
│   │   └── jmlr2e.sty
│   ├── solvers.rst
│   ├── sphinx_ext
│   │   └── github_link.py
│   └── utils.rst
├── examples
│   ├── README.txt
│   ├── frank_wolfe
│   │   ├── README.txt
│   │   ├── plot_sfw.py
│   │   ├── plot_sfw_real_data.py
│   │   ├── plot_sparse_benchmark.py
│   │   ├── plot_sparse_benchmark_pairwise.py
│   │   └── plot_vertex_overlap.py
│   ├── plot_accelerated.py
│   ├── plot_group_lasso.py
│   ├── plot_jax_copt.py
│   ├── plot_saga_vs_svrg.py
│   ├── proximal_splitting
│   │   ├── README.txt
│   │   ├── data
│   │   │   └── blur_matrix.npz
│   │   ├── plot_overlapping_group_lasso.py
│   │   ├── plot_sparse_nuclear_norm.py
│   │   └── plot_tv_deblurring.py
│   └── pytorch
│       ├── README.txt
│       ├── adversarial_example.py
│       └── adversarial_example_accuracies.py
├── pyproject.toml
├── pytest.ini
├── requirements.txt
├── setup.py
└── tests
    ├── test_frank_wolfe.py
    ├── test_loss.py
    ├── test_matmul_speedup.py
    ├── test_penalties.py
    ├── test_proximal_gradient.py
    ├── test_randomized.py
    ├── test_splitting.py
    └── test_stochastic_fw.py
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = True
3 | source = copt
4 | include = */copt/*
5 |
6 | [report]
7 | omit = */copt/datasets.py
8 | exclude_lines =
9 | if verbose
10 |
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test suite
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | build:
7 |
8 | runs-on: ubuntu-latest
9 | strategy:
10 | max-parallel: 4
11 | matrix:
12 | python-version: ["3.8", "3.9", "3.10"]
13 |
14 | steps:
15 | - name: Checkout repo
16 | uses: actions/checkout@v1
17 | - name: Set up Python ${{ matrix.python-version }}
18 | uses: actions/setup-python@v1
19 | with:
20 | python-version: ${{ matrix.python-version }}
21 | - name: Install
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install .
25 |         pip install flake8 pytest-parallel scikit-image coveralls coverage pytest-cov scikit-learn h5py Pillow pip-conflict-checker py
26 | - name: Check Dependencies
27 | run: |
28 | pipconflictchecker
29 | - name: Lint
30 | run: |
31 | flake8 --ignore N802,N806,W503 --select W504 `find . -name \*.py | grep -v setup.py | grep -v __init__.py | grep -v /doc/`
32 | - name: Test
33 | run: |
34 | pytest --cov-report term-missing --cov=copt
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | doc/_build
2 | dist/
3 | doc/auto_examples
4 | doc/generated
5 | .coverage
6 | .vscode
7 | .mypy_cache/
8 | copt.egg-info
9 | __pycache__
10 | .pytest_cache/
11 | doc/modules/
12 | *code-workspace
13 | *.swp
14 | .DS_Store
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.6"
4 | - "3.7"
5 | install:
6 | - pip install --upgrade -r requirements.txt
7 | - # make sure tests run with these minimal requirements
8 | - python setup.py develop
9 | - # run _one_ test file, just to make sure everything imports fine
10 | - py.test -v copt tests/test_proximal_gradient.py
11 | - # the following are only needed for the examples and coverage tests
12 | - pip install pytest-parallel scikit-image coveralls coverage pytest-cov scikit-learn h5py Pillow
13 | - py.test --version
14 | script:
15 | - NUMBA_DISABLE_JIT=1 pytest -v --cov=copt --workers auto
16 | after_success: coveralls
17 | cache:
18 | directories:
19 | - $HOME/copt_data
20 |
--------------------------------------------------------------------------------
/.zenodo.json:
--------------------------------------------------------------------------------
1 | {
2 | "creators": [{
3 | "name": "Fabian Pedregosa",
4 | "affiliation": "Google",
5 | "orcid": "0000-0003-4025-3953"
6 | }],
7 | "description": "
copt is a library for mathematical optimization written in Python.
",
8 | "access_right": "open",
9 | "license": "BSD-3-Clause",
10 | "upload_type": "software"
11 | }
12 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Fabian Pedregosa
2 | Google LLC
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | New BSD License
2 |
3 | Copyright (c) 2007–2018 Fabian Pedregosa.
4 | All rights reserved.
5 |
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are met:
9 |
10 | a. Redistributions of source code must retain the above copyright notice,
11 | this list of conditions and the following disclaimer.
12 | b. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 | c. Neither the name of the Scikit-learn Developers nor the names of
16 | its contributors may be used to endorse or promote products
17 | derived from this software without specific prior written
18 | permission.
19 |
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
31 | DAMAGE.
32 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.rst
2 | recursive-include doc *
3 | recursive-include tests *.py
4 | recursive-include examples *
5 | recursive-include sklearn *.c *.h *.pyx *.pxd *.pxi *.tp
6 | recursive-include sklearn/datasets *.csv *.csv.gz *.rst *.jpg *.txt *.arff.gz *.json.gz
7 | include COPYING
8 | include README.rst
9 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | .. image:: https://travis-ci.org/openopt/copt.svg?branch=master
2 | :target: https://travis-ci.org/openopt/copt
3 | .. image:: https://coveralls.io/repos/github/openopt/copt/badge.svg?branch=master
4 | :target: https://coveralls.io/github/openopt/copt?branch=master
5 | .. image:: https://zenodo.org/badge/46262908.svg
6 | :target: https://zenodo.org/badge/latestdoi/46262908
7 | .. image:: https://storage.googleapis.com/copt-doc/doc_status.svg
8 | :target: http://openopt.github.io/copt/
9 | .. image:: https://storage.googleapis.com/copt-doc/pylint.svg
10 | :target: https://storage.googleapis.com/copt-doc/pylint.txt
11 |
12 |
13 | Note: This package is no longer actively maintained. I won't be responding to issues. If you'd like to volunteer to maintain it, please drop me a line at f@bianp.net
14 |
15 | copt: composite optimization in Python
16 | =======================================
17 |
18 | copt is an optimization library for Python. Its goal is to provide a high-quality implementation of classical optimization algorithms under a consistent API.
19 |
20 |
21 |
22 | `Docs `_ | `Examples `_
23 |
24 |
25 |
26 |
27 | Installation
28 | ============
29 |
30 | If you already have a working installation of numpy and scipy,
31 | the easiest way to install copt is using ``pip`` ::
32 |
33 | pip install -U copt
34 |
35 |
36 | Alternatively, you can install the latest development version from GitHub with the command::
37 |
38 | pip install git+https://github.com/openopt/copt.git
39 |
40 |
41 | Citing
42 | ======
43 |
44 | If this software is useful for your research, please consider citing it as
45 |
46 | .. code::
47 |
48 | @article{copt,
49 |   author = {Fabian Pedregosa and Geoffrey Negiar and Gideon Dresdner},
50 | title = {copt: composite optimization in Python},
51 | year = 2020,
52 | DOI = {10.5281/zenodo.1283339},
53 | url={http://openopt.github.io/copt/}
54 | }
55 |
56 | Development
57 | ===========
58 |
59 | The recommended way to work on the development version is the following:
60 |
61 | 1. Clone the GitHub repo locally. This can be done with the command::
62 |
63 | git clone https://github.com/openopt/copt.git
64 |
65 | This will create a copt directory.
66 |
67 | 2. Link this directory to your Python interpreter. This can be done by
68 | running the following command from the copt directory created with the
69 | previous step::
70 |
71 | python setup.py develop
72 |
73 | Now you can run the tests with :code:`py.test tests/`
74 |
--------------------------------------------------------------------------------
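A minimal usage sketch, illustrative only and not a file from this repository: it fits an L1-regularized logistic regression with the proximal-gradient solver exported by the package, using the LogLoss and L1Norm objects defined in copt/loss.py and copt/penalty.py further below. The synthetic data and the regularization strength are arbitrary.

    import numpy as np

    from copt import minimize_proximal_gradient
    from copt.loss import LogLoss
    from copt.penalty import L1Norm

    # Arbitrary synthetic data, purely for illustration.
    rng = np.random.RandomState(0)
    A = rng.randn(100, 20)
    b = rng.randint(0, 2, 100).astype(float)  # labels in {0, 1}

    f = LogLoss(A, b)   # smooth term; f_grad returns (value, gradient)
    g = L1Norm(0.05)    # non-smooth term with prox(x, step_size)

    x0 = np.zeros(20)
    result = minimize_proximal_gradient(f.f_grad, x0, prox=g.prox, jac=True, tol=1e-8)
    print(result.x)     # sparse solution vector
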
/ci/doc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | mkdir -p _build/html/
4 | pip install -r requirements.txt
5 | pip install -U sphinx sphinx-gallery loky joblib sphinx_copybutton memory_profiler jax jaxlib anybadge numba
6 | # For pytorch examples
7 | pip install torch==1.7.0+cpu torchvision==0.8.1+cpu torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html
8 | pip install git+https://github.com/RobustBench/robustbench
9 | python setup.py install
10 | cd doc
11 | make html > doc_log.txt
12 | if [ $? -eq 0 ]; then
13 | # set up a badge depending on the result of the build
14 | echo "Building of documentation succeeded"
15 | anybadge --label=doc --value=passing --file=_build/html/doc_status.svg passing=green failing=red
16 | else
17 | echo "Building of documentation failed"
18 | anybadge --label=doc --value=failing --file=_build/html/doc_status.svg passing=green failing=red
19 | fi
20 |
--------------------------------------------------------------------------------
/ci/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #
3 | pip install -r requirements.txt
4 | pip install pytest-parallel # run tests in parallel
5 | python setup.py install
6 | # py.test --workers auto
7 |
8 | # pylint
9 | pip install pylint anybadge
10 | pylint --rcfile=ci/pylintrc --output-format=text copt tests/*.py examples/*.py examples/*/*.py | tee pylint.txt
11 | score=$(sed -n 's/^Your code has been rated at \([-0-9.]*\)\/.*/\1/p' pylint.txt)
12 | echo "Pylint score was $score"
13 | anybadge --value=$score --file=pylint.svg pylint
14 |
--------------------------------------------------------------------------------
/cloudbuild.yml:
--------------------------------------------------------------------------------
1 | steps:
2 | - name: 'python:3.6'
3 | id: Test
4 | entrypoint: /bin/sh
5 | args:
6 | - -c
7 | - 'ci/test.sh'
8 | timeout: 1800s # 30 min.
9 | - name: 'python:3.6'
10 | id: Doc
11 | entrypoint: /bin/sh
12 | args:
13 | - -c
14 | - 'ci/doc.sh'
15 | timeout: 50000s
16 | waitFor: ['-'] # The '-' indicates that this step begins immediately.
17 | - name: 'gcr.io/cloud-builders/gsutil'
18 | args: ['-m', 'cp', '-r', 'doc/_build/html/*', 'gs://openo.pt/copt/']
19 |
20 | timeout: 50000s
21 |
22 | artifacts:
23 | objects:
24 | location: 'gs://openo.pt/copt/'
25 | paths: ['doc/_build/html/doc_status.svg', 'pylint.txt', 'pylint.svg']
26 |
--------------------------------------------------------------------------------
/copt/__init__.py:
--------------------------------------------------------------------------------
1 | """COPT: composite optimization in Python."""
2 | __version__ = "0.9.1" # if you modify this, change it also in setup.py
3 |
4 | from . import datasets
5 | from . import tv_prox
6 | from . import utils
7 | from . import loss
8 | from . import constraint
9 | from .frank_wolfe import minimize_frank_wolfe
10 | from .proximal_gradient import minimize_proximal_gradient
11 | from .randomized import minimize_saga
12 | from .randomized import minimize_svrg
13 | from .randomized import minimize_vrtos
14 | from .randomized import minimize_sfw
15 | from .splitting import minimize_primal_dual
16 | from .splitting import minimize_three_split
17 |
--------------------------------------------------------------------------------
/copt/constraint.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import ma as ma
3 | from scipy import linalg
4 | from scipy.sparse import linalg as splinalg
5 |
6 | class LinfBall:
7 | """L-infinity ball.
8 |
9 | Args:
10 | alpha: float
11 | radius of the ball.
12 | """
13 | p = np.inf
14 |
15 | def __init__(self, alpha):
16 | self.alpha = alpha
17 |
18 | def prox(self, x, step_size=None):
19 | """Projection onto the L-infinity ball.
20 |
21 | Args:
22 | x: array-like
23 |
24 | Returns:
25 | p : array-like, same shape as x
26 | projection of x onto the L-infinity ball.
27 | """
28 | return x.clip(-self.alpha, self.alpha)
29 |
30 |
31 | class L2Ball:
32 | """L2 ball.
33 |
34 | Args:
35 | alpha: float
36 | radius of the ball.
37 | """
38 | p = 2
39 |
40 | def __init__(self, alpha):
41 | self.alpha = alpha
42 |
43 | def prox(self, x, step_size=None):
44 | """Projection onto the L-2 ball.
45 |
46 | Args:
47 | x: array-like
48 |
49 | Returns:
50 | p : array-like, same shape as x
51 | projection of x onto the L-2 ball.
52 | """
53 |
54 | norm = np.sqrt((x ** 2).sum())
55 | if norm <= self.alpha:
56 | return x
57 | return self.alpha * x / norm
58 |
59 |
60 | class L1Ball:
61 | """Indicator function over the L1 ball
62 |
63 | This function is 0 if the sum of absolute values is less than or equal to
64 | alpha, and infinity otherwise.
65 |
66 | Args:
67 | alpha: float
68 | radius of the ball.
69 | """
70 | p = 1
71 |
72 | def __init__(self, alpha):
73 | self.alpha = alpha
74 |
75 | def __call__(self, x):
76 | if np.abs(x).sum() <= self.alpha:
77 | return 0
78 | else:
79 | return np.inf
80 |
81 | def prox(self, x, step_size=None):
82 | """Projection onto the L-infinity ball.
83 |
84 | Parameters
85 | ----------
86 | x: array-like
87 |
88 | Returns
89 | -------
90 | p : array-like, same shape as x
91 |             projection of x onto the L1 ball.
92 | """
93 | return euclidean_proj_l1ball(x, self.alpha)
94 |
95 | def lmo(self, u, x, active_set=None):
96 | """Linear Minimization Oracle.
97 |
98 | Return s - x with s solving the linear problem
99 |             max_{||s||_1 <= alpha} <s, u>
100 |
101 | Args:
102 | u: array-like
103 | usually -gradient
104 | x: array-like
105 | usually the iterate of the considered algorithm
106 | active_set: no effect here.
107 |
108 | Returns:
109 | update_direction: array,
110 | s - x, where s is the vertex of the constraint most correlated
111 | with u
112 | fw_vertex_rep: (float, int)
113 | a hashable representation of s, for active set management
114 | None: not used here
115 | max_step_size: float
116 | 1. for a Frank-Wolfe step.
117 | """
118 | abs_u = np.abs(u)
119 | largest_coordinate = np.argmax(abs_u)
120 | sign = np.sign(u[largest_coordinate])
121 |
122 | update_direction = -x.copy()
123 | update_direction[largest_coordinate] += self.alpha * sign
124 |
125 | # Only useful for active_set management in pairwise FW
126 | fw_vertex_rep = (sign, largest_coordinate)
127 | max_step_size = 1.
128 | return update_direction, fw_vertex_rep, None, max_step_size
129 |
130 | def lmo_pairwise(self, u, x, active_set):
131 | """Pairwise Linear Minimization Oracle.
132 |
133 | Return s - v with s solving the linear problem
134 |             max_{||s||_1 <= alpha} <s, u>
135 |         and v solving the linear problem
136 |             min_{v \in active_set} <v, u>
137 |
138 | Args:
139 | u: array,
140 | usually -gradient
141 | x: array,
142 | usually the iterate of the considered algorithm
143 | active_set: used to compute v
144 |
145 | Returns:
146 | update_direction: array
147 | s - v, where s is the vertex of the constraint most correlated with u
148 | and v is the vertex of the active set least correlated with u
149 | fw_vertex_rep: (float, int)
150 | a hashable representation of s, for active set management
151 | away_vertex_rep: (float, int)
152 | a hashable representation of v, for active set management
153 | max_step_size: float
154 | max_step_size to not move out of the constraint. Given by active_set[away_vertex_rep].
155 | """
156 | update_direction, fw_vertex_rep, _, _ = self.lmo(u, x)
157 | update_direction += x
158 |
159 | def _correlation(vertex_rep, u):
160 | """Compute the correlation between vertex represented by vertex_rep and vector u."""
161 | sign, idx = vertex_rep
162 | return sign * u[idx]
163 |
164 | away_vertex_rep, max_step_size = min(active_set.items(),
165 | key=lambda item: _correlation(item[0], u))
166 |
167 | sign, idx = away_vertex_rep
168 | update_direction[idx] -= sign * self.alpha
169 | return update_direction, fw_vertex_rep, away_vertex_rep, max_step_size
170 |
171 |
172 | class SimplexConstraint:
173 | def __init__(self, s=1):
174 | self.s = s
175 |
176 | def prox(self, x, step_size):
177 | return euclidean_proj_simplex(x, self.s)
178 |
179 | def lmo(self, u, x):
180 | """Return v - x, s solving the linear problem
181 | max_{||v||_1 <= s, v >= 0}
182 | """
183 | largest_coordinate = np.argmax(u)
184 |
185 | update_direction = -x.copy()
186 | update_direction[largest_coordinate] += self.s * np.sign(
187 | u[largest_coordinate]
188 | )
189 |
190 | return update_direction, int(largest_coordinate), None, 1
191 |
192 | def euclidean_proj_simplex(v, s=1.0):
193 | r""" Compute the Euclidean projection on a positive simplex
194 |
195 | Solves the optimization problem (using the algorithm from [1]):
196 | min_w 0.5 * || w - v ||_2^2 , s.t. \sum_i w_i = s, w_i >= 0
197 |
198 | Args:
199 | v: (n,) numpy array,
200 | n-dimensional vector to project
201 | s: float, optional, default: 1,
202 | radius of the simplex
203 |
204 | Returns:
205 | w: (n,) numpy array,
206 | Euclidean projection of v on the simplex
207 |
208 | Notes:
209 | The complexity of this algorithm is in O(n log(n)) as it involves sorting v.
210 | Better alternatives exist for high-dimensional sparse vectors (cf. [1])
211 | However, this implementation still easily scales to millions of dimensions.
212 |
213 | References:
214 |         [1] Efficient Projections onto the l1-Ball for Learning in High Dimensions
215 | John Duchi, Shai Shalev-Shwartz, Yoram Singer, and Tushar Chandra.
216 | International Conference on Machine Learning (ICML 2008)
217 | http://www.cs.berkeley.edu/~jduchi/projects/DuchiSiShCh08.pdf
218 | """
219 | assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
220 | (n,) = v.shape # will raise ValueError if v is not 1-D
221 | # check if we are already on the simplex
222 | if v.sum() == s and np.alltrue(v >= 0):
223 | # best projection: itself!
224 | return v
225 | # get the array of cumulative sums of a sorted (decreasing) copy of v
226 | u = np.sort(v)[::-1]
227 | cssv = np.cumsum(u)
228 | # get the number of > 0 components of the optimal solution
229 | rho = np.nonzero(u * np.arange(1, n + 1) > (cssv - s))[0][-1]
230 | # compute the Lagrange multiplier associated to the simplex constraint
231 | theta = (cssv[rho] - s) / (rho + 1.0)
232 | # compute the projection by thresholding v using theta
233 | w = (v - theta).clip(min=0)
234 | return w
235 |
236 |
237 | def euclidean_proj_l1ball(v, s=1):
238 | """ Compute the Euclidean projection on a L1-ball
239 |
240 | Solves the optimisation problem (using the algorithm from [1]):
241 | min_w 0.5 * || w - v ||_2^2 , s.t. || w ||_1 <= s
242 |
243 | Args:
244 | v: (n,) numpy array,
245 | n-dimensional vector to project
246 | s: float, optional, default: 1,
247 | radius of the L1-ball
248 |
249 | Returns:
250 | w: (n,) numpy array,
251 | Euclidean projection of v on the L1-ball of radius s
252 |
253 | Notes:
254 | Solves the problem by a reduction to the positive simplex case
255 | See also :ref:`euclidean_proj_simplex`
256 | """
257 | assert s > 0, "Radius s must be strictly positive (%d <= 0)" % s
258 | if len(v.shape) > 1:
259 | raise ValueError
260 | # compute the vector of absolute values
261 | u = np.abs(v)
262 | # check if v is already a solution
263 | if u.sum() <= s:
264 | # L1-norm is <= s
265 | return v
266 | # v is not already a solution: optimum lies on the boundary (norm == s)
267 | # project *u* on the simplex
268 | w = euclidean_proj_simplex(u, s=s)
269 | # compute the solution to the original problem on v
270 | w *= np.sign(v)
271 | return w
272 |
273 |
274 | class TraceBall:
275 | """Projection onto the trace (aka nuclear) norm, sum of singular values
276 |
277 | Args:
278 | alpha: float
279 | radius of the ball.
280 |
281 | """
282 |
283 | is_separable = False
284 |
285 | def __init__(self, alpha, shape):
286 | assert len(shape) == 2
287 | self.shape = shape
288 | self.alpha = alpha
289 |
290 | def __call__(self, x):
291 | X = x.reshape(self.shape)
292 | if linalg.svdvals(X).sum() <= self.alpha + np.finfo(np.float32).eps:
293 | return 0
294 | else:
295 | return np.inf
296 |
297 | def prox(self, x, step_size):
298 | X = x.reshape(self.shape)
299 | U, s, Vt = linalg.svd(X, full_matrices=False)
300 | s_threshold = euclidean_proj_l1ball(s, self.alpha)
301 | return (U * s_threshold).dot(Vt).ravel()
302 |
303 | def prox_factory(self):
304 | raise NotImplementedError
305 |
306 | def lmo(self, u, x, active_set=None):
307 | """Linear Minimization Oracle.
308 |
309 | Return s - x with s solving the linear problem
310 |             max_{||s||_nuc <= alpha} <s, u>
311 |
312 | Args:
313 | u: usually -gradient
314 | x: usually the iterate of the considered algorithm
315 | active_set: no effect here.
316 |
317 | Returns:
318 | update_direction: s - x, where s is the vertex of the constraint most correlated with u
319 | None: not used here
320 | None: not used here
321 | max_step_size: 1. for a Frank-Wolfe step.
322 | """
323 | u_mat = u.reshape(self.shape)
324 | ut, _, vt = splinalg.svds(u_mat, k=1)
325 | vertex = self.alpha * np.outer(ut, vt).ravel()
326 | return vertex - x, None, None, 1.
327 |
--------------------------------------------------------------------------------
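An illustrative sketch (not part of the repository) of how the L1Ball constraint defined above behaves: prox is the Euclidean projection onto the ball, and lmo returns the Frank-Wolfe update direction toward a signed coordinate vertex. The numerical values are arbitrary.

    import numpy as np
    from copt.constraint import L1Ball

    ball = L1Ball(alpha=1.0)

    # Projection: any vector is mapped into {x : ||x||_1 <= alpha}.
    x = np.array([0.9, -0.8, 0.3])
    proj = ball.prox(x)
    print(np.abs(proj).sum())        # <= 1.0 (up to rounding)

    # LMO: the vertex s = alpha * sign(u_i) * e_i for the largest |u_i|.
    u = np.array([-0.1, 2.0, -0.5])  # e.g. a negative gradient
    direction, vertex_rep, _, max_step = ball.lmo(u, x)
    print(vertex_rep, max_step)      # (1.0, 1) and 1.0
    print(direction + x)             # the selected vertex: [0., 1., 0.]
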
/copt/frank_wolfe.py:
--------------------------------------------------------------------------------
1 | """Frank-Wolfe and related algorithms."""
2 | import warnings
3 | from collections import defaultdict
4 | import numpy as np
5 | from scipy import linalg
6 | from scipy import optimize
7 | from copt import utils
8 |
9 |
10 | EPS = np.finfo(np.float32).eps
11 |
12 |
13 | def backtracking_step_size(
14 | x,
15 | f_t,
16 | old_f_t,
17 | f_grad,
18 | certificate,
19 | lipschitz_t,
20 | max_step_size,
21 | update_direction,
22 | norm_update_direction,
23 | ):
24 | """Backtracking step-size finding routine for FW-like algorithms
25 |
26 | Args:
27 | x: array-like, shape (n_features,)
28 | Current iterate
29 |
30 | f_t: float
31 | Value of objective function at the current iterate.
32 |
33 | old_f_t: float
34 | Value of objective function at previous iterate.
35 |
36 | f_grad: callable
37 | Callable returning objective function and gradient at
38 | argument.
39 |
40 | certificate: float
41 | FW gap
42 |
43 | lipschitz_t: float
44 | Current value of the Lipschitz estimate.
45 |
46 | max_step_size: float
47 | Maximum admissible step-size.
48 |
49 | update_direction: array-like, shape (n_features,)
50 | Update direction given by the FW variant.
51 |
52 | norm_update_direction: float
53 | Squared L2 norm of update_direction
54 |
55 | Returns:
56 | step_size_t: float
57 | Step-size to be used to compute the next iterate.
58 |
59 | lipschitz_t: float
60 | Updated value for the Lipschitz estimate.
61 |
62 | f_next: float
63 | Objective function evaluated at x + step_size_t d_t.
64 |
65 | grad_next: array-like
66 | Gradient evaluated at x + step_size_t d_t.
67 | """
68 | ratio_decrease = 0.9
69 | ratio_increase = 2.0
70 | max_ls_iter = 100
71 | if old_f_t is not None:
72 | tmp = (certificate ** 2) / (2 * (old_f_t - f_t) * norm_update_direction)
73 | lipschitz_t = max(min(tmp, lipschitz_t), lipschitz_t * ratio_decrease)
74 | for _ in range(max_ls_iter):
75 | step_size_t = certificate / (norm_update_direction * lipschitz_t)
76 | if step_size_t < max_step_size:
77 | rhs = -0.5 * step_size_t * certificate
78 | else:
79 | step_size_t = max_step_size
80 | rhs = (
81 | -step_size_t * certificate
82 | + 0.5 * (step_size_t ** 2) * lipschitz_t * norm_update_direction
83 | )
84 | f_next, grad_next = f_grad(x + step_size_t * update_direction)
85 | if f_next - f_t <= rhs + EPS:
86 | # .. sufficient decrease condition verified ..
87 | break
88 | else:
89 | lipschitz_t *= ratio_increase
90 | else:
91 | warnings.warn(
92 | "Exhausted line search iterations in minimize_frank_wolfe", RuntimeWarning
93 | )
94 | return step_size_t, lipschitz_t, f_next, grad_next
95 |
96 |
97 | def update_active_set(active_set,
98 | fw_vertex_rep, away_vertex_rep,
99 | step_size):
100 |
101 | max_step_size = active_set[away_vertex_rep]
102 | active_set[fw_vertex_rep] += step_size
103 | active_set[away_vertex_rep] -= step_size
104 |
105 | if active_set[away_vertex_rep] == 0.:
106 | # drop step: remove vertex from active set
107 | del active_set[away_vertex_rep]
108 |     elif active_set[away_vertex_rep] < 0.:
109 | raise ValueError(f"The step size used is too large. "
110 | f"{step_size: .3f} vs. {max_step_size:.3f}")
111 |
112 | return active_set
113 |
114 |
115 | def minimize_frank_wolfe(
116 | fun,
117 | x0,
118 | lmo,
119 | x0_rep=None,
120 | variant='vanilla',
121 | jac="2-point",
122 | step="backtracking",
123 | lipschitz=None,
124 | args=(),
125 | max_iter=400,
126 | tol=1e-12,
127 | callback=None,
128 | verbose=0,
129 | eps=1e-8,
130 | ):
131 | r"""Frank-Wolfe algorithm.
132 |
133 |   Implements the Frank-Wolfe algorithm, see :ref:`frank_wolfe` for
134 | a more detailed description.
135 |
136 | Args:
137 | fun : callable
138 | The objective function to be minimized.
139 | ``fun(x, *args) -> float``
140 | where x is an 1-D array with shape (n,) and `args`
141 | is a tuple of the fixed parameters needed to completely
142 | specify the function.
143 |
144 | x0: array-like
145 | Initial guess for solution.
146 |
147 | lmo: callable
148 |       Takes as input a vector u of the same size as x0 and returns the update
149 |       direction, vertex representations used for active set management, and the maximum admissible step-size.
150 |
151 | x0_rep: immutable
152 | Is used to initialize the active set when variant == 'pairwise'.
153 |
154 |     variant: {'vanilla', 'pairwise'}
155 | Determines which Frank-Wolfe variant to use, along with lmo.
156 | Pairwise sets up and updates an active set of vertices.
157 | This is needed to make sure to not move out of the constraint set
158 | when using a pairwise LMO.
159 |
160 | jac : {callable, '2-point', bool}, optional
161 | Method for computing the gradient vector. If it is a callable,
162 | it should be a function that returns the gradient vector:
163 | ``jac(x, *args) -> array_like, shape (n,)``
164 | where x is an array with shape (n,) and `args` is a tuple with
165 |       the fixed parameters. Alternatively, the '2-point' option selects a finite
166 | difference scheme for numerical estimation of the gradient.
167 | If `jac` is a Boolean and is True, `fun` is assumed to return the
168 | gradient along with the objective function. If False, the gradient
169 | will be estimated using '2-point' finite difference estimation.
170 |
171 | step: str or callable, optional
172 | Step-size strategy to use. Should be one of
173 |
174 | - "backtracking", will use the backtracking line-search from [PANJ2020]_
175 |
176 | - "DR", will use the Demyanov-Rubinov step-size. This step-size minimizes a quadratic upper bound ob the objective using the gradient's lipschitz constant, passed in keyword argument `lipschitz`. [P2018]_
177 |
178 | - "sublinear", will use a decreasing step-size of the form 2/(k+2). [J2013]_
179 |
180 |       - callable, if step is a callable function, it will use the step-size returned by step(locals()).
181 |
182 | lipschitz: None or float, optional
183 | Estimate for the Lipschitz constant of the gradient. Required when step="DR".
184 |
185 | max_iter: integer, optional
186 | Maximum number of iterations.
187 |
188 | tol: float, optional
189 | Tolerance of the stopping criterion. The algorithm will stop whenever
190 | the Frank-Wolfe gap is below tol or the maximum number of iterations
191 | is exceeded.
192 |
193 | callback: callable, optional
194 | Callback to execute at each iteration. If the callable returns False
195 |       then the algorithm will immediately return.
196 |
197 | eps: float or ndarray
198 |       If jac is approximated, use this value for the finite-difference step size.
199 |
200 | verbose: int, optional
201 | Verbosity level.
202 |
203 |
204 | Returns:
205 | scipy.optimize.OptimizeResult
206 | The optimization result represented as a
207 | ``scipy.optimize.OptimizeResult`` object. Important attributes are:
208 | ``x`` the solution array, ``success`` a Boolean flag indicating if
209 | the optimizer exited successfully and ``message`` which describes
210 | the cause of the termination. See `scipy.optimize.OptimizeResult`
211 | for a description of other attributes.
212 |
213 |
214 | References:
215 |
216 | .. [J2013] Jaggi, Martin. `"Revisiting Frank-Wolfe: Projection-Free Sparse Convex Optimization." `_ ICML 2013.
217 |
218 | .. [P2018] Pedregosa, Fabian `"Notes on the Frank-Wolfe Algorithm" `_, 2018
219 |
220 | .. [PANJ2020] Pedregosa, Fabian, Armin Askari, Geoffrey Negiar, and Martin Jaggi. `"Step-Size Adaptivity in Projection-Free Optimization." `_ arXiv:1806.05123 (2020).
221 |
222 |
223 | Examples:
224 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sparse_benchmark.py`
225 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_vertex_overlap.py`
226 | """
227 | x0 = np.asanyarray(x0, dtype=float)
228 | if tol < 0:
229 | raise ValueError("Tol must be non-negative")
230 | x = x0.copy()
231 |
232 | if variant == 'vanilla':
233 | active_set = None
234 | elif variant == 'pairwise':
235 | active_set = defaultdict(float)
236 | active_set[x0_rep] = 1.
237 |
238 | else:
239 | raise ValueError("Variant must be one of {'vanilla', 'pairwise'}.")
240 |
241 | lipschitz_t = None
242 | step_size = None
243 | if lipschitz is not None:
244 | lipschitz_t = lipschitz
245 |
246 | func_and_grad = utils.build_func_grad(jac, fun, args, eps)
247 |
248 | f_t, grad = func_and_grad(x)
249 | old_f_t = None
250 |
251 | for it in range(max_iter):
252 | update_direction, fw_vertex_rep, away_vertex_rep, max_step_size = lmo(-grad, x, active_set)
253 | norm_update_direction = linalg.norm(update_direction) ** 2
254 | certificate = np.dot(update_direction, -grad)
255 |
256 | # .. compute an initial estimate for the ..
257 | # .. Lipschitz estimate if not given ...
258 | if lipschitz_t is None:
259 | eps = 1e-3
260 | grad_eps = func_and_grad(x + eps * update_direction)[1]
261 | lipschitz_t = linalg.norm(grad - grad_eps) / (
262 | eps * np.sqrt(norm_update_direction)
263 | )
264 | print("Estimated L_t = %s" % lipschitz_t)
265 |
266 | if certificate <= tol:
267 | break
268 | if hasattr(step, "__call__"):
269 | step_size = step(locals())
270 | f_next, grad_next = func_and_grad(x + step_size * update_direction)
271 | elif step == "backtracking":
272 | step_size, lipschitz_t, f_next, grad_next = backtracking_step_size(
273 | x,
274 | f_t,
275 | old_f_t,
276 | func_and_grad,
277 | certificate,
278 | lipschitz_t,
279 | max_step_size,
280 | update_direction,
281 | norm_update_direction,
282 | )
283 | elif step == "DR":
284 | if lipschitz is None:
285 | raise ValueError('lipschitz needs to be specified with step="DR"')
286 | step_size = min(
287 | certificate / (norm_update_direction * lipschitz_t), max_step_size
288 | )
289 | f_next, grad_next = func_and_grad(x + step_size * update_direction)
290 | elif step == "sublinear":
291 | # .. without knowledge of the Lipschitz constant ..
292 | # .. we take the sublinear 2/(k+2) step-size ..
293 | step_size = 2.0 / (it + 2)
294 | f_next, grad_next = func_and_grad(x + step_size * update_direction)
295 | else:
296 | raise ValueError("Invalid option step=%s" % step)
297 | if callback is not None:
298 | if callback(locals()) is False: # pylint: disable=g-bool-id-comparison
299 | break
300 | x += step_size * update_direction
301 | if variant == 'pairwise':
302 | update_active_set(active_set, fw_vertex_rep, away_vertex_rep,
303 | step_size)
304 | old_f_t = f_t
305 | f_t, grad = f_next, grad_next
306 | if callback is not None:
307 | callback(locals())
308 | return optimize.OptimizeResult(x=x, nit=it, certificate=certificate,
309 | active_set=active_set)
310 |
--------------------------------------------------------------------------------
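A short sketch (illustrative, not a repository file) of calling minimize_frank_wolfe above with the L1Ball linear minimization oracle from copt/constraint.py and a callback that records the Frank-Wolfe gap; the data and ball radius are arbitrary.

    import numpy as np
    from copt.constraint import L1Ball
    from copt.loss import LogLoss
    from copt.frank_wolfe import minimize_frank_wolfe

    rng = np.random.RandomState(0)
    A = rng.randn(50, 10)
    b = rng.randint(0, 2, 50).astype(float)

    f = LogLoss(A, b)
    ball = L1Ball(alpha=2.0)   # feasible set {x : ||x||_1 <= 2}

    gaps = []
    def cb(state):
        # The callback receives the solver's locals(); `certificate` is the FW gap.
        gaps.append(state["certificate"])

    result = minimize_frank_wolfe(f.f_grad, np.zeros(10), ball.lmo, jac=True, callback=cb)
    print(result.x, gaps[-1])
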
/copt/loss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse, special
3 | from scipy.sparse import linalg as splinalg
4 | from sklearn.utils.extmath import safe_sparse_dot
5 |
6 | from copt.utils import safe_sparse_add, njit, prange
7 |
8 |
9 | class LogLoss:
10 | r"""Logistic loss function.
11 |
12 | The logistic loss function is defined as
13 |
14 | .. math::
15 |         -\frac{1}{n}\sum_{i=1}^n \left[ b_i \log(\sigma(\bs{a}_i^T \bs{x}))
16 |         + (1 - b_i) \log(1 - \sigma(\bs{a}_i^T \bs{x})) \right]
17 |
18 | where :math:`\sigma` is the sigmoid function
19 | :math:`\sigma(t) = 1/(1 + e^{-t})`.
20 |
21 |     The input vector b must satisfy :math:`0 \leq b_i \leq 1`. When it comes from
22 | class labels, it should have the values 0 or 1.
23 |
24 | References:
25 | http://fa.bianp.net/blog/2019/evaluate_logistic/
26 | """
27 |
28 | def __init__(self, A, b, alpha=0.0):
29 | if A is None:
30 | A = sparse.eye(b.size, b.size, format="csr")
31 | self.A = A
32 | if np.max(b) > 1 or np.min(b) < 0:
33 | raise ValueError("b can only contain values between 0 and 1 ")
34 | if not A.shape[0] == b.size:
35 | raise ValueError("Dimensions of A and b do not coincide")
36 | self.b = b
37 | self.alpha = alpha
38 | self.intercept = False
39 |
40 | def __call__(self, x):
41 | return self.f_grad(x, return_gradient=False)
42 |
43 | def _sigma(self, z, idx):
44 | z0 = np.zeros_like(z)
45 | tmp = np.exp(-z[idx])
46 | z0[idx] = 1 / (1 + tmp)
47 | tmp = np.exp(z[~idx])
48 | z0[~idx] = tmp / (1 + tmp)
49 | return z0
50 |
51 | def logsig(self, x):
52 | """Compute log(1 / (1 + exp(-t))) component-wise."""
53 | out = np.zeros_like(x)
54 | idx0 = x < -33
55 | out[idx0] = x[idx0]
56 | idx1 = (x >= -33) & (x < -18)
57 | out[idx1] = x[idx1] - np.exp(x[idx1])
58 | idx2 = (x >= -18) & (x < 37)
59 | out[idx2] = -np.log1p(np.exp(-x[idx2]))
60 | idx3 = x >= 37
61 | out[idx3] = -np.exp(-x[idx3])
62 | return out
63 |
64 | def expit_b(self, x, b):
65 | """Compute sigmoid(x) - b."""
66 | idx = x < 0
67 | out = np.zeros_like(x)
68 | exp_x = np.exp(x[idx])
69 | b_idx = b[idx]
70 | out[idx] = ((1 - b_idx) * exp_x - b_idx) / (1 + exp_x)
71 | exp_nx = np.exp(-x[~idx])
72 | b_nidx = b[~idx]
73 | out[~idx] = ((1 - b_nidx) - b_nidx * exp_nx) / (1 + exp_nx)
74 | return out
75 |
76 | def f_grad(self, x, return_gradient=True):
77 | if self.intercept:
78 | x_, c = x[:-1], x[-1]
79 | else:
80 | x_, c = x, 0.0
81 | z = safe_sparse_dot(self.A, x_, dense_output=True).ravel() + c
82 | loss = np.mean((1 - self.b) * z - self.logsig(z))
83 | penalty = safe_sparse_dot(x_.T, x_, dense_output=True).ravel()[0]
84 | loss += 0.5 * self.alpha * penalty
85 |
86 | if not return_gradient:
87 | return loss
88 |
89 | z0_b = self.expit_b(z, self.b)
90 |
91 | grad = safe_sparse_add(self.A.T.dot(z0_b) / self.A.shape[0], self.alpha * x_)
92 | grad = np.asarray(grad).ravel()
93 | grad_c = z0_b.mean()
94 | if self.intercept:
95 |             return loss, np.concatenate((grad, [grad_c]))
96 |
97 | return loss, grad
98 |
99 | def hessian_mv(self, x):
100 | """Return a callable that returns matrix-vector products with the Hessian."""
101 |
102 | n_samples, n_features = self.A.shape
103 | if self.intercept:
104 | x_, c = x[:-1], x[-1]
105 | else:
106 | x_, c = x, 0.0
107 |
108 | z = special.expit(safe_sparse_dot(self.A, x_, dense_output=True).ravel() + c)
109 |
110 | # The mat-vec product of the Hessian
111 | d = z * (1 - z)
112 | if sparse.issparse(self.A):
113 | dX = safe_sparse_dot(
114 | sparse.dia_matrix((d, 0), shape=(n_samples, n_samples)), self.A
115 | )
116 | else:
117 | # Precompute as much as possible
118 | dX = d[:, np.newaxis] * self.A
119 |
120 | if self.intercept:
121 | # Calculate the double derivative with respect to intercept
122 | # In the case of sparse matrices this returns a matrix object.
123 | dd_intercept = np.squeeze(np.array(dX.sum(axis=0)))
124 |
125 | def _Hs(s):
126 | ret = np.empty_like(s)
127 | ret[:n_features] = self.A.T.dot(dX.dot(s[:n_features]))
128 | ret[:n_features] += self.alpha * s[:n_features]
129 |
130 | # For the fit intercept case.
131 | if self.intercept:
132 | ret[:n_features] += s[-1] * dd_intercept
133 | ret[-1] = dd_intercept.dot(s[:n_features])
134 | ret[-1] += d.sum() * s[-1]
135 | return ret / n_samples
136 |
137 | return _Hs
138 |
139 | def hessian_trace(self, x):
140 | """Return a callable that returns matrix-vector products with the Hessian."""
141 |
142 | n_samples, n_features = self.A.shape
143 | if self.intercept:
144 | x_, c = x[:-1], x[-1]
145 | else:
146 | x_, c = x, 0.0
147 |
148 | z = special.expit(safe_sparse_dot(self.A, x_, dense_output=True).ravel() + c)
149 |
150 | # The mat-vec product of the Hessian
151 | d = z * (1 - z)
152 | if sparse.issparse(self.A):
153 | dX = safe_sparse_dot(
154 | sparse.dia_matrix((d, 0), shape=(n_samples, n_samples)), self.A
155 | )
156 | else:
157 | # Precompute as much as possible
158 | dX = d[:, np.newaxis] * self.A
159 |
160 | if self.intercept:
161 | # Calculate the double derivative with respect to intercept
162 | # In the case of sparse matrices this returns a matrix object.
163 | dd_intercept = np.squeeze(np.array(dX.sum(axis=0)))
164 |
165 | def _Hs(s):
166 | ret = np.empty_like(s)
167 | ret[:n_features] = self.A.T.dot(dX.dot(s[:n_features]))
168 | ret[:n_features] += self.alpha * s[:n_features]
169 |
170 | # For the fit intercept case.
171 | if self.intercept:
172 | ret[:n_features] += s[-1] * dd_intercept
173 | ret[-1] = dd_intercept.dot(s[:n_features])
174 | ret[-1] += d.sum() * s[-1]
175 | return ret / n_samples
176 |
177 | return _Hs
178 |
179 | @property
180 | def partial_deriv(self):
181 | """Note: this will ignore the regularization parameter alpha"""
182 | @njit(parallel=True)
183 | def log_deriv(p, y):
184 | # derivative of logistic loss
185 | # same as in lightning (with minus sign)
186 | out = np.zeros_like(p)
187 | for i in prange(p.size):
188 | if p[i] < 0:
189 | exp_p = np.exp(p[i])
190 | out[i] = ((1 - y[i]) * exp_p - y[i]) / (1 + exp_p)
191 | else:
192 | exp_nx = np.exp(-p[i])
193 | out[i] = ((1 - y[i]) - y[i] * exp_nx) / (1 + exp_nx)
194 | return out
195 |
196 | return log_deriv
197 |
198 | @property
199 | def lipschitz(self):
200 | s = splinalg.svds(self.A, k=1, return_singular_vectors=False)[0]
201 | return 0.25 * (s * s) / self.A.shape[0] + self.alpha
202 |
203 | @property
204 | def max_lipschitz(self):
205 | from sklearn.utils.extmath import row_norms
206 |
207 | max_squared_sum = row_norms(self.A, squared=True).max()
208 |
209 | return 0.25 * max_squared_sum + self.alpha
210 |
211 |
212 | class SquareLoss:
213 | r"""Squared loss.
214 |
215 | The Squared loss is defined as
216 |
217 | .. math::
218 | \frac{1}{2n}\|A x - b\|^2 + \frac{1}{2} \alpha \|x\|^2
219 |
220 | where :math:`\|\cdot\|` is the euclidean norm.
221 | """
222 |
223 | def __init__(self, A, b, alpha=0):
224 | if A is None:
225 | A = sparse.eye(b.size, b.size, format="csr")
226 | self.b = b
227 | self.alpha = alpha
228 | self.A = A
229 | self.name = "square"
230 |
231 | def __call__(self, x):
232 | z = safe_sparse_dot(self.A, x, dense_output=True).ravel() - self.b
233 | pen = self.alpha * safe_sparse_dot(x.T, x, dense_output=True).ravel()[0]
234 | return 0.5 * (z * z).mean() + 0.5 * pen
235 |
236 | def f_grad(self, x, return_gradient=True):
237 | z = safe_sparse_dot(self.A, x, dense_output=True).ravel() - self.b
238 | pen = self.alpha * safe_sparse_dot(x.T, x, dense_output=True).ravel()[0]
239 | loss = 0.5 * (z * z).mean() + 0.5 * pen
240 | if not return_gradient:
241 | return loss
242 | grad = safe_sparse_add(self.A.T.dot(z) / self.A.shape[0], self.alpha * x.T)
243 | return loss, np.asarray(grad).ravel()
244 |
245 | @property
246 | def partial_deriv(self):
247 | @njit
248 | def square_deriv(p, y):
249 | return p - y
250 | return square_deriv
251 |
252 | @property
253 | def lipschitz(self):
254 | s = splinalg.svds(self.A, k=1, return_singular_vectors=False)[0]
255 | return (s * s) / self.A.shape[0] + self.alpha
256 |
257 | @property
258 | def max_lipschitz(self):
259 | from sklearn.utils.extmath import row_norms
260 |
261 | max_squared_sum = row_norms(self.A, squared=True).max()
262 |
263 | return max_squared_sum + self.alpha
264 |
265 |
266 | class HuberLoss:
267 | """Huber loss"""
268 |
269 | def __init__(self, A, b, alpha=0, delta=1):
270 | self.delta = delta
271 | self.A = A
272 | self.b = b
273 | self.alpha = alpha
274 | self.name = "huber"
275 |
276 | def __call__(self, x):
277 | return self.f_grad(x, return_gradient=False)
278 |
279 | def f_grad(self, x, return_gradient=True):
280 | z = safe_sparse_dot(self.A, x, dense_output=True).ravel() - self.b
281 | idx = np.abs(z) < self.delta
282 | loss = 0.5 * np.sum(z[idx] * z[idx])
283 | loss += np.sum(self.delta * (np.abs(z[~idx]) - 0.5 * self.delta))
284 | loss = (
285 | loss / z.size
286 | + 0.5 * self.alpha * safe_sparse_dot(x.T, x, dense_output=True).ravel()[0]
287 | )
288 | if not return_gradient:
289 | return loss
290 | grad = self.A[idx].T.dot(z[idx]) / self.A.shape[0] + self.alpha * x.T
291 | grad = np.asarray(grad)
292 | grad += self.A[~idx].T.dot(self.delta * np.sign(z[~idx])) / self.A.shape[0]
293 | return loss, np.asarray(grad).ravel()
294 |
295 | @property
296 | def lipschitz(self):
297 | s = splinalg.svds(self.A, k=1, return_singular_vectors=False)[0]
298 | return (s * s) / self.A.shape[0] + self.alpha
299 |
--------------------------------------------------------------------------------
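An illustrative gradient check (not a repository file) for the LogLoss class above, comparing f_grad against scipy's finite-difference check_grad; the data are random and the magnitude mentioned in the comment is a rough expectation, not a guarantee.

    import numpy as np
    from scipy.optimize import check_grad
    from copt.loss import LogLoss

    rng = np.random.RandomState(0)
    A = rng.randn(30, 5)
    b = rng.rand(30)              # LogLoss accepts any b with 0 <= b_i <= 1

    loss = LogLoss(A, b, alpha=0.1)
    x = rng.randn(5)

    value_only = lambda w: loss.f_grad(w, return_gradient=False)
    grad_only = lambda w: loss.f_grad(w)[1]

    # Difference between the analytic gradient and a finite-difference estimate;
    # this should be small (on the order of 1e-6 for well-scaled problems).
    print(check_grad(value_only, grad_only, x))
    print(loss.lipschitz)         # bound on the Lipschitz constant of the gradient
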
/copt/penalty.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse, linalg
3 |
4 | from copt.utils import njit
5 |
6 |
7 | class L1Norm:
8 | """L1 norm, that is, the sum of absolute values:
9 |
10 | .. math::
11 |         \\alpha\\sum_{i=1}^d |x_i|
12 |
13 | Args:
14 | alpha: float
15 | constant multiplying the L1 norm
16 |
17 | """
18 |
19 | def __init__(self, alpha):
20 | self.alpha = alpha
21 |
22 | def __call__(self, x):
23 | return self.alpha * np.abs(x).sum()
24 |
25 | def prox(self, x, step_size):
26 | """Proximal operator of the L1 norm.
27 |
28 | This routine can be used in gradient-based methods like
29 | minimize_proximal_gradient, minimize_three_split and
30 | minimize_primal_dual.
31 | """
32 | return np.fmax(x - self.alpha * step_size, 0) - np.fmax(
33 | -x - self.alpha * step_size, 0
34 | )
35 |
36 | def prox_factory(self, n_features):
37 | """Proximal operator of the L1 norm.
38 |
39 | This method is meant to be used with stochastic algorithms that need
40 | access to a proximal operator over a potentially sparse vector,
41 | like minimize_saga, minimize_svrg and minimize_vrtos
42 | """
43 | alpha = self.alpha
44 |
45 | @njit
46 | def _prox_L1(x, i, indices, indptr, d, step_size):
47 | for j in range(indptr[i], indptr[i + 1]):
48 | j_idx = indices[j] # for L1 this is the same
49 | a = x[j_idx] - alpha * d[j_idx] * step_size
50 | b = -x[j_idx] - alpha * d[j_idx] * step_size
51 | x[j_idx] = np.fmax(a, 0) - np.fmax(b, 0)
52 |
53 | return _prox_L1, sparse.eye(n_features, format="csr")
54 |
55 |
56 | class GroupL1:
57 | """
58 | Group Lasso penalty
59 |
60 | Args:
61 | alpha: float
62 | Constant multiplying this loss
63 |
64 |       groups: list of lists of contiguous, increasing feature indices
65 |
66 | """
67 |
68 | def __init__(self, alpha, groups):
69 | self.alpha = alpha
70 | # groups need to be increasing
71 | for i, g in enumerate(groups):
72 | if not np.all(np.diff(g) == 1):
73 | raise ValueError("Groups must be contiguous")
74 | if i > 0 and groups[i - 1][-1] >= g[0]:
75 | raise ValueError("Groups must be increasing")
76 | self.groups = groups
77 |
78 | def __call__(self, x):
79 | return self.alpha * np.sum([np.linalg.norm(x[g]) for g in self.groups])
80 |
81 | def prox(self, x, step_size):
82 | out = x.copy()
83 | for g in self.groups:
84 |
85 | norm = np.linalg.norm(x[g])
86 | if norm > self.alpha * step_size:
87 | out[g] -= step_size * self.alpha * out[g] / norm
88 | else:
89 | out[g] = 0
90 | return out
91 |
92 | def prox_factory(self, n_features):
93 | B_data = np.zeros(n_features)
94 | B_indices = np.arange(n_features, dtype=np.int32)
95 | B_indptr = np.zeros(n_features + 1, dtype=np.int32)
96 |
97 | feature_pointer = 0
98 | block_pointer = 0
99 | for g in self.groups:
100 | while feature_pointer < g[0]:
101 | # non-penalized feature
102 | B_data[feature_pointer] = -1.0
103 | B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
104 | feature_pointer += 1
105 | block_pointer += 1
106 | B_indptr[block_pointer + 1] = B_indptr[block_pointer]
107 | for _ in g:
108 | B_data[feature_pointer] = 1.0
109 | B_indptr[block_pointer + 1] += 1
110 | feature_pointer += 1
111 | block_pointer += 1
112 | for _ in range(feature_pointer, n_features):
113 | B_data[feature_pointer] = -1.0
114 | B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
115 | feature_pointer += 1
116 | block_pointer += 1
117 |
118 | B_indptr = B_indptr[: block_pointer + 1]
119 | B = sparse.csr_matrix((B_data, B_indices, B_indptr))
120 | alpha = self.alpha
121 |
122 | @njit
123 | def _prox_gl(x, i, indices, indptr, d, step_size):
124 | for b in range(indptr[i], indptr[i + 1]):
125 | h = indices[b]
126 | if B_data[B_indices[B_indptr[h]]] <= 0:
127 | continue
128 | ss = step_size * d[h]
129 | norm = 0.0
130 | for j in range(B_indptr[h], B_indptr[h + 1]):
131 | j_idx = B_indices[j]
132 | norm += x[j_idx] ** 2
133 | norm = np.sqrt(norm)
134 | if norm > alpha * ss:
135 | for j in range(B_indptr[h], B_indptr[h + 1]):
136 | j_idx = B_indices[j]
137 | x[j_idx] *= 1 - alpha * ss / norm
138 | else:
139 | for j in range(B_indptr[h], B_indptr[h + 1]):
140 | j_idx = B_indices[j]
141 | x[j_idx] = 0.0
142 |
143 | return _prox_gl, B
144 |
145 |
146 | class FusedLasso:
147 | """
148 | Fused Lasso penalty
149 |
150 | Args:
151 | alpha: float
152 | Constant multiplying this function.
153 | """
154 |
155 | def __init__(self, alpha):
156 | self.alpha = alpha
157 |
158 | def __call__(self, x):
159 | return self.alpha * np.sum(np.abs(np.diff(x)))
160 |
161 | def prox(self, x, step_size):
162 | # imported here to avoid circular imports
163 | from copt import tv_prox
164 |
165 | return tv_prox.prox_tv1d(x, step_size * self.alpha)
166 |
167 | def prox_1_factory(self, n_features):
168 | B_1_data = np.ones(n_features)
169 | B_1_indices = np.arange(n_features, dtype=np.int32)
170 | B_1_indptr = np.arange(0, n_features + 1, 2, dtype=np.int32)
171 | if n_features % 2 == 1:
172 | B_1_indptr = np.concatenate((B_1_indptr, [B_1_indptr[-1] + 1]))
173 | B_1_data[-1] = -1
174 | n_blocks = (n_features + 1) // 2
175 | B_1 = sparse.csr_matrix(
176 | (B_1_data, B_1_indices, B_1_indptr), shape=(n_blocks, n_features)
177 | )
178 | alpha = self.alpha
179 |
180 | @njit
181 | def _prox_1_fl(x, i, indices, indptr, d, step_size):
182 | for b in range(indptr[i], indptr[i + 1]):
183 | h = indices[b]
184 | j_idx = B_1_indices[B_1_indptr[h]]
185 | if B_1_data[j_idx] <= 0:
186 | continue
187 | ss = step_size * d[h] * alpha
188 | if x[j_idx] - ss >= x[j_idx + 1] + ss:
189 | x[j_idx] -= ss
190 | x[j_idx + 1] += ss
191 | elif x[j_idx] + ss <= x[j_idx + 1] - ss:
192 | x[j_idx] += ss
193 | x[j_idx + 1] -= ss
194 | else:
195 | avg = (x[j_idx] + x[j_idx + 1]) / 2.0
196 | x[j_idx] = avg
197 | x[j_idx + 1] = avg
198 |
199 | return _prox_1_fl, B_1
200 |
201 | def prox_2_factory(self, n_features):
202 | B_2_data = np.ones(n_features)
203 | B_2_indices = np.arange(n_features, dtype=np.int32)
204 | _indptr = np.arange(1, n_features + 2, 2, dtype=np.int32)
205 | B_2_indptr = np.concatenate(([0], _indptr))
206 | B_2_data[0] = -1
207 | if n_features % 2 == 0:
208 | B_2_indptr[-1] -= 1
209 | B_2_data[-1] = -1
210 | n_blocks = n_features // 2 + 1
211 | B_2 = sparse.csr_matrix(
212 | (B_2_data, B_2_indices, B_2_indptr), shape=(n_blocks, n_features)
213 | )
214 | alpha = self.alpha
215 |
216 | @njit
217 | def _prox_2_fl(x, i, indices, indptr, d, step_size):
218 | for b in range(indptr[i], indptr[i + 1]):
219 | h = indices[b]
220 | j_idx = B_2_indices[B_2_indptr[h]]
221 | if B_2_data[j_idx] <= 0:
222 | continue
223 | ss = step_size * d[h] * alpha
224 | if x[j_idx] - ss >= x[j_idx + 1] + ss:
225 | x[j_idx] -= ss
226 | x[j_idx + 1] += ss
227 | elif x[j_idx] + ss <= x[j_idx + 1] - ss:
228 | x[j_idx] += ss
229 | x[j_idx + 1] -= ss
230 | else:
231 | avg = (x[j_idx] + x[j_idx + 1]) / 2.0
232 | x[j_idx] = avg
233 | x[j_idx + 1] = avg
234 |
235 | return _prox_2_fl, B_2
236 |
237 |
238 | class TraceNorm:
239 | """Trace (aka nuclear) norm, sum of singular values.
240 |
241 | Args:
242 | alpha: float
243 | Constant multiplying this function.
244 |       shape: tuple of two ints
245 | Shape of original matrix, since input is given as
246 | a raveled vector.
247 | """
248 |
249 | is_separable = False
250 |
251 | def __init__(self, alpha, shape):
252 | assert len(shape) == 2
253 | self.shape = shape
254 | self.alpha = alpha
255 |
256 | def __call__(self, x):
257 | X = x.reshape(self.shape)
258 | return self.alpha * linalg.svdvals(X).sum()
259 |
260 | def prox(self, x, step_size):
261 | X = x.reshape(self.shape)
262 | U, s, Vt = linalg.svd(X, full_matrices=False)
263 | s_threshold = np.fmax(s - self.alpha * step_size, 0) - np.fmax(
264 | -s - self.alpha * step_size, 0
265 | )
266 | return (U * s_threshold).dot(Vt).ravel()
267 |
268 | def prox_factory(self):
269 | raise NotImplementedError
270 |
271 |
272 | class TotalVariation2D:
273 | """2-dimensional Total Variation pseudo-norm.
274 |
275 | Args:
276 | alpha: float
277 | Constant multiplying this function.
278 |       shape: tuple of two ints
279 | Shape of original matrix, since input is given as
280 | a raveled vector.
281 | """
282 |
283 | def __init__(self, alpha, shape, max_iter=100, tol=1e-6):
284 | self.alpha = alpha
285 | self.n_rows = shape[0]
286 | self.n_cols = shape[1]
287 | self.max_iter = max_iter
288 | self.tol = tol
289 |
290 | def __call__(self, x):
291 | img = x.reshape((self.n_rows, self.n_cols))
292 | tmp1 = np.abs(np.diff(img, axis=0))
293 | tmp2 = np.abs(np.diff(img, axis=1))
294 | return self.alpha * (tmp1.sum() + tmp2.sum())
295 |
296 | def prox(self, x, step_size):
297 | # here to avoid circular imports
298 | from copt import tv_prox
299 |
300 | return tv_prox.prox_tv2d(
301 | x,
302 | step_size * self.alpha,
303 | self.n_rows,
304 | self.n_cols,
305 | max_iter=self.max_iter,
306 | tol=self.tol,
307 | )
--------------------------------------------------------------------------------
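An illustrative sketch (not a repository file) of the group soft-thresholding performed by GroupL1.prox above: groups whose norm is below alpha * step_size are zeroed, the others are shrunk. The groups and values are arbitrary; groups must be contiguous and increasing, as the constructor checks.

    import numpy as np
    from copt.penalty import GroupL1

    groups = [[0, 1], [2, 3]]             # two contiguous groups of two features
    pen = GroupL1(alpha=1.0, groups=groups)

    x = np.array([3.0, 4.0, 0.1, 0.1])
    out = pen.prox(x, step_size=1.0)

    # First group: norm 5 > 1, shrunk by a factor (1 - 1/5) -> [2.4, 3.2].
    # Second group: norm ~0.14 <= 1, set exactly to zero.
    print(out)                            # approx [2.4, 3.2, 0.0, 0.0]
    print(pen(x))                         # penalty value: alpha * (5 + 0.141...)
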
/copt/proximal_gradient.py:
--------------------------------------------------------------------------------
1 | # python3
2 | """Proximal-gradient algorithms."""
3 | import warnings
4 | import numpy as np
5 | from scipy import optimize
6 | from copt import utils
7 |
8 |
9 | def minimize_proximal_gradient(
10 | fun,
11 | x0,
12 | prox=None,
13 | jac="2-point",
14 | tol=1e-6,
15 | max_iter=500,
16 | args=(),
17 | verbose=0,
18 | callback=None,
19 | step="backtracking",
20 | accelerated=False,
21 | eps=1e-8,
22 | max_iter_backtracking=1000,
23 | backtracking_factor=0.6,
24 | trace_certificate=False,
25 | ):
26 | """Proximal gradient descent.
27 |
28 | Solves problems of the form
29 |
30 | minimize_x f(x) + g(x)
31 |
32 | where f is a differentiable function and we have access to the proximal
33 | operator of g.
34 |
35 | Args:
36 | fun : callable
37 | The objective function to be minimized.
38 | ``fun(x, *args) -> float``
39 | where x is an 1-D array with shape (n,) and `args`
40 | is a tuple of the fixed parameters needed to completely
41 | specify the function.
42 |
43 | x0 : ndarray, shape (n,)
44 | Initial guess. Array of real elements of size (n,),
45 | where 'n' is the number of independent variables.
46 |
47 | jac : {callable, '2-point', bool}, optional
48 | Method for computing the gradient vector. If it is a callable,
49 | it should be a function that returns the gradient vector:
50 | ``jac(x, *args) -> array_like, shape (n,)``
51 | where x is an array with shape (n,) and `args` is a tuple with
52 |       the fixed parameters. Alternatively, the '2-point' option selects a finite
53 | difference scheme for numerical estimation of the gradient.
54 | If `jac` is a Boolean and is True, `fun` is assumed to return the
55 | gradient along with the objective function. If False, the gradient
56 | will be estimated using '2-point' finite difference estimation.
57 |
58 | prox : callable, optional.
59 |       Proximal operator of g.
60 |
61 | args : tuple, optional
62 | Extra arguments passed to the objective function and its
63 | derivatives.
64 |
65 | tol: float, optional
66 | Tolerance of the optimization procedure. The iteration stops when the gradient mapping
67 | (a generalization of the gradient to non-smooth functions) is below this tolerance.
68 |
69 | max_iter : int, optional.
70 | Maximum number of iterations.
71 |
72 | verbose : int, optional.
73 | Verbosity level, from 0 (no output) to 2 (output on each iteration)
74 |
75 | callback : callable.
76 | callback function (optional). Takes a single argument (x) with the
77 | current coefficients in the algorithm. The algorithm will exit if
78 | callback returns False.
79 |
80 | step : "backtracking" or callable.
81 | Step-size strategy to use. "backtracking" will use a backtracking line-search,
82 | while callable will use the value returned by step(locals()).
83 |
84 | accelerated: boolean
85 | Whether to use the accelerated variant of the algorithm.
86 |
87 | eps: float or ndarray
88 |       If jac is approximated, use this value for the finite-difference step size.
89 |
90 |     max_iter_backtracking: int
91 |       Maximum number of backtracking line-search iterations.
92 |     backtracking_factor: float
93 |       Factor by which the step-size is decreased when backtracking.
94 |     trace_certificate: bool
95 |       Whether to store the certificate value at each iteration in the result.
96 | Returns:
97 | res : The optimization result represented as a
98 | ``scipy.optimize.OptimizeResult`` object. Important attributes are:
99 | ``x`` the solution array, ``success`` a Boolean flag indicating if
100 | the optimizer exited successfully and ``message`` which describes
101 | the cause of the termination. See `scipy.optimize.OptimizeResult`
102 | for a description of other attributes.
103 |
104 | References:
105 | Beck, Amir, and Marc Teboulle. "Gradient-based algorithms with applications
106 | to signal recovery." Convex optimization in signal processing and
107 | communications (2009)
108 |
109 | Examples:
110 | * :ref:`sphx_glr_auto_examples_plot_group_lasso.py`
111 | """
112 | x = np.asarray(x0).flatten()
113 | if max_iter_backtracking <= 0:
114 | raise ValueError("Line search iterations need to be greater than 0")
115 |
116 | if prox is None:
117 |
118 | def _prox(x, _):
119 | return x
120 |
121 | prox = _prox
122 |
123 | success = False
124 | certificate = np.nan
125 |
126 | func_and_grad = utils.build_func_grad(jac, fun, args, eps)
127 |
128 | # find initial step-size
129 | if step == "backtracking":
130 | step_size = 1.8 / utils.init_lipschitz(func_and_grad, x0)
131 | else:
132 | # to avoid step_size being undefined upon return
133 | step_size = None
134 |
135 | n_iterations = 0
136 | certificate_list = []
137 | # .. a while loop instead of a for loop ..
138 | # .. allows for infinite or floating point max_iter ..
139 | if not accelerated:
140 | fk, grad_fk = func_and_grad(x)
141 | while True:
142 | if callback is not None:
143 | if callback(locals()) is False: # pylint: disable=g-bool-id-comparison
144 | break
145 | # .. compute gradient and step size
146 | if hasattr(step, "__call__"):
147 | step_size = step(locals())
148 | x_next = prox(x - step_size * grad_fk, step_size)
149 | update_direction = x_next - x
150 | f_next, grad_next = func_and_grad(x_next)
151 | elif step == "backtracking":
152 | x_next = prox(x - step_size * grad_fk, step_size)
153 | update_direction = x_next - x
154 | step_size *= 1.1
155 | for _ in range(max_iter_backtracking):
156 | f_next, grad_next = func_and_grad(x_next)
157 | rhs = (
158 | fk
159 | + grad_fk.dot(update_direction)
160 | + update_direction.dot(update_direction) / (2.0 * step_size)
161 | )
162 | if f_next <= rhs:
163 | # .. step size found ..
164 | break
165 | else:
166 | # .. backtracking, reduce step size ..
167 | step_size *= backtracking_factor
168 | x_next = prox(x - step_size * grad_fk, step_size)
169 | update_direction = x_next - x
170 | else:
171 | warnings.warn("Maximum number of line-search iterations reached")
172 | elif step == "fixed":
173 | x_next = prox(x - step_size * grad_fk, step_size)
174 | update_direction = x_next - x
175 | f_next, grad_next = func_and_grad(x_next)
176 | else:
177 | raise ValueError("Step-size strategy not understood")
178 | certificate = np.linalg.norm((x - x_next) / step_size)
179 | if trace_certificate:
180 | certificate_list.append(certificate)
181 | x[:] = x_next
182 | fk = f_next
183 | grad_fk = grad_next
184 |
185 | if certificate < tol:
186 | success = True
187 | break
188 |
189 | if n_iterations >= max_iter:
190 | break
191 | else:
192 | n_iterations += 1
193 | if n_iterations >= max_iter:
194 | warnings.warn(
195 | "minimize_proximal_gradient did not reach the desired tolerance level",
196 | RuntimeWarning,
197 | )
198 | else:
199 | tk = 1
200 | # .. a while loop instead of a for loop ..
201 | # .. allows for infinite or floating point max_iter ..
202 | yk = x.copy()
203 | while True:
204 | grad_fk = func_and_grad(yk)[1]
205 | if callback is not None:
206 | if callback(locals()) is False: # pylint: disable=g-bool-id-comparison
207 | break
208 |
209 | # .. compute gradient and step size
210 | if hasattr(step, "__call__"):
211 | current_step_size = step(locals())
212 | x_next = prox(yk - current_step_size * grad_fk, current_step_size)
213 | t_next = (1 + np.sqrt(1 + 4 * tk * tk)) / 2
214 | yk = x_next + ((tk - 1.0) / t_next) * (x_next - x)
215 |
218 |
219 | x_prox = prox(
220 | x_next - current_step_size * func_and_grad(x_next)[1],
221 | current_step_size,
222 | )
223 | certificate = np.linalg.norm((x - x_prox) / current_step_size)
224 | tk = t_next
225 | x = x_next.copy()
226 |
227 | elif step == "backtracking":
228 | current_step_size = step_size
229 | x_next = prox(yk - current_step_size * grad_fk, current_step_size)
230 | for _ in range(max_iter_backtracking):
231 | update_direction = x_next - yk
232 | if func_and_grad(x_next)[0] <= func_and_grad(yk)[0] + grad_fk.dot(
233 | update_direction
234 | ) + update_direction.dot(update_direction) / (
235 | 2.0 * current_step_size
236 | ):
237 | # .. step size found ..
238 | break
239 | else:
240 | # .. backtracking, reduce step size ..
241 | current_step_size *= backtracking_factor
242 | x_next = prox(
243 | yk - current_step_size * grad_fk, current_step_size
244 | )
245 | else:
246 | warnings.warn("Maximum number of line-search iterations reached")
247 | t_next = (1 + np.sqrt(1 + 4 * tk * tk)) / 2
248 | yk = x_next + ((tk - 1.0) / t_next) * (x_next - x)
249 |
250 | x_prox = prox(
251 | x_next - current_step_size * func_and_grad(x_next)[1],
252 | current_step_size,
253 | )
254 | certificate = np.linalg.norm((x - x_prox) / current_step_size)
255 | if trace_certificate:
256 | certificate_list.append(certificate)
257 | tk = t_next
258 | x = x_next.copy()
259 |
260 | if certificate < tol:
261 | success = True
262 | break
263 |
264 | if n_iterations >= max_iter:
265 | break
266 | else:
267 | n_iterations += 1
268 |
269 | if n_iterations >= max_iter:
270 | warnings.warn(
271 | "minimize_proximal_gradient did not reach the desired tolerance level",
272 | RuntimeWarning,
273 | )
274 |
275 | return optimize.OptimizeResult(
276 | x=x,
277 | success=success,
278 | certificate=certificate,
279 | nit=n_iterations,
280 | step_size=step_size,
281 | trace_certificate=certificate_list,
282 | )
283 |
284 |
--------------------------------------------------------------------------------
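
The docstring above describes the full API of minimize_proximal_gradient; the following is a minimal usage sketch, not part of the repository, solving a lasso-type problem. It assumes the solver is exposed at the package top level as copt.minimize_proximal_gradient (as doc/solvers.rst suggests); the data and the soft-thresholding prox are illustrative only.

import numpy as np
import copt as cp

rng = np.random.RandomState(0)
A = rng.randn(20, 10)
b = rng.randn(20)
alpha = 0.1  # illustrative regularization strength

def fun(x):
    # smooth part: least-squares data fit
    return 0.5 * np.linalg.norm(A.dot(x) - b) ** 2

def jac(x):
    # gradient of the smooth part
    return A.T.dot(A.dot(x) - b)

def prox_l1(x, step_size):
    # soft-thresholding: proximal operator of alpha * ||.||_1
    return np.sign(x) * np.maximum(np.abs(x) - alpha * step_size, 0)

result = cp.minimize_proximal_gradient(fun, np.zeros(10), prox=prox_l1, jac=jac, tol=1e-8)
print(result.x, result.certificate)
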
/copt/splitting.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import numpy as np
3 | from scipy import optimize, linalg, sparse
4 |
5 | from . import utils
6 |
7 |
8 | def minimize_three_split(
9 | f_grad,
10 | x0,
11 | prox_1=None,
12 | prox_2=None,
13 | tol=1e-6,
14 | max_iter=1000,
15 | verbose=0,
16 | callback=None,
17 | line_search=True,
18 | step_size=None,
19 | max_iter_backtracking=100,
20 | backtracking_factor=0.7,
21 | h_Lipschitz=None,
22 | args_prox=(),
23 | ):
24 | """Davis-Yin three operator splitting method.
25 |
26 | This algorithm can solve problems of the form
27 |
28 | minimize_x f(x) + g(x) + h(x)
29 |
30 | where f is a smooth function and g and h are (possibly non-smooth)
31 | functions for which the proximal operator is known.
32 |
33 | Args:
34 | f_grad: callable
35 | Returns the function value and gradient of the objective function.
36 | With return_gradient=False, returns only the function value.
37 |
38 | x0 : array-like
39 | Initial guess
40 |
41 | prox_1 : callable or None, optional
42 | prox_1(x, alpha, *args) returns the proximal operator of g at x
43 | with parameter alpha.
44 |
45 | prox_2 : callable or None, optional
46 | prox_2(x, alpha, *args) returns the proximal operator of h at x
47 | with parameter alpha.
48 |
49 | tol: float, optional
50 | Tolerance of the stopping criterion.
51 |
52 | max_iter : int, optional
53 | Maximum number of iterations.
54 |
55 | verbose : int, optional
56 | Verbosity level, from 0 (no output) to 2 (output on each iteration)
57 |
58 | callback : callable, optional
59 | Callback function. It is called at each iteration with the solver's
60 | locals() dictionary. The algorithm will exit if
61 | callback returns False.
62 |
63 | line_search : boolean, optional
64 | Whether to perform line-search to estimate the step size.
65 |
66 | step_size : float, optional
67 | Starting value for the line-search procedure.
68 |
69 | max_iter_backtracking : int, optional
70 | Maximum number of backtracking iterations, used in the line search.
71 |
72 | backtracking_factor : float, optional
73 | The amount to backtrack by during line search.
74 |
75 | args_prox : tuple, optional
76 | Extra arguments passed to the prox functions.
77 |
78 | h_Lipschitz : float, optional
79 | If given, h is assumed to be Lipschitz continuous with constant h_Lipschitz.
80 |
81 |
82 | Returns:
83 | res : OptimizeResult
84 | The optimization result represented as a
85 | ``scipy.optimize.OptimizeResult`` object. Important attributes are:
86 | ``x`` the solution array, ``success`` a Boolean flag indicating if
87 | the optimizer exited successfully and ``message`` which describes
88 | the cause of the termination. See `scipy.optimize.OptimizeResult`
89 | for a description of other attributes.
90 |
91 |
92 | References:
93 | [1] Davis, Damek, and Wotao Yin. `"A three-operator splitting scheme and
94 | its optimization applications."
95 | `_ Set-Valued and Variational
96 | Analysis, 2017.
97 |
98 | [2] Pedregosa, Fabian, and Gauthier Gidel. `"Adaptive Three Operator
99 | Splitting." `_ Proceedings of the 35th
100 | International Conference on Machine Learning, 2018.
101 | """
102 | success = False
103 | if not max_iter_backtracking > 0:
104 | raise ValueError("Line search iterations need to be greater than 0")
105 |
106 | if prox_1 is None:
107 |
108 | def prox_1(x, s, *args):
109 | return x
110 |
111 | if prox_2 is None:
112 |
113 | def prox_2(x, s, *args):
114 | return x
115 |
116 | if step_size is None:
117 | line_search = True
118 | step_size = 1.0 / utils.init_lipschitz(f_grad, x0)
119 |
120 | z = prox_2(x0, step_size, *args_prox)
121 | LS_EPS = np.finfo(float).eps
122 |
123 | fk, grad_fk = f_grad(z)
124 | x = prox_1(z - step_size * grad_fk, step_size, *args_prox)
125 | u = np.zeros_like(x)
126 |
127 | for it in range(max_iter):
128 |
129 | fk, grad_fk = f_grad(z)
130 | x = prox_1(z - step_size * (u + grad_fk), step_size, *args_prox)
131 | incr = x - z
132 | norm_incr = np.linalg.norm(incr)
133 | ls = norm_incr > 1e-7 and line_search
134 | if ls:
135 | for it_ls in range(max_iter_backtracking):
136 | x = prox_1(z - step_size * (u + grad_fk), step_size, *args_prox)
137 | incr = x - z
138 | norm_incr = np.linalg.norm(incr)
139 | rhs = fk + grad_fk.dot(incr) + (norm_incr ** 2) / (2 * step_size)
140 | ls_tol = f_grad(x, return_gradient=False) - rhs
141 | if ls_tol <= LS_EPS:
142 | # step size found
143 | # if ls_tol > 0:
144 | # ls_tol = 0.
145 | break
146 | else:
147 | step_size *= backtracking_factor
148 |
149 | z = prox_2(x + step_size * u, step_size, *args_prox)
150 | u += (x - z) / step_size
151 | certificate = norm_incr / step_size
152 |
153 | if ls and h_Lipschitz is not None:
154 | if h_Lipschitz == 0:
155 | step_size = step_size * 1.02
156 | else:
157 | quot = h_Lipschitz ** 2
158 | tmp = np.sqrt(step_size ** 2 + (2 * step_size / quot) * (-ls_tol))
159 | step_size = min(tmp, step_size * 1.02)
160 |
161 | if callback is not None:
162 | if callback(locals()) is False:
163 | break
164 |
165 | if it > 0 and certificate < tol:
166 | success = True
167 | break
168 |
169 | return optimize.OptimizeResult(
170 | x=x, success=success, nit=it, certificate=certificate, step_size=step_size
171 | )
172 |
173 |
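
As a hedged illustration of the minimize_three_split API above (not taken from the repository), the sketch below minimizes a least-squares term plus an L1 penalty (prox_1) under a non-negativity constraint (prox_2). The data and prox functions are assumptions made for the example.

import numpy as np
import copt as cp

rng = np.random.RandomState(0)
A = rng.randn(30, 15)
b = rng.randn(30)
alpha = 0.05  # illustrative regularization strength

def f_grad(x, return_gradient=True):
    # smooth term and its gradient; supports return_gradient=False as required
    residual = A.dot(x) - b
    value = 0.5 * residual.dot(residual)
    if not return_gradient:
        return value
    return value, A.T.dot(residual)

def prox_l1(x, step_size, *args):
    # proximal operator of alpha * ||.||_1 (soft-thresholding)
    return np.sign(x) * np.maximum(np.abs(x) - alpha * step_size, 0)

def prox_nonneg(x, step_size, *args):
    # projection onto the non-negative orthant
    return np.maximum(x, 0)

result = cp.minimize_three_split(f_grad, np.zeros(15), prox_1=prox_l1, prox_2=prox_nonneg)
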
174 | def minimize_primal_dual(
175 | f_grad,
176 | x0,
177 | prox_1=None,
178 | prox_2=None,
179 | L=None,
180 | tol=1e-12,
181 | max_iter=1000,
182 | callback=None,
183 | step_size=1.0,
184 | step_size2=None,
185 | line_search=True,
186 | max_iter_ls=20,
187 | verbose=0,
188 | ):
189 | """Primal-dual hybrid gradient splitting method.
190 |
191 | This method solves optimization problems of the form
192 |
193 | minimize_x f(x) + g(x) + h(L x)
194 |
195 | where f is a smooth function and g and h are (possibly non-smooth)
196 | functions for which the proximal operators are known.
197 |
198 | Args:
199 | f_grad: callable
200 | Returns the function value and gradient of the objective function.
201 | It should accept the optional argument return_gradient, and when False
202 | it should return only the function value.
203 |
204 | prox_1 : callable of the form prox_1(x, alpha)
205 | prox_1(x, alpha, *args) returns the proximal operator of g at x
206 | with parameter alpha.
207 |
208 | prox_2 : callable or None
209 | prox_2(y, alpha, *args) returns the proximal operator of h at y
210 | with parameter alpha.
211 |
212 | x0 : array-like
213 | Initial guess of solution.
214 |
215 | L : array-like or linear operator
216 | Linear operator inside the h term. It may be any of the following types:
217 | - ndarray
218 | - matrix
219 | - sparse matrix (e.g. csr_matrix, lil_matrix, etc.)
220 | - LinearOperator
221 | - An object with .shape and .matvec attributes
222 |
223 | max_iter : int
224 | Maximum number of iterations.
225 |
226 | verbose : int
227 | Verbosity level, from 0 (no output) to 2 (output on each iteration)
228 |
229 | callback : callable, optional.
230 | Callback function. It is called at each iteration with the solver's
231 | locals() dictionary. The algorithm will exit if
232 | callback returns False.
233 |
234 | Returns:
235 | res : OptimizeResult
236 | The optimization result represented as a
237 | ``scipy.optimize.OptimizeResult`` object. Important attributes are:
238 | ``x`` the solution array, ``success`` a Boolean flag indicating if
239 | the optimizer exited successfully and ``message`` which describes
240 | the cause of the termination. See `scipy.optimize.OptimizeResult`
241 | for a description of other attributes.
242 |
243 | References:
244 |
245 | * Malitsky, Yura, and Thomas Pock. `A first-order primal-dual algorithm with linesearch `_,
246 | SIAM Journal on Optimization (2018) (Algorithm 4 for the line-search variant)
247 |
248 | * Condat, Laurent. "A primal-dual splitting method for convex optimization
249 | involving Lipschitzian, proximable and linear composite terms." Journal of
250 | Optimization Theory and Applications (2013).
251 | """
252 | x = np.array(x0, copy=True)
253 | n_features = x.size
254 |
255 | if L is None:
256 | L = sparse.eye(n_features, n_features, format="csr")
257 | L = sparse.linalg.aslinearoperator(L)
258 |
259 | y = L.matvec(x)
260 |
261 | success = False
262 | if not max_iter_ls > 0:
263 | raise ValueError("Line search iterations need to be greater than 0")
264 |
265 | if prox_1 is None:
266 |
267 | def prox_1(x, step_size):
268 | return x
269 |
270 | if prox_2 is None:
271 |
272 | def prox_2(x, step_size):
273 | return x
274 |
275 | # conjugate of prox_2
276 | def prox_2_conj(x, ss):
277 | return x - ss * prox_2(x / ss, 1.0 / ss)
278 |
279 | # .. main iteration ..
280 | theta = 1.0
281 | delta = 0.5
282 | sigma = step_size
283 | if step_size2 is None:
284 | ss_ratio = 0.5
285 | tau = ss_ratio * sigma
286 | else:
287 | tau = step_size2
288 | ss_ratio = tau / sigma
289 |
290 | fk, grad_fk = f_grad(x)
291 | norm_incr = np.inf
292 | x_next = x.copy()
293 |
294 | for it in range(max_iter):
295 | y_next = prox_2_conj(y + tau * L.matvec(x), tau)
296 | if line_search:
297 | tau_next = tau * (1 + np.sqrt(1 + theta)) / 2
298 | while True:
299 | theta = tau_next / tau
300 | sigma = ss_ratio * tau_next
301 | y_bar = y_next + theta * (y_next - y)
302 | x_next = prox_1(x - sigma * (L.rmatvec(y_bar) + grad_fk), sigma)
303 | incr_x = np.linalg.norm(L.matvec(x_next) - L.matvec(x))
304 | f_next, f_grad_next = f_grad(x_next)
305 | if incr_x <= 1e-10:
306 | break
307 |
308 | tmp = (sigma * tau_next) * (incr_x ** 2)
309 | tmp += 2 * sigma * (f_next - fk - grad_fk.dot(x_next - x))
310 | if tmp / delta <= (incr_x ** 2):
311 | tau = tau_next
312 | break
313 | else:
314 | tau_next *= 0.9
315 | else:
316 | y_bar = 2 * y_next - y
317 | x_next = prox_1(x - sigma * (L.rmatvec(y_bar) + grad_fk), sigma)
318 | f_next, f_grad_next = f_grad(x_next)
319 |
320 | if it % 100 == 0:
321 | norm_incr = linalg.norm(x_next - x) + linalg.norm(y_next - y)
322 |
323 | x[:] = x_next[:]
324 | y[:] = y_next[:]
325 | fk, grad_fk = f_next, f_grad_next
326 |
327 | if norm_incr < tol:
328 | success = True
329 | break
330 |
331 | if callback is not None:
332 | if callback(locals()) is False:
333 | break
334 |
335 | if not success:
336 | warnings.warn(
337 | "minimize_primal_dual did not reach the desired tolerance level",
338 | RuntimeWarning,
339 | )
340 |
341 | return optimize.OptimizeResult(
342 | x=x, success=success, nit=it, certificate=norm_incr, step_size=sigma
343 | )
344 |
--------------------------------------------------------------------------------
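
To complement minimize_primal_dual above, here is a hedged sketch (not from the repository) of a fused-lasso-type problem where the h(Lx) term penalizes finite differences of x through a sparse difference operator. The operator construction, data and regularization strength are illustrative assumptions.

import numpy as np
from scipy import sparse
import copt as cp

rng = np.random.RandomState(0)
A = rng.randn(40, 20)
b = rng.randn(40)
alpha = 0.1  # illustrative regularization strength

def f_grad(x, return_gradient=True):
    residual = A.dot(x) - b
    value = 0.5 * residual.dot(residual)
    if not return_gradient:
        return value
    return value, A.T.dot(residual)

def prox_l1(x, step_size):
    # proximal operator of alpha * ||.||_1, applied to L x through prox_2
    return np.sign(x) * np.maximum(np.abs(x) - alpha * step_size, 0)

# first-order finite-difference operator: (L x)_i = x_{i+1} - x_i
eye = sparse.eye(20, format="csr")
L = (eye[1:] - eye[:-1]).tocsr()

result = cp.minimize_primal_dual(f_grad, np.zeros(20), prox_2=prox_l1, L=L)
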
/copt/tv_prox.py:
--------------------------------------------------------------------------------
1 | # Authors: Fabian Pedregosa. Code for total variation is based on the
2 | # code of Laurent Condat
3 | #
4 |
5 | """
6 | These are implementations of some proximal operators
7 | """
8 |
9 | import numpy as np
10 | import warnings
11 | from . import utils
12 |
13 |
14 | def prox_tv1d(w, step_size):
15 | """
16 | Computes the proximal operator of the 1-dimensional total variation operator.
17 |
18 | This solves a problem of the form
19 |
20 | argmin_x TV(x) + (1/(2 stepsize)) ||x - w||^2
21 |
22 | where TV(x) is the one-dimensional total variation
23 |
24 | Parameters
25 | ----------
26 | w: array
27 | vector of coefficients
28 | step_size: float
29 | step size (sometimes denoted gamma) in proximal objective function
30 |
31 | References
32 | ----------
33 | Condat, Laurent. "A direct algorithm for 1D total variation denoising."
34 | IEEE Signal Processing Letters (2013)
35 | """
36 |
37 | if w.dtype not in (np.float32, np.float64):
38 | raise ValueError("argument w must be array of floats")
39 | w = w.copy()
40 | output = np.empty_like(w)
41 | _prox_tv1d(step_size, w, output)
42 | return output
43 |
44 |
45 | @utils.njit
46 | def _prox_tv1d(step_size, input, output):
47 | """low level function call, no checks are performed"""
48 | width = input.size + 1
49 | index_low = np.zeros(width, dtype=np.int32)
50 | slope_low = np.zeros(width, dtype=input.dtype)
51 | index_up = np.zeros(width, dtype=np.int32)
52 | slope_up = np.zeros(width, dtype=input.dtype)
53 | index = np.zeros(width, dtype=np.int32)
54 | z = np.zeros(width, dtype=input.dtype)
55 | y_low = np.empty(width, dtype=input.dtype)
56 | y_up = np.empty(width, dtype=input.dtype)
57 | s_low, c_low, s_up, c_up, c = 0, 0, 0, 0, 0
58 | y_low[0] = y_up[0] = 0
59 | y_low[1] = input[0] - step_size
60 | y_up[1] = input[0] + step_size
61 | incr = 1
62 |
63 | for i in range(2, width):
64 | y_low[i] = y_low[i - 1] + input[(i - 1) * incr]
65 | y_up[i] = y_up[i - 1] + input[(i - 1) * incr]
66 |
67 | y_low[width - 1] += step_size
68 | y_up[width - 1] -= step_size
69 | slope_low[0] = np.inf
70 | slope_up[0] = -np.inf
71 | z[0] = y_low[0]
72 |
73 | for i in range(1, width):
74 | c_low += 1
75 | c_up += 1
76 | index_low[c_low] = index_up[c_up] = i
77 | slope_low[c_low] = y_low[i] - y_low[i - 1]
78 | while (c_low > s_low + 1) and (
79 | slope_low[max(s_low, c_low - 1)] <= slope_low[c_low]
80 | ):
81 | c_low -= 1
82 | index_low[c_low] = i
83 | if c_low > s_low + 1:
84 | slope_low[c_low] = (y_low[i] - y_low[index_low[c_low - 1]]) / (
85 | i - index_low[c_low - 1]
86 | )
87 | else:
88 | slope_low[c_low] = (y_low[i] - z[c]) / (i - index[c])
89 |
90 | slope_up[c_up] = y_up[i] - y_up[i - 1]
91 | while (c_up > s_up + 1) and (slope_up[max(c_up - 1, s_up)] >= slope_up[c_up]):
92 | c_up -= 1
93 | index_up[c_up] = i
94 | if c_up > s_up + 1:
95 | slope_up[c_up] = (y_up[i] - y_up[index_up[c_up - 1]]) / (
96 | i - index_up[c_up - 1]
97 | )
98 | else:
99 | slope_up[c_up] = (y_up[i] - z[c]) / (i - index[c])
100 |
101 | while (
102 | (c_low == s_low + 1)
103 | and (c_up > s_up + 1)
104 | and (slope_low[c_low] >= slope_up[s_up + 1])
105 | ):
106 | c += 1
107 | s_up += 1
108 | index[c] = index_up[s_up]
109 | z[c] = y_up[index[c]]
110 | index_low[s_low] = index[c]
111 | slope_low[c_low] = (y_low[i] - z[c]) / (i - index[c])
112 | while (
113 | (c_up == s_up + 1)
114 | and (c_low > s_low + 1)
115 | and (slope_up[c_up] <= slope_low[s_low + 1])
116 | ):
117 | c += 1
118 | s_low += 1
119 | index[c] = index_low[s_low]
120 | z[c] = y_low[index[c]]
121 | index_up[s_up] = index[c]
122 | slope_up[c_up] = (y_up[i] - z[c]) / (i - index[c])
123 |
124 | for i in range(1, c_low - s_low + 1):
125 | index[c + i] = index_low[s_low + i]
126 | z[c + i] = y_low[index[c + i]]
127 | c = c + c_low - s_low
128 | j, i = 0, 1
129 | while i <= c:
130 | a = (z[i] - z[i - 1]) / (index[i] - index[i - 1])
131 | while j < index[i]:
132 | output[j * incr] = a
134 | j += 1
135 | i += 1
136 | return
137 |
138 |
139 | @utils.njit
140 | def prox_tv1d_cols(stepsize, a, n_rows, n_cols):
141 | """apply prox_tv1d along the columns of the matrix a
142 | """
143 | A = a.reshape((n_rows, n_cols))
144 | out = np.empty_like(A)
145 | for i in range(n_cols):
146 | _prox_tv1d(stepsize, A[:, i], out[:, i])
147 | return out.ravel()
148 |
149 |
150 | @utils.njit
151 | def prox_tv1d_rows(stepsize, a, n_rows, n_cols):
152 | """apply prox_tv1d along the rows of the matrix a
153 | """
154 | A = a.reshape((n_rows, n_cols))
155 | out = np.empty_like(A)
156 | for i in range(n_rows):
157 | _prox_tv1d(stepsize, A[i, :], out[i, :])
158 | return out.ravel()
159 |
160 |
161 | def c_prox_tv2d(step_size, x, n_rows, n_cols, max_iter, tol):
162 | """
163 | Proximal Dykstra to minimize a 2-dimensional total variation.
164 |
165 | Reference: Algorithm 7 in https://arxiv.org/abs/1411.0589
166 | """
167 | n_features = n_rows * n_cols
168 | p = np.zeros(n_features)
169 | q = np.zeros(n_features)
170 |
171 | for it in range(max_iter):
172 | y = x + p
173 | y = prox_tv1d_cols(step_size, y, n_rows, n_cols)
174 | p += x - y
175 | x = y + q
176 | x = prox_tv1d_rows(step_size, x, n_rows, n_cols)
177 | q += y - x
178 |
179 | # check convergence
180 | accuracy = np.max(np.abs(y - x))
181 | if accuracy < tol:
182 | break
183 | else:
184 | warnings.warn(
185 | "prox_tv2d did not converge to the desired accuracy\n"
186 | + "Accuracy reached: %s" % accuracy
187 | )
188 | return x
189 |
190 |
191 | def prox_tv2d(w, step_size, n_rows, n_cols, max_iter=500, tol=1e-6):
192 | """
193 | Computes the proximal operator of the 2-dimensional total variation operator.
194 |
195 | This solves a problem of the form
196 |
197 | argmin_x TV(x) + (1/(2 stepsize)) ||x - w||^2
198 |
199 | where TV(x) is the two-dimensional total variation. It does so using a
200 | proximal Dykstra splitting (Algorithm 7 in [Barbero and Sra, 2014]).
201 |
202 | Parameters
203 | ----------
204 | w: array
205 | vector of coefficients
206 |
207 | step_size: float
208 | step size (often denoted gamma) in proximal objective function
209 |
210 | max_iter: int
211 | maximum number of iterations of the alternating (Dykstra) scheme
212 | tol: float
213 | tolerance on the maximum difference between the column- and row-wise estimates, used as stopping criterion
214 | References
215 | ----------
216 | Condat, Laurent. "A direct algorithm for 1D total variation denoising."
217 | IEEE Signal Processing Letters (2013)
218 |
219 | Barbero, Alvaro, and Suvrit Sra. "Modular proximal optimization for
220 | multidimensional total-variation regularization." arXiv preprint
221 | arXiv:1411.0589 (2014).
222 | """
223 |
224 | x = w.copy().astype(np.float64)
225 | return c_prox_tv2d(step_size, x, n_rows, n_cols, max_iter, tol)
226 |
227 |
228 | def tv2d_linear_operator(n_rows, n_cols):
229 | """
230 | Return the linear operator L such ||L x||_1 is the 2D total variation norm.
231 |
232 | Parameters
233 | ----------
234 | n_rows : int, number of rows of the 2D grid
235 | n_cols : int, number of columns of the 2D grid
236 |
237 | Returns
238 | -------
239 | L : ndarray, each row encodes the difference between two adjacent entries of the grid
240 | """
241 |
242 | L = []
243 | for i in range(n_rows):
244 | for j in range(n_cols):
245 | if i < n_rows - 1:
246 | tmp1 = np.zeros((n_rows, n_cols))
247 | tmp1[i, j] = 1
248 | tmp1[i + 1, j] = -1
249 | L.append(tmp1.ravel())
250 |
251 | if j < n_cols - 1:
252 | tmp2 = np.zeros((n_rows, n_cols))
253 | tmp2[i, j] = 1
254 | tmp2[i, j + 1] = -1
255 | L.append(tmp2.ravel())
256 | return np.array(L)
257 |
--------------------------------------------------------------------------------
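
A small usage sketch of the proximal operators above (not part of the repository): denoise a noisy piecewise-constant signal with prox_tv1d, and a flattened 2D array with prox_tv2d; larger step_size values give flatter results. The data is synthetic, and the import path assumes copt/tv_prox.py is importable as copt.tv_prox.

import numpy as np
from copt import tv_prox

rng = np.random.RandomState(0)
signal = np.concatenate([np.zeros(20), np.ones(20), -0.5 * np.ones(20)])
noisy = signal + 0.1 * rng.randn(signal.size)

# 1D total-variation denoising
denoised = tv_prox.prox_tv1d(noisy, step_size=0.5)

# 2D variant, applied to a flattened image-shaped array
img = rng.randn(8, 8)
denoised_img = tv_prox.prox_tv2d(img.ravel(), step_size=0.3, n_rows=8, n_cols=8)
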
/copt/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse
3 | from scipy import optimize
4 | from datetime import datetime
5 | from sklearn.utils.extmath import safe_sparse_dot
6 |
7 | try:
8 | from numba import njit, prange
9 | except ImportError:
10 | from functools import wraps
11 |
12 | def njit(*args, **kw):
13 | if len(args) == 1 and len(kw) == 0 and hasattr(args[0], "__call__"):
14 | func = args[0]
15 |
16 | @wraps(func)
17 | def inner_function(*args, **kwargs):
18 | return func(*args, **kwargs)
19 |
20 | return inner_function
21 | else:
22 |
23 | def inner_function(function):
24 | @wraps(function)
25 | def wrapper(*args, **kwargs):
26 | return function(*args, **kwargs)
27 |
28 | return wrapper
29 |
30 | return inner_function
31 |
32 | prange = range
33 |
34 |
35 | def build_func_grad(jac, fun, args, eps):
36 | if not callable(jac):
37 | if jac == "2-point" or jac is False:
38 | jac = None
39 | elif bool(jac):
40 | fun = optimize._optimize.MemoizeJac(fun)
41 | jac = fun.derivative
42 | else:
43 | raise NotImplementedError("jac has unexpected value.")
44 |
45 | if jac is None:
46 |
47 | def func_and_grad(x):
48 | f = fun(x, *args)
49 | g = optimize._approx_fprime_helper(x, fun, eps, args=args, f0=f)
50 | return f, g
51 | else:
52 |
53 | def func_and_grad(x):
54 | f = fun(x, *args)
55 | g = jac(x, *args)
56 | return f, g
57 | return func_and_grad
58 |
59 |
60 | def safe_sparse_add(a, b):
61 | if sparse.issparse(a) and sparse.issparse(b):
62 | # both are sparse, keep the result sparse
63 | return a + b
64 | else:
65 | # one of them is non-sparse, convert
66 | # everything to dense.
67 | if sparse.issparse(a):
68 | a = a.toarray()
69 | if a.ndim == 2 and b.ndim == 1:
70 | a = a.ravel()
71 | elif sparse.issparse(b):
72 | b = b.toarray()
73 | if b.ndim == 2 and a.ndim == 1:
74 | b = b.ravel()
75 | return a + b
76 |
77 |
78 | @njit(parallel=True)
79 | def sample_batches(n_samples, n_batches, batch_size):
80 | idx = np.zeros(n_batches * batch_size, dtype=np.int32)
81 | for k in prange(n_batches):
82 | idx[k * batch_size:(k + 1) * batch_size] = np.random.choice(n_samples, size=batch_size, replace=False)
83 | return idx
84 |
85 |
86 | @njit(nogil=True)
87 | def fast_csr_vm(x, data, indptr, indices, d, idx):
88 | """
89 | Returns the vector matrix product x * M[idx]. M is described
90 | in the csr format.
91 |
92 | Returns x * M[idx]
93 |
94 | x: 1-d iterable
95 | data: data field of a scipy.sparse.csr_matrix
96 | indptr: indptr field of a scipy.sparse.csr_matrix
97 | indices: indices field of a scipy.sparse.csr_matrix
98 | d: output dimension
99 | idx: 1-d iterable: index of the sparse.csr_matrix
100 | """
101 | res = np.zeros(d)
102 | assert x.shape[0] == len(idx)
103 | for k, i in np.ndenumerate(idx):
104 | for j in range(indptr[i], indptr[i+1]):
105 | j_idx = indices[j]
106 | res[j_idx] += x[k] * data[j]
107 | return res
108 |
109 |
110 | @njit(nogil=True)
111 | def fast_csr_mv(data, indptr, indices, x, idx):
112 | """
113 | Returns the matrix vector product M[idx] * x. M is described
114 | in the csr format.
115 |
116 | data: data field of a scipy.sparse.csr_matrix
117 | indptr: indptr field of a scipy.sparse.csr_matrix
118 | indices: indices field of a scipy.sparse.csr_matrix
119 | x: 1-d iterable
120 | idx: 1-d iterable: index of the sparse.csr_matrix
121 | """
122 |
123 | res = np.zeros(len(idx))
124 | for i, row_idx in np.ndenumerate(idx):
125 | for k, j in enumerate(range(indptr[row_idx], indptr[row_idx+1])):
126 | j_idx = indices[j]
127 | res[i] += x[j_idx] * data[j]
128 | return res
129 |
130 |
131 | def parse_step_size(step_size):
132 | if hasattr(step_size, "__len__") and len(step_size) == 2:
133 | return step_size[0], step_size[1]
134 | elif isinstance(step_size, float):
135 | return step_size, "fixed"
136 | elif hasattr(step_size, "__call__") or step_size == "adaptive":
137 | # without other information start with a step-size of one
138 | return 1, step_size
139 | else:
140 | raise ValueError("Could not understand value step_size=%s" % step_size)
141 |
142 |
143 | class Trace:
144 | """Trace callback."""
145 | def __init__(self, f=None, freq=1):
146 | self.trace_x = []
147 | self.trace_time = []
148 | self.trace_fx = []
149 | self.trace_step_size = []
150 | self.start = datetime.now()
151 | self._counter = 0
152 | self.freq = int(freq)
153 | self.f = f
154 |
155 | def __call__(self, dl):
156 | if self._counter % self.freq == 0:
157 | if self.f is not None:
158 | self.trace_fx.append(self.f(dl["x"]))
159 | else:
160 | self.trace_x.append(dl["x"].copy())
161 | delta = (datetime.now() - self.start).total_seconds()
162 | self.trace_time.append(delta)
163 | self.trace_step_size.append(dl["step_size"])
164 | self._counter += 1
165 |
166 |
167 | def init_lipschitz(f_grad, x0):
168 | L0 = 1e-3
169 | f0, grad0 = f_grad(x0)
170 | if sparse.issparse(grad0) and not sparse.issparse(x0):
171 | x0 = sparse.csc_matrix(x0).T
172 | elif sparse.issparse(x0) and not sparse.issparse(grad0):
173 | grad0 = sparse.csc_matrix(grad0).T
174 | x_tilde = x0 - (1.0 / L0) * grad0
175 | f_tilde = f_grad(x_tilde)[0]
176 | for _ in range(100):
177 | if f_tilde <= f0:
178 | break
179 | L0 *= 10
180 | x_tilde = x0 - (1.0 / L0) * grad0
181 | f_tilde = f_grad(x_tilde)[0]
182 | return L0
183 |
184 |
185 | def get_max_lipschitz(A, loss, alpha=0):
186 | """
187 | XXX DEPRECATED
188 |
189 | Estimate the max Lipschitz constant (as appears in
190 | many stochastic methods).
191 |
192 | A : array-like
193 |
194 | loss : {"logloss", "square", "huber"}
195 | """
196 | from sklearn.utils.extmath import row_norms
197 |
198 | max_squared_sum = row_norms(A, squared=True).max()
199 |
200 | if loss == "logloss":
201 | return 0.25 * max_squared_sum + alpha
202 | elif loss in ("huber", "square"):
203 | raise NotImplementedError
204 | raise NotImplementedError
205 |
206 |
207 |
--------------------------------------------------------------------------------
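
The Trace class above receives the solver's locals() dictionary as a callback. The sketch below (an illustrative example, not from the repository) records objective values and timings while running minimize_proximal_gradient, assuming copt.utils and the solver are importable as laid out in this repository.

import numpy as np
import copt as cp
from copt import utils

rng = np.random.RandomState(0)
A, b = rng.randn(20, 10), rng.randn(20)

def fun(x):
    return 0.5 * np.linalg.norm(A.dot(x) - b) ** 2

def jac(x):
    return A.T.dot(A.dot(x) - b)

trace = utils.Trace(f=fun)  # evaluate fun at every recorded iterate
cp.minimize_proximal_gradient(fun, np.zeros(10), jac=jac, callback=trace)
print(trace.trace_fx[:5], trace.trace_time[:5])
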
/copt/utils_pytorch.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def make_func_and_grad(loss_func, shape, device, dtype=None):
4 | """Wraps loss_func to take and return numpy 1D arrays, for interfacing PyTorch and copt.
5 |
6 | Args:
7 | loss_func: callable
8 | PyTorch callable, taking a torch.Tensor as input and returning a scalar
9 |
10 | shape: tuple(*int)
11 | shape of the optimization variable, as input to loss_func
12 |
13 | device: torch.Device
14 | device on which to send the optimization variable
15 |
16 | dtype: dtype
17 | data type for the torch.Tensor holding the optimization variable
18 |
19 | Returns:
20 | f_grad: callable
21 | function taking a 1D numpy array as input and returning (loss_val, grad_val): (float, array).
22 | """
23 | def func_and_grad(x, return_gradient=True):
24 | x_tensor = torch.tensor(x, dtype=dtype)
25 | x_tensor = x_tensor.view(*shape)
26 | x_tensor = x_tensor.to(device)
27 | x_tensor.requires_grad = True
28 |
29 | loss = loss_func(x_tensor)
30 | loss.backward()
31 | if return_gradient:
32 | return loss.item(), x_tensor.grad.cpu().numpy().flatten()
33 |
34 | return loss.item()
35 | return func_and_grad
36 |
37 | # TODO: write generic function wrapping copt optimizers for taking pytorch input,
38 | # returning pytorch output for use of copt in a PyTorch pipeline
--------------------------------------------------------------------------------
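
A minimal sketch of make_func_and_grad above (requires PyTorch; not part of the repository): wrap a simple quadratic PyTorch loss and minimize it with a numpy-based copt solver. The target tensor and the choice of solver are assumptions made for the example.

import numpy as np
import torch
import copt as cp
from copt.utils_pytorch import make_func_and_grad

target = torch.tensor([1.0, -2.0, 3.0])

def loss_func(x):
    # simple quadratic loss in PyTorch
    return ((x - target) ** 2).sum()

f_grad = make_func_and_grad(loss_func, shape=(3,), device="cpu", dtype=torch.float32)

# f_grad now maps a 1D numpy array to (loss, gradient), so any solver that
# accepts such a signature can be used, e.g. the three-operator splitting:
result = cp.minimize_three_split(f_grad, np.zeros(3))
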
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
21 |
22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
23 |
24 | help:
25 | @echo "Please use \`make ' where is one of"
26 | @echo " html to make standalone HTML files"
27 | @echo " dirhtml to make HTML files named index.html in directories"
28 | @echo " singlehtml to make a single large HTML file"
29 | @echo " pickle to make pickle files"
30 | @echo " json to make JSON files"
31 | @echo " htmlhelp to make HTML files and a HTML help project"
32 | @echo " qthelp to make HTML files and a qthelp project"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 |
49 | clean:
50 | -rm -rf $(BUILDDIR)/*
51 | -rm -rf auto_examples/
52 | -rm -rf generated/*
53 | -rm -rf modules/generated/*
54 |
55 | html:
56 | # These two lines make the build a bit more lengthy, and the
57 | # embedding of images more robust
58 | rm -rf $(BUILDDIR)/html/_images
59 | #rm -rf _build/doctrees/
60 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
61 | @echo
62 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
63 |
64 | dirhtml:
65 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
66 | @echo
67 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
68 |
69 | singlehtml:
70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
71 | @echo
72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
73 |
74 | pickle:
75 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
76 | @echo
77 | @echo "Build finished; now you can process the pickle files."
78 |
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | htmlhelp:
85 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
86 | @echo
87 | @echo "Build finished; now you can run HTML Help Workshop with the" \
88 | ".hhp project file in $(BUILDDIR)/htmlhelp."
89 |
90 | qthelp:
91 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
92 | @echo
93 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
94 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
95 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/project-template.qhcp"
96 | @echo "To view the help file:"
97 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/project-template.qhc"
98 |
99 | devhelp:
100 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
101 | @echo
102 | @echo "Build finished."
103 | @echo "To view the help file:"
104 | @echo "# mkdir -p $$HOME/.local/share/devhelp/project-template"
105 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/project-template"
106 | @echo "# devhelp"
107 |
108 | epub:
109 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
110 | @echo
111 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
112 |
113 | latex:
114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | @echo
116 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
117 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
118 | "(use \`make latexpdf' here to do that automatically)."
119 |
120 | latexpdf:
121 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
122 | @echo "Running LaTeX files through pdflatex..."
123 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
124 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
125 |
126 | latexpdfja:
127 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
128 | @echo "Running LaTeX files through platex and dvipdfmx..."
129 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
130 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
131 |
132 | text:
133 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
134 | @echo
135 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
136 |
137 | man:
138 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
139 | @echo
140 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
141 |
142 | texinfo:
143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | @echo
145 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
146 | @echo "Run \`make' in that directory to run these through makeinfo" \
147 | "(use \`make info' here to do that automatically)."
148 |
149 | info:
150 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
151 | @echo "Running Texinfo files through makeinfo..."
152 | make -C $(BUILDDIR)/texinfo info
153 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
154 |
155 | gettext:
156 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
157 | @echo
158 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
159 |
160 | changes:
161 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
162 | @echo
163 | @echo "The overview file is in $(BUILDDIR)/changes."
164 |
165 | linkcheck:
166 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
167 | @echo
168 | @echo "Link check complete; look for any errors in the above output " \
169 | "or in $(BUILDDIR)/linkcheck/output.txt."
170 |
171 | doctest:
172 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
173 | @echo "Testing of doctests in the sources finished, look at the " \
174 | "results in $(BUILDDIR)/doctest/output.txt."
175 |
176 | xml:
177 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
178 | @echo
179 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
180 |
181 | pseudoxml:
182 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
183 | @echo
184 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
185 |
186 | upload:
187 | cp -r _build/html/* ~/dev/copt_web/ && cd ~/dev/copt_web/ && git add * && git ci -a -m "update doc" && git push origin gh-pages
188 |
189 | apidoc:
190 | sphinx-apidoc -o source/ ../copt
191 |
192 | html-noplot:
193 | $(SPHINXBUILD) -D plot_gallery=0 -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
194 | @echo
195 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
196 |
--------------------------------------------------------------------------------
/doc/_static/css/custom.css:
--------------------------------------------------------------------------------
1 |
2 | /* this makes inline code look prettier with a slight border around it */
3 | pre {
4 | border: 1px solid #CCC;
5 | }
6 |
7 | div.admonition-proximal-gradient,
8 | div.admonition-frank-wolfe,
9 | div.admonition-stochastic-methods,
10 | div.admonition-examples {
11 | background-color: #d9edf7;
12 | border-color: #bce8f1;
13 | }
14 |
15 | div.admonition-proximal-gradient a,
16 | div.admonition-frank-wolfe a,
17 | div.admonition-stochastic-methods a,
18 | div.admonition-examples a {
19 | color: #3E4349;
20 | }
--------------------------------------------------------------------------------
/doc/citing.rst:
--------------------------------------------------------------------------------
1 | .. _citing:
2 |
3 | Citing
4 | ======
5 |
6 | If you use this software in a scientific publication, please consider citing it as
7 |
8 | .. code::
9 |
10 | @article{copt,
11 | author = {Fabian Pedregosa},
12 | title = {C-OPT: composite optimization in Python},
13 | year = 2018,
14 | DOI = {10.5281/zenodo.1283339},
15 | url={http://openopt.github.io/copt/}
16 | }
--------------------------------------------------------------------------------
/doc/index.rst:
--------------------------------------------------------------------------------
1 | COPT: a Python library for Constrained OPTimization
2 | ===================================================
3 |
4 | .. image:: https://travis-ci.org/openopt/copt.svg?branch=master
5 | :target: https://travis-ci.org/openopt/copt
6 | .. image:: https://storage.googleapis.com/copt-doc/doc_status.svg
7 | :target: https://storage.googleapis.com/copt-doc/index.html
8 | .. image:: https://coveralls.io/repos/github/openopt/copt/badge.svg?branch=master
9 | :target: https://coveralls.io/github/openopt/copt?branch=master
10 | .. image:: https://storage.googleapis.com/copt-doc/pylint.svg
11 | :target: https://storage.googleapis.com/copt-doc/pylint.txt
12 | .. image:: https://zenodo.org/badge/46262908.svg
13 | :target: citing.html
14 |
15 |
16 |
17 | Life is too short to learn another API
18 | --------------------------------------
19 |
20 | COPT is an optimization library that does not reinvent the wheel. It packs classical optimization algorithms in an API following that of `scipy.optimize `_. So if you've already used that library, you should feel right at ease.
21 |
22 | It provides:
23 |
24 | * State-of-the-art implementations of classical optimization algorithms such as :ref:`proximal gradient descent ` and :ref:`Frank-Wolfe ` under a consistent API.
25 | * Few dependencies, pure Python library for easy deployment.
26 | * An :ref:`example gallery `.
27 |
28 |
29 |
30 | Contents
31 | -----------------------
32 |
33 | The methods implemented in copt can be categorized as follows:
34 |
35 | .. admonition:: Proximal-gradient
36 |
37 | These are methods that combine the gradient of a smooth term with the proximal operator of a potentially non-smooth term.
38 | They can be used to solve problems involving one or several non-smooth terms. :ref:`Read more ...`
39 |
40 | .. admonition:: Frank-Wolfe
41 |
42 | Frank-Wolfe, also known as conditional gradient, are a family of methods to solve constrained optimization problems. Contrary to proximal-gradient methods, they don't require access to the projection onto the constraint set. :ref:`Read more ...`
43 |
44 |
45 | .. admonition:: Stochastic Methods
46 |
47 | Methods that can solve optimization problems with access only to a noisy evaluation of the objective.
48 | :ref:`Read more ...`.
49 |
50 |
51 | Installation
52 | ------------
53 |
54 | If you already have a working installation of numpy and scipy,
55 | the easiest way to install copt is using ``pip`` ::
56 |
57 | pip install -U copt
58 |
59 |
60 | Alternatively, you can install the latest development from github with the command::
61 |
62 | pip install git+https://github.com/openopt/copt.git
63 |
64 |
65 |
66 | Where to go from here?
67 | ----------------------
68 |
69 | To know more about copt, check out our :ref:`example gallery ` or browse through the module reference using the left navigation bar.
70 |
71 |
72 | .. toctree::
73 | :maxdepth: 2
74 | :hidden:
75 |
76 | solvers
77 | loss_functions
78 | auto_examples/index
79 | utils
80 | citing
81 |
82 | Last change: |today|
83 |
--------------------------------------------------------------------------------
/doc/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openopt/copt/c0d5d46ae709f77b7dc1fc692bbe476aa63f029b/doc/logo.png
--------------------------------------------------------------------------------
/doc/loss_functions.rst:
--------------------------------------------------------------------------------
1 |
2 | Loss, constraints and regularizers
3 | ==================================
4 |
5 | These are some convenience functions that implement common losses, constraints and regularizers.
6 |
7 | Smooth loss functions:
8 |
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | copt.loss.LogLoss
14 | copt.loss.SquareLoss
15 | copt.loss.HuberLoss
16 |
17 | Non-smooth terms accessed through their proximal operator
18 |
19 | .. autosummary::
20 | :toctree: generated/
21 |
22 | copt.penalty.L1Norm
23 | copt.penalty.GroupL1
24 | copt.penalty.TraceNorm
25 | copt.penalty.FusedLasso
26 | copt.penalty.TotalVariation2D
27 |
28 | Constraints can be incorporated in a similar way through
29 |
30 |
31 | .. autosummary::
32 | :toctree: generated/
33 |
34 | copt.constraint.L1Ball
35 | copt.constraint.L2Ball
36 | copt.constraint.LinfBall
37 | copt.constraint.TraceBall
38 |
--------------------------------------------------------------------------------
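
As a hedged sketch of combining the convenience classes listed above (not part of the documentation source): a logistic loss with an L1 penalty, solved with proximal gradient. The data matrix, labels in {0, 1} and regularization strength are assumptions made for the example.

import numpy as np
import copt as cp
from copt import loss, penalty

rng = np.random.RandomState(0)
A = rng.randn(50, 10)
b = rng.randint(0, 2, size=50)

f = loss.LogLoss(A, b)     # smooth data-fit term
g = penalty.L1Norm(0.1)    # non-smooth regularizer, used through its prox

result = cp.minimize_proximal_gradient(f.f_grad, np.zeros(10), prox=g.prox, jac=True)
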
/doc/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=_build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | echo.Please use `make ^` where ^ is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | goto end
41 | )
42 |
43 | if "%1" == "clean" (
44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
45 | del /q /s %BUILDDIR%\*
46 | goto end
47 | )
48 |
49 |
50 | %SPHINXBUILD% 2> nul
51 | if errorlevel 9009 (
52 | echo.
53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
54 | echo.installed, then set the SPHINXBUILD environment variable to point
55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
56 | echo.may add the Sphinx directory to PATH.
57 | echo.
58 | echo.If you don't have Sphinx installed, grab it from
59 | echo.http://sphinx-doc.org/
60 | exit /b 1
61 | )
62 |
63 | if "%1" == "html" (
64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
65 | if errorlevel 1 exit /b 1
66 | echo.
67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
68 | goto end
69 | )
70 |
71 | if "%1" == "dirhtml" (
72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
73 | if errorlevel 1 exit /b 1
74 | echo.
75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
76 | goto end
77 | )
78 |
79 | if "%1" == "singlehtml" (
80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
81 | if errorlevel 1 exit /b 1
82 | echo.
83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
84 | goto end
85 | )
86 |
87 | if "%1" == "pickle" (
88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
89 | if errorlevel 1 exit /b 1
90 | echo.
91 | echo.Build finished; now you can process the pickle files.
92 | goto end
93 | )
94 |
95 | if "%1" == "json" (
96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
97 | if errorlevel 1 exit /b 1
98 | echo.
99 | echo.Build finished; now you can process the JSON files.
100 | goto end
101 | )
102 |
103 | if "%1" == "htmlhelp" (
104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | if errorlevel 1 exit /b 1
106 | echo.
107 | echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | goto end
110 | )
111 |
112 | if "%1" == "qthelp" (
113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | if errorlevel 1 exit /b 1
115 | echo.
116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\project-template.qhcp
119 | echo.To view the help file:
120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\project-template.ghc
121 | goto end
122 | )
123 |
124 | if "%1" == "devhelp" (
125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished.
129 | goto end
130 | )
131 |
132 | if "%1" == "epub" (
133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | if errorlevel 1 exit /b 1
135 | echo.
136 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | goto end
138 | )
139 |
140 | if "%1" == "latex" (
141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | if errorlevel 1 exit /b 1
143 | echo.
144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | goto end
146 | )
147 |
148 | if "%1" == "latexpdf" (
149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | cd %BUILDDIR%/latex
151 | make all-pdf
152 | cd %BUILDDIR%/..
153 | echo.
154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | goto end
156 | )
157 |
158 | if "%1" == "latexpdfja" (
159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | cd %BUILDDIR%/latex
161 | make all-pdf-ja
162 | cd %BUILDDIR%/..
163 | echo.
164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | goto end
166 | )
167 |
168 | if "%1" == "text" (
169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | if errorlevel 1 exit /b 1
171 | echo.
172 | echo.Build finished. The text files are in %BUILDDIR%/text.
173 | goto end
174 | )
175 |
176 | if "%1" == "man" (
177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | if errorlevel 1 exit /b 1
179 | echo.
180 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | goto end
182 | )
183 |
184 | if "%1" == "texinfo" (
185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | if errorlevel 1 exit /b 1
187 | echo.
188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | goto end
190 | )
191 |
192 | if "%1" == "gettext" (
193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | if errorlevel 1 exit /b 1
195 | echo.
196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | goto end
198 | )
199 |
200 | if "%1" == "changes" (
201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | if errorlevel 1 exit /b 1
203 | echo.
204 | echo.The overview file is in %BUILDDIR%/changes.
205 | goto end
206 | )
207 |
208 | if "%1" == "linkcheck" (
209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | if errorlevel 1 exit /b 1
211 | echo.
212 | echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | goto end
215 | )
216 |
217 | if "%1" == "doctest" (
218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
219 | if errorlevel 1 exit /b 1
220 | echo.
221 | echo.Testing of doctests in the sources finished, look at the ^
222 | results in %BUILDDIR%/doctest/output.txt.
223 | goto end
224 | )
225 |
226 | if "%1" == "xml" (
227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
228 | if errorlevel 1 exit /b 1
229 | echo.
230 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
231 | goto end
232 | )
233 |
234 | if "%1" == "pseudoxml" (
235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
236 | if errorlevel 1 exit /b 1
237 | echo.
238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
239 | goto end
240 | )
241 |
242 | :end
243 |
--------------------------------------------------------------------------------
/doc/paper/biblio.bib:
--------------------------------------------------------------------------------
1 | @article{virtanen2019scipy,
2 | title={SciPy 1.0--Fundamental Algorithms for Scientific Computing in Python},
3 | author={Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E and Haberland, Matt and Reddy, Tyler and Cournapeau, David and Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and Bright, Jonathan and others},
4 | journal={arXiv preprint arXiv:1907.10121},
5 | year={2019}
6 | }
7 |
8 | @article{pedregosa2011scikit,
9 | title={Scikit-learn: Machine learning in Python},
10 | author={Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others},
11 | journal={Journal of machine learning research},
12 | volume={12},
13 | number={Oct},
14 | pages={2825--2830},
15 | year={2011}
16 | }
--------------------------------------------------------------------------------
/doc/paper/index.tex:
--------------------------------------------------------------------------------
1 | \documentclass[twoside,11pt]{article}
2 |
3 | \usepackage[nohyperref, preprint]{jmlr2e}
4 |
5 | \usepackage{amsmath}
6 | % \usepackage{amsthm}
7 | \usepackage{amssymb}
8 | \usepackage{empheq}
9 | \usepackage{xcolor, color, colortbl}
10 | \usepackage{mdframed}
11 | \usepackage{pifont}
12 | \newcommand{\cmark}{\ding{51}}%
13 | \newcommand{\xmark}{\ding{55}}%
14 | \usepackage{enumitem}
15 | % For figures
16 | \usepackage{graphicx} % more modern
17 |
18 |
19 | \newcommand{\blue}{\color{blue}}
20 |
21 | \definecolor{mydarkblue}{rgb}{0,0.08,0.45}
22 | \usepackage[colorlinks=true,
23 | linkcolor=mydarkblue,
24 | citecolor=mydarkblue,
25 | filecolor=mydarkblue,
26 | urlcolor=mydarkblue,
27 | pdfview=FitH]{hyperref}
28 |
29 | \graphicspath{{./figures/}}
30 |
31 |
32 | \jmlrheading{1}{2019}{1-48}{4/00}{10/00}{X}{Authors}
33 |
34 | % Short headings should be running head and authors last names
35 |
36 | \ShortHeadings{C-OPT: Composite Optimization in Python}{Pedregosa}
37 | \firstpageno{1}
38 |
39 |
40 | \begin{document}
41 |
42 | \title{C-OPT: Composite Optimization in Python}
43 | \author{\name Fabian Pedregosa \email pedregosa@google.com \\
44 | \addr Google Research\\
45 | }
46 | \editor{}
47 |
48 |
49 | \maketitle
50 |
51 |
52 | \begin{abstract}
53 | \emph{copt} is a Python library integrating a wide range of classical optimization algorithms for medium-scale problems. By packaging these algorithms into a consistent API, the package focuses on bringing optimization to practitioners. Emphasis is on robustness, documentation, performance and API consistency. It has minimal dependencies and is distributed under the Apache-2.0 license, encouraging its use in both academic and commercial settings.
54 | \end{abstract}
55 |
56 | \begin{keywords}
57 | optimization, python
58 | \end{keywords}
59 |
60 | \section{Introduction}
61 |
62 | {\blue Python offers a large ecosystem for scientific computing.
63 | }
64 |
65 | \section{Project Vision}
66 |
67 | \paragraph{Code quality.}
68 |
69 | \paragraph{Bare-bones design and API.}
70 |
71 | \paragraph{Documentation.}
72 |
73 | \paragraph{Apache license.}
74 |
75 |
76 | \section{Underlying Technologies}
77 |
78 | \citep{virtanen2019scipy}
79 |
80 | \citep{pedregosa2011scikit}
81 |
82 | \section{Computational}
83 |
84 | \bibliography{biblio}
85 |
86 |
87 | \end{document}
88 |
--------------------------------------------------------------------------------
/doc/solvers.rst:
--------------------------------------------------------------------------------
1 | .. _proximal_gradient:
2 |
3 | Solvers
4 | =======
5 |
6 |
7 | Proximal-Gradient
8 | -----------------
9 |
10 | .. autosummary::
11 | :toctree: generated/
12 |
13 | copt.minimize_proximal_gradient
14 |
15 | The proximal-gradient method [BT2009]_, [N2013]_ is a method to solve problems of the form
16 |
17 | .. math::
18 | \argmin_{\bs{x} \in \mathbb{R}^d} f(\bs{x}) + g(\bs{x})
19 |
20 |
21 | where $f$ is a differentiable function for which we have access to its gradient and $g$ is a potentially non-smooth function for which we have access to its proximal operator.
22 |
23 |
24 | .. admonition:: Examples
25 |
26 | * :ref:`sphx_glr_auto_examples_plot_group_lasso.py`
27 |
28 |
29 | .. topic:: References
30 |
31 | .. [BT2009] Beck, Amir, and Marc Teboulle. `"Gradient-based algorithms with applications to signal recovery." `_ Convex optimization in signal processing and communications (2009)
32 |
33 | .. [N2013] Nesterov, Yu. `"Gradient methods for minimizing composite functions." `_ Mathematical Programming 140.1 (2013): 125-161.
34 |
35 |
36 | Primal-dual hybrid gradient
37 | ---------------------------
38 |
39 | .. autosummary::
40 | :toctree: generated/
41 |
42 | copt.minimize_primal_dual
43 |
44 |
45 | The primal-dual hybrid gradient method [C2013]_ [V2013]_ [CP2016]_ is a method to solve problems of the form
46 |
47 | .. math::
48 | \argmin_{\bs{x} \in \mathbb{R}^d} f(\bs{x}) + g(\bs{x}) + h(\bs{A}\bs{x})
49 |
50 | where $f$ is a differentiable function for which we have access to its gradient and $g$ and $h$ are potentially non-smooth functions for which we have access to their proximal operator.
51 |
52 |
53 |
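A minimal usage sketch (synthetic data, for illustration only; it follows the
overlapping group lasso example, where the two non-smooth terms enter through
their proximal operators):

.. code-block:: python

    import numpy as np
    import copt as cp
    import copt.loss
    import copt.penalty

    np.random.seed(0)
    A = np.random.randn(100, 200)
    b = (np.sign(A.dot(np.random.randn(200))) + 1) // 2  # labels in {0, 1}

    f = copt.loss.LogLoss(A, b)
    groups = [np.arange(10 * i, 10 * (i + 1)) for i in range(20)]
    g = copt.penalty.GroupL1(0.01, groups[::2])
    h = copt.penalty.GroupL1(0.01, groups[1::2])

    step_size = 1.0 / f.lipschitz
    result = cp.minimize_primal_dual(
        f.f_grad, np.zeros(200), g.prox, h.prox,
        step_size=step_size, step_size2=(1.0 / step_size) / 2,
        tol=0, max_iter=1000,
    )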
54 | .. admonition:: Examples
55 |
56 | * :ref:`sphx_glr_auto_examples_proximal_splitting_plot_tv_deblurring.py`
57 | * :ref:`sphx_glr_auto_examples_proximal_splitting_plot_overlapping_group_lasso.py`
58 |
59 |
60 | .. topic:: References
61 |
62 | .. [C2013] Condat, Laurent. "A primal–dual splitting method for convex optimization involving Lipschitzian, proximable and linear composite terms." Journal of Optimization Theory and Applications 158.2 (2013): 460-479.
63 |
64 | .. [V2013] Vũ, Bằng Công. "A splitting algorithm for dual monotone inclusions involving cocoercive operators." Advances in Computational Mathematics 38.3 (2013)
65 |
66 | .. [CP2016] Chambolle, Antonin, and Thomas Pock. "An introduction to continuous optimization for imaging." Acta Numerica 25 (2016)
67 |
68 |
69 | Three-operator splitting
70 | ------------------------
71 |
72 |
73 | .. autosummary::
74 | :toctree: generated/
75 |
76 | copt.minimize_three_split
77 |
78 |
79 | The three-operator splitting method [DY2017]_ [PG2018]_ solves problems of the form
80 |
81 | .. math::
82 | \argmin_{\bs{x} \in \mathbb{R}^d} f(\bs{x}) + g(\bs{x}) + h(\bs{x})
83 |
84 | where :math:`f` is a differentiable function for which we have access to its gradient and :math:`g` and :math:`h` are potentially non-smooth functions for which we have access to their proximal operators.
85 |
86 |
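A minimal usage sketch (synthetic data, for illustration only; the Huber loss
with L1 and trace-norm penalties follows the sparse and low rank example):

.. code-block:: python

    import numpy as np
    import copt as cp
    import copt.loss
    import copt.penalty

    np.random.seed(0)
    A = np.random.randn(100, 400)
    b = A.dot(np.random.randn(400)) + np.random.randn(100)

    f = copt.loss.HuberLoss(A, b)
    g = copt.penalty.L1Norm(1e-3)
    h = copt.penalty.TraceNorm(1e-3, (20, 20))  # 20 x 20 = 400 coefficients

    result = cp.minimize_three_split(
        f.f_grad, np.zeros(400), g.prox, h.prox,
        step_size=1.0 / f.lipschitz, max_iter=1000, tol=1e-14,
    )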
87 | .. admonition:: Examples
88 |
89 | * :ref:`sphx_glr_auto_examples_proximal_splitting_plot_sparse_nuclear_norm.py`
90 | * :ref:`sphx_glr_auto_examples_proximal_splitting_plot_tv_deblurring.py`
91 | * :ref:`sphx_glr_auto_examples_proximal_splitting_plot_overlapping_group_lasso.py`
92 |
93 |
94 | .. topic:: References
95 |
96 | .. [DY2017] Davis, Damek, and Wotao Yin. `"A three-operator splitting scheme and
97 | its optimization applications."
98 | `_ Set-Valued and Variational
99 | Analysis, 2017.
100 |
101 | .. [PG2018] Pedregosa, Fabian, and Gauthier Gidel. `"Adaptive Three Operator
102 | Splitting." `_ Proceedings of the 35th
103 | International Conference on Machine Learning, 2018.
104 |
105 |
106 | .. _frank_wolfe:
107 |
108 | Frank-Wolfe
109 | -----------
110 |
111 | .. autosummary::
112 | :toctree: generated/
113 |
114 | copt.minimize_frank_wolfe
115 |
116 |
117 | The Frank-Wolfe (FW) or conditional gradient algorithm [J2003]_, [P2018]_, [PANJ2018]_ is a method for constrained optimization. It can solve problems of the form
118 |
119 | .. math::
120 | \argmin_{\bs{x} \in \mathcal{D}} f(\bs{x})
121 |
122 | where :math:`f` is a differentiable function for which we have access to its gradient and :math:`\mathcal{D}` is a compact set for which we have access to its linear minimization oracle (lmo). This is a routine that, given a vector :math:`\bs{u}`, returns a solution to
123 |
124 | .. math::
125 | \argmin_{\bs{x} \in D}\, \langle\bs{u}, \bs{x}\rangle~.
126 |
127 |
128 | Contrary to other constrained optimization algorithms like projected gradient descent, the Frank-Wolfe algorithm does not require access to a projection, which is why it is sometimes referred to as a projection-free algorithm. It relies instead exclusively on the linear minimization oracle described above.
129 |
130 |
131 | .. TODO describe the LMO API in more detail
132 |
133 |
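Built-in constraint objects such as ``copt.constraint.L1Ball`` expose such an
oracle as an ``lmo`` method. A small sketch of how it is called (judging from
the examples in this repository, the first returned element is the update
direction :math:`\bs{s} - \bs{x}` rather than the vertex itself):

.. code-block:: python

    import numpy as np
    import copt.constraint

    l1_ball = copt.constraint.L1Ball(1.0)  # the constraint set
    u = np.random.randn(50)                # plays the role of -gradient
    x = np.zeros(50)                       # current iterate
    update_direction = l1_ball.lmo(u, x)[0]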
134 | The Frank-Wolfe algorithm is implemented in this library in the method :meth:`copt.minimize_frank_wolfe`. Like most other methods it takes as argument an objective function to minimize, but unlike most of them, it also requires access to a *linear minimization oracle*: a routine that, for a given :math:`d`-dimensional vector :math:`\bs{u}`, solves the linear problem :math:`\argmin_{\bs{z} \in D}\, \langle \bs{u}, \bs{z}\rangle`.
135 |
136 |
137 | At each iteration, the Frank-Wolfe algorithm uses the linear minimization oracle to identify the vertex :math:`\bs{s}` that correlates most with the negative gradient. The next iterate :math:`\bs{x}^+` is then constructed as a convex combination of the current iterate :math:`\bs{x}` and the newly acquired vertex :math:`\bs{s}`:
138 |
139 |
140 | .. math::
141 | \boldsymbol{x}^+ = (1 - \gamma)\boldsymbol{x} + \gamma \boldsymbol{s}
142 |
143 |
144 |
145 | The step-size :math:`\gamma` can be chosen by different strategies:
146 |
147 | * **Backtracking line-search**. This is the default option and corresponds to the keyword argument :code:`step_size="backtracking"`. This is typically the fastest and simplest option; if unsure, use it.
148 |
149 | * **Demyanov-Rubinov step-size**. This is a step-size of the form
150 |
151 | .. math::
152 | \gamma = \langle \nabla f(\bs{x}), \bs{s} - \bs{x}\rangle / (L \|\bs{s} - \bs{x}\|^2)~.
153 |
154 |
155 |
156 |   This step-size typically performs well but has the drawback that it requires knowledge of the Lipschitz constant of :math:`\nabla f`. This step-size can be used with the keyword argument :code:`step_size="DR"`. In this case the Lipschitz
157 |   constant :math:`L` needs to be specified through the keyword argument :code:`lipschitz`. For example, if the Lipschitz constant is 0.1, then the call should include :code:`step_size="DR", lipschitz=0.1`.
158 |
159 |
160 | * **Oblivious step-size**. This is the very simple step-size of the form
161 |
162 | .. math::
163 | \gamma = \frac{2}{t+2}~,
164 |
165 |   where :math:`t` is the number of iterations. This step-size is oblivious in the sense that it does not use any information about the objective. It typically performs worse than the alternatives, but it is simple to implement and can be competitive for noisy objectives.
166 |
167 |
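A minimal usage sketch (synthetic data, for illustration only; it mirrors the
sparse logistic regression benchmarks in the example gallery):

.. code-block:: python

    import numpy as np
    import copt as cp
    import copt.constraint
    import copt.loss

    np.random.seed(0)
    X = np.random.randn(100, 50)
    y = np.random.rand(100)

    f = copt.loss.LogLoss(X, y)
    l1_ball = copt.constraint.L1Ball(1.0)

    # the default backtracking line-search step-size is used here
    result = cp.minimize_frank_wolfe(f.f_grad, np.zeros(50), l1_ball.lmo)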
168 | .. admonition:: Examples
169 |
170 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sparse_benchmark.py`
171 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_vertex_overlap.py`
172 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sparse_benchmark_pairwise.py`
173 |
174 |
175 |
176 | .. topic:: References:
177 |
178 | .. [J2003] Jaggi, Martin. `"Revisiting Frank-Wolfe: Projection-Free Sparse Convex Optimization." `_ ICML 2013.
179 |
180 | .. [P2018] Pedregosa, Fabian `"Notes on the Frank-Wolfe Algorithm" `_, 2018
181 |
182 | .. [PANJ2018] Pedregosa, Fabian, Armin Askari, Geoffrey Negiar, and Martin Jaggi. `"Step-Size Adaptivity in Projection-Free Optimization." `_ arXiv:1806.05123 (2018).
183 |
184 |
185 | .. [LJ2015] Lacoste-Julien, Simon, and Martin Jaggi. `"On the global linear convergence of Frank-Wolfe optimization variants." `_ Advances in Neural Information Processing Systems. 2015.
186 |
187 |
188 |
189 |
190 | .. _stochastic_methods:
191 |
192 | Stochastic methods
193 | ------------------
194 |
195 | .. autosummary::
196 | :toctree: generated/
197 |
198 | copt.minimize_saga
199 | copt.minimize_svrg
200 | copt.minimize_vrtos
201 | copt.minimize_sfw
202 |
203 |
204 | .. topic:: Examples:
205 |
206 | * :ref:`sphx_glr_auto_examples_plot_saga_vs_svrg.py`
207 | * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sfw.py`
208 |     * :ref:`sphx_glr_auto_examples_frank_wolfe_plot_sfw_real_data.py`
209 |
210 |
--------------------------------------------------------------------------------
/doc/sphinx_ext/github_link.py:
--------------------------------------------------------------------------------
1 | from operator import attrgetter
2 | import inspect
3 | import subprocess
4 | import os
5 | import sys
6 | from functools import partial
7 |
8 | REVISION_CMD = 'git rev-parse --short HEAD'
9 |
10 |
11 | def _get_git_revision():
12 | try:
13 | revision = subprocess.check_output(REVISION_CMD.split()).strip()
14 | except (subprocess.CalledProcessError, OSError):
15 | print('Failed to execute git to get revision')
16 | return None
17 | return revision.decode('utf-8')
18 |
19 |
20 | def _linkcode_resolve(domain, info, package, url_fmt, revision):
21 | """Determine a link to online source for a class/method/function
22 |
23 | This is called by sphinx.ext.linkcode
24 |
25 | An example with a long-untouched module that everyone has
26 | >>> _linkcode_resolve('py', {'module': 'tty',
27 | ... 'fullname': 'setraw'},
28 | ... package='tty',
29 | ... url_fmt='http://hg.python.org/cpython/file/'
30 | ... '{revision}/Lib/{package}/{path}#L{lineno}',
31 | ... revision='xxxx')
32 | 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18'
33 | """
34 |
35 | if revision is None:
36 | return
37 | if domain not in ('py', 'pyx'):
38 | return
39 | if not info.get('module') or not info.get('fullname'):
40 | return
41 |
42 | class_name = info['fullname'].split('.')[0]
43 | if type(class_name) != str:
44 | # Python 2 only
45 | class_name = class_name.encode('utf-8')
46 | module = __import__(info['module'], fromlist=[class_name])
47 | obj = attrgetter(info['fullname'])(module)
48 |
49 | try:
50 | fn = inspect.getsourcefile(obj)
51 | except Exception:
52 | fn = None
53 | if not fn:
54 | try:
55 | fn = inspect.getsourcefile(sys.modules[obj.__module__])
56 | except Exception:
57 | fn = None
58 | if not fn:
59 | return
60 |
61 | fn = os.path.relpath(fn,
62 | start=os.path.dirname(__import__(package).__file__))
63 | try:
64 | lineno = inspect.getsourcelines(obj)[1]
65 | except Exception:
66 | lineno = ''
67 | return url_fmt.format(revision=revision, package=package,
68 | path=fn, lineno=lineno)
69 |
70 |
71 | def make_linkcode_resolve(package, url_fmt):
72 | """Returns a linkcode_resolve function for the given URL format
73 |
74 | revision is a git commit reference (hash or name)
75 |
76 | package is the name of the root module of the package
77 |
78 | url_fmt is along the lines of ('https://github.com/USER/PROJECT/'
79 | 'blob/{revision}/{package}/'
80 | '{path}#L{lineno}')
81 | """
82 | revision = _get_git_revision()
83 | return partial(_linkcode_resolve, revision=revision, package=package,
84 | url_fmt=url_fmt)
--------------------------------------------------------------------------------
/doc/utils.rst:
--------------------------------------------------------------------------------
1 | Utility functions
2 | =================
3 |
4 | Datasets
5 | --------
6 |
7 | .. autosummary::
8 | :toctree: generated/
9 |
10 | copt.datasets.load_img1
11 | copt.datasets.load_rcv1
12 | copt.datasets.load_url
13 | copt.datasets.load_covtype
14 | copt.datasets.load_gisette
15 | copt.datasets.load_madelon
16 |
17 | Misc
18 | ----
19 |
20 | .. autosummary::
21 | :toctree: generated/
22 |
23 | copt.utils.Trace
24 |
--------------------------------------------------------------------------------
/examples/README.txt:
--------------------------------------------------------------------------------
1 | .. _general_examples:
2 |
3 | Example Gallery
4 | ===============
5 |
6 | Miscellaneous examples
7 | ----------------------
8 |
9 | Miscellaneous and introductory examples for copt.
--------------------------------------------------------------------------------
/examples/frank_wolfe/README.txt:
--------------------------------------------------------------------------------
1 | .. _frank_wolfe_examples:
2 |
3 | Frank-Wolfe
4 | -----------
5 |
6 | Examples based on the Frank-Wolfe algorithm
7 |
--------------------------------------------------------------------------------
/examples/frank_wolfe/plot_sfw.py:
--------------------------------------------------------------------------------
1 | """
2 | Comparison of variants of Stochastic FW
3 | ===========================================
4 |
5 | The problem solved in this case is an L1-constrained logistic regression
6 | (sometimes referred to as sparse logistic regression).
7 | """
8 |
9 | import copt as cp
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import sklearn
13 |
14 |
15 | # .. construct (random) dataset ..
16 | import copt
17 |
18 | n_samples, n_features = 500, 200
19 | np.random.seed(0)
20 | X = np.random.randn(n_samples, n_features)
21 | y = np.random.rand(n_samples)
22 | batch_size = n_samples // 10
23 | n_batches = n_samples // batch_size
24 | max_iter = int(1e3)
25 | freq = max(n_batches, max_iter // 1000)
26 |
27 | # .. objective function and regularizer ..
28 | f = copt.loss.LogLoss(X, y)
29 | alpha = 1.
30 | constraint = copt.constraint.L1Ball(1.)
31 |
32 | x0 = [0] * n_features
33 | x0[0] += alpha
34 |
35 | # .. callbacks to track progress ..
36 | def fw_gap(x):
37 | _, grad = f.f_grad(x)
38 | return constraint.lmo(-grad, x)[0].dot(-grad)
39 |
40 |
41 | class TraceGaps(cp.utils.Trace):
42 | def __init__(self, f=None, freq=1):
43 | super(TraceGaps, self).__init__(f, freq)
44 | self.trace_gaps = []
45 |
46 | def __call__(self, dl):
47 | if self._counter % self.freq == 0:
48 | self.trace_gaps.append(fw_gap(dl['x']))
49 | super(TraceGaps, self).__call__(dl)
50 |
51 |
52 | cb_sfw_SAG = TraceGaps(f, freq=freq)
53 | cb_sfw_SAG_pairwise = TraceGaps(f, freq=freq)
54 | cb_sfw_SAGA = TraceGaps(f, freq=freq)
55 | cb_sfw_mokhtari = TraceGaps(f, freq=freq)
56 | cb_sfw_lu_freund = TraceGaps(f, freq=freq)
57 |
58 | # .. run the SFW algorithm ..
59 | print("Running SAGFW Pairwise with DR step size")
60 | result_sfw_SAG_pairwise = cp.minimize_sfw(
61 | f.partial_deriv,
62 | X,
63 | y,
64 | np.zeros(n_features),
65 | constraint.lmo_pairwise,
66 | batch_size=batch_size,
67 | x0_rep=(1., 0),
68 | callback=cb_sfw_SAG_pairwise,
69 | tol=0,
70 | max_iter=max_iter,
71 | variant='SAG',
72 | step_size='DR',
73 | lipschitz=f.max_lipschitz / n_samples,
74 | lmo_variant='pairwise'
75 | )
76 |
77 | print("Running SAGFW")
78 | result_sfw_SAG = cp.minimize_sfw(
79 | f.partial_deriv,
80 | X,
81 | y,
82 | np.zeros(n_features),
83 | constraint.lmo,
84 | batch_size=batch_size,
85 | callback=cb_sfw_SAG,
86 | tol=0,
87 | max_iter=max_iter,
88 | variant='SAG'
89 | )
90 |
91 | print("Running SAGAFW")
92 | result_sfw_SAGA = cp.minimize_sfw(
93 | f.partial_deriv,
94 | X,
95 | y,
96 | np.zeros(n_features),
97 | constraint.lmo,
98 | batch_size=batch_size,
99 | callback=cb_sfw_SAGA,
100 | tol=0,
101 | max_iter=max_iter,
102 | variant='SAGA'
103 | )
104 |
105 | print("Running MHK")
106 | result_sfw_mokhtari = cp.minimize_sfw(
107 | f.partial_deriv,
108 | X,
109 | y,
110 | np.zeros(n_features),
111 | constraint.lmo,
112 | batch_size=batch_size,
113 | callback=cb_sfw_mokhtari,
114 | tol=0,
115 | max_iter=max_iter,
116 | variant='MHK'
117 | )
118 |
119 | print("Running LF")
120 | result_sfw_lu_freund = cp.minimize_sfw(
121 | f.partial_deriv,
122 | X,
123 | y,
124 | np.zeros(n_features),
125 | constraint.lmo,
126 | batch_size=batch_size,
127 | callback=cb_sfw_lu_freund,
128 | tol=0,
129 | max_iter=max_iter,
130 | variant='LF'
131 | )
132 | # .. plot the result ..
133 | max_gap = max(cb_sfw_SAG.trace_gaps[0],
134 | cb_sfw_SAG_pairwise.trace_gaps[0],
135 | cb_sfw_mokhtari.trace_gaps[0],
136 | cb_sfw_lu_freund.trace_gaps[0],
137 | cb_sfw_SAGA.trace_gaps[0])
138 |
139 | max_val = max(cb_sfw_SAG.trace_fx[0],
140 | cb_sfw_SAG_pairwise.trace_fx[0],
141 | cb_sfw_mokhtari.trace_fx[0],
142 | cb_sfw_lu_freund.trace_fx[0],
143 | cb_sfw_SAGA.trace_fx[0])
144 |
145 | min_val = min(np.min(cb_sfw_SAG.trace_fx),
146 | np.min(cb_sfw_SAG_pairwise.trace_fx),
147 | np.min(cb_sfw_mokhtari.trace_fx),
148 | np.min(cb_sfw_lu_freund.trace_fx),
149 | np.min(cb_sfw_SAGA.trace_fx),
150 | )
151 |
152 | fig, (ax1, ax2) = plt.subplots(2, sharex=True)
153 | fig.suptitle('Stochastic Frank-Wolfe')
154 |
155 | ax1.plot(freq * batch_size * np.arange(len(cb_sfw_SAG.trace_gaps)), np.array(cb_sfw_SAG.trace_gaps) / max_gap, label="SAG")
156 | ax1.plot(freq * batch_size * np.arange(len(cb_sfw_SAG_pairwise.trace_gaps)), np.array(cb_sfw_SAG_pairwise.trace_gaps) / max_gap, label="SAG Pairwise")
157 | ax1.plot(freq * batch_size * np.arange(len(cb_sfw_SAGA.trace_gaps)), np.array(cb_sfw_SAGA.trace_gaps) / max_gap, label="SAGA")
158 | ax1.plot(freq * batch_size * np.arange(len(cb_sfw_mokhtari.trace_gaps)), np.array(cb_sfw_mokhtari.trace_gaps) / max_gap, label='Mokhtari et al. (2018)')
159 | ax1.plot(freq * batch_size * np.arange(len(cb_sfw_lu_freund.trace_gaps)), np.array(cb_sfw_lu_freund.trace_gaps) / max_gap, label='Lu and Freund (2018)')
160 | ax1.set_ylabel("Relative FW gap", fontweight="bold")
161 | ax1.set_yscale('log')
162 | ax1.set_xscale('log')
163 | ax1.grid(True)
164 |
165 | ax2.plot(freq * batch_size * np.arange(len(cb_sfw_SAG.trace_fx)), (np.array(cb_sfw_SAG.trace_fx) - min_val) / (max_val - min_val), label="SAG")
166 | ax2.plot(freq * batch_size * np.arange(len(cb_sfw_SAG_pairwise.trace_fx)), (np.array(cb_sfw_SAG_pairwise.trace_fx) - min_val) / (max_val - min_val), label="SAG Pairwise")
167 | ax2.plot(freq * batch_size * np.arange(len(cb_sfw_SAGA.trace_fx)), (np.array(cb_sfw_SAGA.trace_fx) - min_val) / (max_val - min_val), label="SAGA")
168 | ax2.plot(freq * batch_size * np.arange(len(cb_sfw_mokhtari.trace_fx)), (np.array(cb_sfw_mokhtari.trace_fx) - min_val) / (max_val - min_val), label='Mokhtari et al. (2018)')
169 | ax2.plot(freq * batch_size * np.arange(len(cb_sfw_lu_freund.trace_fx)), (np.array(cb_sfw_lu_freund.trace_fx) - min_val) / (max_val - min_val), label='Lu and Freund (2018)')
170 | ax2.set_ylabel("Relative suboptimality", fontweight="bold")
171 | ax2.set_xlabel("Number of gradient evaluations", fontweight="bold")
172 | ax2.set_yscale('log')
173 | ax2.set_xscale("log")
174 | ax2.grid(True)
175 |
176 | plt.xlim(1e4, 4e8)
177 | plt.legend()
178 | plt.show()
179 |
--------------------------------------------------------------------------------
/examples/frank_wolfe/plot_sfw_real_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Comparison of variants of Stochastic FW on real data
3 | ====================================================
4 |
5 | The problem solved in this case is an L1-constrained logistic regression
6 | (sometimes referred to as sparse logistic regression).
7 | """
8 |
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | import copt as cp
12 |
13 | # .. Load dataset ..
14 | import copt.constraint
15 | import copt.loss
16 |
17 | np.random.seed(0)
18 | X, y = cp.datasets.load_rcv1("train")
19 | dataset_name = "RCV1"
20 | n_samples, n_features = X.shape
21 | batch_size = 500
22 | max_iter = int(1e4)
23 | freq = max(n_samples // (batch_size * 2), 1)
24 |
25 | # .. objective function and regularizer ..
26 | f = copt.loss.LogLoss(X, y)
27 | constraint = copt.constraint.L1Ball(2e3)
28 |
29 | # .. callbacks to track progress ..
30 | def fw_gap(x):
31 | _, grad = f.f_grad(x)
32 | return constraint.lmo(-grad, x)[0].dot(-grad)
33 |
34 |
35 | class TraceGaps(cp.utils.Trace):
36 | def __init__(self, f=None, freq=1):
37 | super(TraceGaps, self).__init__(f, freq)
38 | self.trace_gaps = []
39 |
40 | def __call__(self, dl):
41 | if self._counter % self.freq == 0:
42 | self.trace_gaps.append(fw_gap(dl['x']))
43 | super(TraceGaps, self).__call__(dl)
44 |
45 |
46 | cb_SAG = TraceGaps(f, freq=freq)
47 | cb_MHK = TraceGaps(f, freq=freq)
48 | cb_LF = TraceGaps(f, freq=freq)
49 |
50 |
51 | # .. run the SFW algorithm ..
52 | print("Running SAGFW")
53 | result_SAG = cp.minimize_sfw(
54 | f.partial_deriv,
55 | X,
56 | y,
57 | np.zeros(n_features),
58 | constraint.lmo,
59 | batch_size,
60 | callback=cb_SAG,
61 | tol=0,
62 | max_iter=max_iter,
63 | variant='SAG'
64 | )
65 |
66 | print("Running MHK")
67 | result_MHK = cp.minimize_sfw(
68 | f.partial_deriv,
69 | X,
70 | y,
71 | np.zeros(n_features),
72 | constraint.lmo,
73 | batch_size,
74 | callback=cb_MHK,
75 | tol=0,
76 | max_iter=max_iter,
77 | variant='MHK'
78 | )
79 |
80 | print("Running LF")
81 | result_LF = cp.minimize_sfw(
82 | f.partial_deriv,
83 | X,
84 | y,
85 | np.zeros(n_features),
86 | constraint.lmo,
87 | batch_size,
88 | callback=cb_LF,
89 | tol=0,
90 | max_iter=max_iter,
91 | variant='LF'
92 | )
93 |
94 | print("Plotting...")
95 | # .. plot the result ..
96 | max_gap = max(cb_SAG.trace_gaps[0],
97 | cb_MHK.trace_gaps[0],
98 | cb_LF.trace_gaps[0],
99 | )
100 |
101 | max_val = max(np.max(cb_SAG.trace_fx),
102 | np.max(cb_MHK.trace_fx),
103 | np.max(cb_LF.trace_fx),
104 | )
105 |
106 | min_val = min(np.min(cb_SAG.trace_fx),
107 | np.min(cb_MHK.trace_fx),
108 | np.min(cb_LF.trace_fx),
109 | )
110 |
111 |
112 | fig, (ax1, ax2) = plt.subplots(2, sharex=True)
113 |
114 | ax1.set_title("Sparse Logistic Regression -- {}".format(dataset_name), fontweight="bold")
115 | ax1.plot(batch_size * freq * np.arange(len(cb_LF.trace_gaps)), np.array(cb_LF.trace_gaps) / max_gap, label='SFW -- Lu and Freund (2020)')
116 | ax1.plot(batch_size * freq * np.arange(len(cb_MHK.trace_gaps)), np.array(cb_MHK.trace_gaps) / max_gap, label='SFW -- Mokhtari et al. (2020)')
117 | ax1.plot(batch_size * freq * np.arange(len(cb_SAG.trace_gaps)), np.array(cb_SAG.trace_gaps) / max_gap, label="SFW -- Negiar et al. (2020)")
118 | ax1.set_ylabel("Relative FW gap", fontweight="bold")
119 | ax1.set_xscale('log')
120 | ax1.set_yscale('log')
121 | ax1.grid()
122 |
123 |
124 | ax2.plot(batch_size * freq * np.arange(len(cb_LF.trace_fx)), (np.array(cb_LF.trace_fx) - min_val) / (max_val - min_val), label='SFW -- Lu and Freund (2020)')
125 | ax2.plot(batch_size * freq * np.arange(len(cb_MHK.trace_fx)), (np.array(cb_MHK.trace_fx) - min_val) / (max_val - min_val), label='SFW -- Mokhtari et al. (2018)')
126 | ax2.plot(batch_size * freq * np.arange(len(cb_SAG.trace_fx)), (np.array(cb_SAG.trace_fx) - min_val) / (max_val - min_val), label="SFW -- Négiar et al. (2020)")
127 | ax2.set_ylabel("Relative suboptimality", fontweight="bold")
128 | ax2.set_xlabel("Number of gradient evaluations", fontweight="bold")
129 | ax2.set_xscale("log")
130 | ax2.set_yscale("log")
131 | ax2.grid()
132 | plt.legend()
133 | plt.show()
134 | print("Done.")
--------------------------------------------------------------------------------
/examples/frank_wolfe/plot_sparse_benchmark.py:
--------------------------------------------------------------------------------
1 | # python3
2 | """
3 | Benchmark of Frank-Wolfe variants for sparse logistic regression
4 | ================================================================
5 |
6 | Comparison of different Frank-Wolfe variants on various
7 | problems with a logistic regression loss (:meth:`copt.loss.LogLoss`)
8 | and an L1 ball constraint (:meth:`copt.constraint.L1Ball`).
9 | """
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import copt as cp
13 |
14 | # .. datasets and their loading functions ..
15 | import copt.constraint
16 | import copt.loss
17 |
18 | datasets = [
19 | ("Gisette", cp.datasets.load_gisette, 6e3),
20 | ("RCV1", cp.datasets.load_rcv1, 2e4),
21 | ("Madelon", cp.datasets.load_madelon, 20.0),
22 | ("Covtype", cp.datasets.load_covtype, 200.0),
23 | ]
24 |
25 |
26 | variants_fw = [
27 | ["backtracking", "adaptive step-size"],
28 | ["DR", "Lipschitz step-size"],
29 | ]
30 |
31 | for dataset_title, load_data, alpha in datasets:
32 | plt.figure()
33 | print("Running on the %s dataset" % dataset_title)
34 |
35 | X, y = load_data()
36 | n_samples, n_features = X.shape
37 |
38 | l1_ball = copt.constraint.L1Ball(alpha)
39 | f = copt.loss.LogLoss(X, y)
40 | x0 = np.zeros(n_features)
41 |
42 | for step, label in variants_fw:
43 |
44 | cb = cp.utils.Trace(f)
45 | sol = cp.minimize_frank_wolfe(
46 | f.f_grad, x0, l1_ball.lmo, callback=cb, step=step, lipschitz=f.lipschitz
47 | )
48 |
49 | plt.plot(cb.trace_time, cb.trace_fx, label=label, markevery=10)
50 |
51 | print("Sparsity of solution: %s" % np.mean(np.abs(sol.x) > 1e-8))
52 | plt.legend()
53 | plt.xlabel("Time (in seconds)")
54 | plt.ylabel("Objective function")
55 | plt.title(dataset_title)
56 | plt.tight_layout() # otherwise the right y-label is slightly clipped
57 | plt.xlim((0, 0.7 * cb.trace_time[-1])) # for aesthetics
58 | plt.grid()
59 | plt.show()
60 |
--------------------------------------------------------------------------------
/examples/frank_wolfe/plot_sparse_benchmark_pairwise.py:
--------------------------------------------------------------------------------
1 | # python3
2 | """
3 | Benchmark of Pairwise Frank-Wolfe variants for sparse logistic regression
4 | =========================================================================
5 |
6 | Speed of convergence of different Frank-Wolfe variants on various
7 | problems with a logistic regression loss (:meth:`copt.loss.LogLoss`)
8 | and an L1 ball constraint (:meth:`copt.constraint.L1Ball`).
9 | """
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import copt as cp
13 |
14 | # .. datasets and their loading functions ..
15 | # .. alpha is the regularization parameter ..
16 | # .. which has been chosen to give 10% feature sparsity ..
17 | import copt.constraint
18 | import copt.loss
19 |
20 | datasets = (
21 | {
22 | "name": "madelon",
23 | "loader": cp.datasets.load_madelon,
24 | "alpha": 1e4,
25 | "max_iter": 5000,
26 | "f_star": 0.0,
27 | },
28 | {
29 | "name": "gisette",
30 | "loader": cp.datasets.load_gisette,
31 | "alpha": 1e4,
32 | "max_iter": 5000,
33 | "f_star": 2.293654421822428,
34 | },
35 | {
36 | "name": "covtype",
37 | "loader": cp.datasets.load_covtype,
38 | "alpha": 1e4,
39 | "max_iter": 5000,
40 | "f_star": 0,
41 | },
42 | {
43 | "name": "RCV1",
44 | "loader": cp.datasets.load_rcv1,
45 | "alpha": 1e3,
46 | "max_iter": 5000,
47 | "f_star": 0.3114744279728717,
48 | },
49 | )
50 |
51 |
52 | variants_fw = [
53 | ["backtracking", "backtracking line-search"],
54 | ["DR", "Lipschitz step-size"],
55 | ]
56 |
57 | for d in datasets:
58 | plt.figure()
59 | print(f"Running on the {d['name']} dataset.")
60 |
61 | X, y = d["loader"]()
62 | print(X.shape)
63 | n_samples, n_features = X.shape
64 |
65 | l1_ball = copt.constraint.L1Ball(d["alpha"])
66 | f = copt.loss.LogLoss(X, y)
67 | x0 = np.zeros(n_features)
68 | x0[0] = d["alpha"] # start from a (random) vertex
69 |
70 | for step, label in variants_fw:
71 |
72 | cb = cp.utils.Trace(f)
73 | sol = cp.minimize_frank_wolfe(
74 | f.f_grad,
75 | x0,
76 | l1_ball.lmo_pairwise,
77 | variant='pairwise',
78 | x0_rep=(1., 0),
79 | callback=cb,
80 | step=step,
81 | lipschitz=f.lipschitz,
82 | max_iter=d["max_iter"],
83 | verbose=True,
84 | tol=0,
85 | )
86 |
87 | plt.plot(
88 | cb.trace_time,
89 | np.array(cb.trace_fx) - d["f_star"],
90 | label=label,
91 | markevery=10,
92 | )
93 |
94 | print("Sparsity of solution: %s" % np.mean(np.abs(sol.x) > 1e-8))
95 | print(f(sol.x))
96 | plt.legend()
97 | plt.xlabel("Time (in seconds)")
98 | plt.ylabel("Objective function")
99 | plt.yscale("log")
100 | plt.title(d["name"])
101 | plt.tight_layout() # otherwise the right y-label is slightly clipped
102 | plt.grid()
103 | plt.show()
104 |
--------------------------------------------------------------------------------
/examples/frank_wolfe/plot_vertex_overlap.py:
--------------------------------------------------------------------------------
1 | # python3
2 | """
3 | Update Direction Overlap in Frank-Wolfe
4 | ========================================
5 |
6 | This example quantifies how many times the Frank-Wolfe algorithm selects
7 | the same extremal vertex (which will determine the update direction) twice
8 | in a row. Selecting the same vertex twice in a row is symptomatic of a poor
9 | step-size, as it implies that the last two updates could have been replaced
10 | by a single update with larger step-size.
11 | """
12 | import copt as cp
13 | import matplotlib.pyplot as plt
14 | from matplotlib.ticker import MaxNLocator
15 | import numpy as np
16 |
17 | # datasets and their respective loading functions
18 | import copt.constraint
19 | import copt.loss
20 |
21 | datasets = [
22 | ("Gisette", cp.datasets.load_gisette),
23 | ("RCV1", cp.datasets.load_rcv1),
24 | ("Madelon", cp.datasets.load_madelon),
25 | ("Covtype", cp.datasets.load_covtype),
26 | ]
27 |
28 |
29 | fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 5))
30 | for ax, (dataset_title, load_data) in zip(axes.ravel(), datasets):
31 | print("Running on the %s dataset" % dataset_title)
32 |
33 | X, y = load_data()
34 | n_samples, n_features = X.shape
35 |
36 | l1_ball = copt.constraint.L1Ball(n_features / 2.0)
37 | f = copt.loss.LogLoss(X, y)
38 | x0 = np.zeros(n_features)
39 |
40 | for i, (step, label) in enumerate(
41 | [["backtracking", "backtracking"], ["DR", "DR step-size"]]
42 | ):
43 | print("Running %s variant" % label)
44 | st_prev = []
45 | overlap = []
46 |
47 | def trace(kw):
48 | """Store vertex overlap during execution of the algorithm."""
49 | s_t = kw["update_direction"] + kw["x"]
50 | if st_prev:
51 |             # check whether the vertices of this and the previous iterate
52 |             # coincide, by testing whether the norm of their difference
53 |             # is zero
54 | prev_overlap = overlap[-1]
55 | if np.linalg.norm(st_prev[0] - s_t) == 0:
56 | overlap.append(prev_overlap + 1)
57 | else:
58 | overlap.append(prev_overlap)
59 | st_prev[0] = s_t
60 | else:
61 | overlap.append(0)
62 | st_prev.append(s_t)
63 |
64 | cp.minimize_frank_wolfe(
65 | f.f_grad,
66 | x0,
67 | l1_ball.lmo,
68 | callback=trace,
69 | max_iter=int(1e4),
70 | step=step,
71 | verbose=True,
72 | lipschitz=f.lipschitz,
73 | )
74 | ax.plot(overlap, label=label)
75 | ax.yaxis.set_major_locator(MaxNLocator(integer=True))
76 | ax.legend()
77 | ax.set_xlabel("number of iterations")
78 | ax.set_ylabel("LMO overlap")
79 | ax.set_title(dataset_title)
80 | fig.tight_layout() # otherwise the right y-label is slightly clipped
81 | ax.grid()
82 | # plt.legend()
83 | plt.show()
84 |
--------------------------------------------------------------------------------
/examples/plot_accelerated.py:
--------------------------------------------------------------------------------
1 | """
2 | Accelerated gradient descent
3 | ============================
4 |
5 | Speed of convergence comparison between gradient descent
6 | and Nesterov acceleration on a logistic regression problem.
7 | """
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import copt as cp
11 |
12 | # .. construct (random) dataset ..
13 | import copt.loss
14 |
15 | n_samples, n_features = 1000, 200
16 | np.random.seed(0)
17 | X = np.random.randn(n_samples, n_features)
18 | y = np.random.rand(n_samples)
19 |
20 | f = copt.loss.LogLoss(X, y)
21 | step_size = 1.0 / f.lipschitz
22 |
23 | cb_pgd = cp.utils.Trace(f)
24 | result_pgd = cp.minimize_proximal_gradient(
25 | f.f_grad,
26 | np.zeros(n_features),
27 | step=lambda x: step_size,
28 | callback=cb_pgd,
29 | tol=0,
30 | jac=True,
31 | accelerated=False,
32 | )
33 |
34 | cb_apgd = cp.utils.Trace(f)
35 | result_apgd = cp.minimize_proximal_gradient(
36 | f.f_grad,
37 | np.zeros(n_features),
38 | step=lambda x: step_size,
39 | callback=cb_apgd,
40 | tol=0,
41 | jac=True,
42 | accelerated=True,
43 | )
44 |
45 |
46 | # .. plot the result ..
47 | fmin = min(np.min(cb_pgd.trace_fx), np.min(cb_apgd.trace_fx))
48 | plt.title("Comparison of full gradient optimizers")
49 | plt.plot(cb_apgd.trace_fx - fmin, lw=4, label="accelerated gradient descent")
50 | plt.plot(cb_pgd.trace_fx - fmin, lw=4, label="gradient descent")
51 | plt.ylabel("Function suboptimality", fontweight="bold")
52 | plt.xlabel("gradient evaluations", fontweight="bold")
53 | plt.yscale("log")
54 | plt.ylim(ymin=1e-16)
55 | plt.xlim((0, 150))
56 | plt.legend()
57 | plt.grid()
58 | plt.show()
59 |
--------------------------------------------------------------------------------
/examples/plot_group_lasso.py:
--------------------------------------------------------------------------------
1 | """
2 | Group Lasso regularization
3 | ==========================
4 |
5 | This example solves an inverse problem where the ground truth
6 | coefficients (in orange) follow a group structure. In blue are
7 | the recovered coefficients for group lasso with different values
8 | of the regularization parameter.
9 |
10 |
11 | The group lasso regularization enters the optimization through
12 | its proximal operator, which is implemented in copt through the
13 | function prox of object :meth:`copt.utils.GroupL1`.
14 |
15 | """
16 | import copt as cp
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | from scipy import sparse
20 |
21 | import copt.loss
22 | import copt.penalty
23 |
24 | np.random.seed(0)
25 |
26 | # .. generate some data ..
27 | n_samples, n_features = 100, 100
28 | groups = [np.arange(10 * i, 10 * i + 10) for i in range(10)]
29 |
30 | # .. construct a ground truth vector in which ..
31 | # .. group 4 and 5 are nonzero ..
32 | ground_truth = np.zeros(n_features)
33 | ground_truth[groups[4]] = 1
34 | ground_truth[groups[5]] = 0.5
35 |
36 | max_iter = 5000
37 | print("#features", n_features)
38 |
39 | A = sparse.rand(n_samples, n_features, density=0.2)
40 | sigma = 1.0
41 | b = A.dot(ground_truth) + sigma * np.random.randn(n_samples)
42 |
43 | np.random.seed(0)
44 | n_samples = n_features
45 |
46 | # .. compute the step-size ..
47 | f = copt.loss.SquareLoss(A, b)
48 | step_size = 1.0 / f.lipschitz
49 |
50 | # .. run the solver for different values ..
51 | # .. of the regularization parameter beta ..
52 | all_betas = [0, 1e-2, 1e-1, 0.2]
53 | all_trace_ls, all_trace_nols = [], []
54 | out_img, certificates = [], []
55 | for i, beta in enumerate(all_betas):
56 | print("beta = %s" % beta)
57 | G1 = copt.penalty.GroupL1(beta, groups)
58 |
59 | def loss(x):
60 | return f(x) + G1(x)
61 |
62 | x0 = np.zeros(n_features)
63 | pgd = cp.minimize_proximal_gradient(
64 | f.f_grad,
65 | x0,
66 | G1.prox,
67 | jac=True,
68 | max_iter=max_iter,
69 | tol=1e-10,
70 | trace_certificate=True,
71 | )
72 |     out_img.append(pgd.x)
73 |     certificates.append(pgd.trace_certificate)
74 |
75 | # .. plot the results ..
76 | fig, ax = plt.subplots(2, 4, sharey=False)
77 | xlim = [0.02, 0.02, 0.1]
78 | markevery = [1000, 1000, 100, 100]
79 | for i, beta in enumerate(all_betas):
80 | ax[0, i].set_title("regularization=%s" % beta)
81 | ax[0, i].set_title("$regularization=%s" % beta)
82 | ax[0, i].plot(out_img[i])
83 | ax[0, i].plot(ground_truth)
84 | ax[0, i].set_ylim((-0.5, 1.5))
85 | ax[0, i].set_xticks(())
86 | ax[0, i].set_yticks(())
87 |
88 | plot_tos, = ax[1, i].plot(
89 |         certificates[i], lw=3, marker="o", markevery=20, markersize=10
90 | )
91 |
92 | ax[1, i].set_xlabel("Iterations")
93 | ax[1, i].set_yscale("log")
94 | ax[1, i].set_ylim((1e-8, None))
95 | ax[1, i].grid(True)
96 |
97 |
98 | ax[1, 0].set_ylabel("certificate")
99 | plt.show()
100 |
--------------------------------------------------------------------------------
/examples/plot_jax_copt.py:
--------------------------------------------------------------------------------
1 | """
2 | Combining COPT with JAX
3 | =======================
4 |
5 | This example shows how `JAX `_
6 | can be used within COPT to compute the gradients of the
7 | objective function.
8 | """
9 | import jax
10 | from jax import numpy as np
11 | import numpy as onp
12 | import matplotlib.pyplot as plt
13 | from sklearn import datasets
14 | import copt as cp
15 |
16 | # .. construct (random) dataset ..
17 | import copt.penalty
18 |
19 | X, y = datasets.make_regression()
20 | n_samples, n_features = X.shape
21 |
22 |
23 | def loss(w):
24 | """Squared error loss."""
25 | z = np.dot(X, w) - y
26 | return np.sum(z * z) / n_samples
27 |
28 |
29 | # .. use JAX to compute the gradient of loss value_and_grad ..
30 | # .. returns both the gradient and the objective, which is ..
31 | # .. the format that COPT accepts ..
32 | f_grad = jax.value_and_grad(loss)
33 |
34 | w0 = onp.zeros(n_features)
35 |
36 | l1_ball = copt.penalty.L1Norm(0.1)
37 | cb = cp.utils.Trace(lambda x: loss(x) + l1_ball(x))
38 | sol = cp.minimize_proximal_gradient(
39 | f_grad, w0, prox=l1_ball.prox, callback=cb, jac=True
40 | )
41 | plt.plot(cb.trace_fx, lw=3)
42 | plt.yscale("log")
43 | plt.xlabel("# Iterations")
44 | plt.ylabel("Objective value")
45 | plt.grid()
46 | plt.show()
47 |
--------------------------------------------------------------------------------
/examples/plot_saga_vs_svrg.py:
--------------------------------------------------------------------------------
1 | """
2 | SAGA vs SVRG
3 | ===========================================
4 |
5 | A comparison between two variance-reduced stochastic gradient methods:
6 | SAGA (implemented in :func:`copt.minimize_saga`) and SVRG (implemented in
7 | :func:`copt.minimize_svrg`). The problem solved in this case is the sum of a
8 | logistic regression and an L1 norm (sometimes referred to as sparse logistic regression).
9 | """
10 | import copt as cp
11 | import matplotlib.pyplot as plt
12 | import numpy as np
13 |
14 | # .. construct (random) dataset ..
15 | import copt.loss
16 | import copt.penalty
17 |
18 | n_samples, n_features = 1000, 200
19 | np.random.seed(0)
20 | X = np.random.randn(n_samples, n_features)
21 | y = np.random.rand(n_samples)
22 |
23 | # .. objective function and regularizer ..
24 | f = copt.loss.LogLoss(X, y)
25 | g = copt.penalty.L1Norm(1.0 / n_samples)
26 |
27 | # .. callbacks to track progress ..
28 | cb_saga = cp.utils.Trace(lambda x: f(x) + g(x))
29 | cb_svrg = cp.utils.Trace(lambda x: f(x) + g(x))
30 |
31 | # .. run the SAGA and SVRG algorithms ..
32 | step_size = 1.0 / (3 * f.max_lipschitz)
33 | result_saga = cp.minimize_saga(
34 | f.partial_deriv,
35 | X,
36 | y,
37 | np.zeros(n_features),
38 | prox=g.prox_factory(n_features),
39 | step_size=step_size,
40 | callback=cb_saga,
41 | tol=0,
42 | max_iter=100,
43 | )
44 |
45 | result_svrg = cp.minimize_svrg(
46 | f.partial_deriv,
47 | X,
48 | y,
49 | np.zeros(n_features),
50 | prox=g.prox_factory(n_features),
51 | step_size=step_size,
52 | callback=cb_svrg,
53 | tol=0,
54 | max_iter=100,
55 | )
56 |
57 |
58 | # .. plot the result ..
59 | fmin = min(np.min(cb_saga.trace_fx), np.min(cb_svrg.trace_fx))
60 | plt.title("Comparison of full gradient optimizers")
61 | plt.plot(cb_saga.trace_fx - fmin, lw=4, label="SAGA")
62 | # .. for SVRG we multiply the number of iterations by two to ..
63 | # .. account for computation of the snapshot gradient ..
64 | plt.plot(
65 | 2 * np.arange(len(cb_svrg.trace_fx)), cb_svrg.trace_fx - fmin, lw=4, label="SVRG"
66 | )
67 | plt.ylabel("Function suboptimality", fontweight="bold")
68 | plt.xlabel("number of gradient evaluations", fontweight="bold")
69 | plt.yscale("log")
70 | plt.ylim(ymin=1e-16)
71 | plt.xlim((0, 50))
72 | plt.legend()
73 | plt.grid()
74 | plt.show()
75 |
--------------------------------------------------------------------------------
/examples/proximal_splitting/README.txt:
--------------------------------------------------------------------------------
1 | .. _proximal_splitting_examples:
2 |
3 | Proximal Splitting
4 | ------------------
5 |
6 | Examples that use proximal splitting methods.
--------------------------------------------------------------------------------
/examples/proximal_splitting/data/blur_matrix.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openopt/copt/c0d5d46ae709f77b7dc1fc692bbe476aa63f029b/examples/proximal_splitting/data/blur_matrix.npz
--------------------------------------------------------------------------------
/examples/proximal_splitting/plot_overlapping_group_lasso.py:
--------------------------------------------------------------------------------
1 | """
2 | Group lasso with overlap
3 | ========================
4 |
5 | Comparison of solvers for a least squares with
6 | overlapping group lasso regularization.
7 |
8 | References
9 | ----------
10 | This example is modeled after the experiments in `Adaptive Three Operator Splitting `_, Appendix E.3.
11 | """
12 | import copt as cp
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | from sklearn import preprocessing
16 |
17 | import copt.loss
18 | import copt.penalty
19 |
20 | np.random.seed(0)
21 |
22 | n_samples, n_features = 100, 1002
23 |
24 | # .. generate some data ..
25 | # .. overlapping groups of 10 features; consecutive groups share 2 features ..
26 | groups = [np.arange(8 * i, 8 * i + 10) for i in range(125)]
27 | ground_truth = np.zeros(n_features)
28 | g = np.random.randint(0, len(groups), 10)
29 | for i in g:
30 | ground_truth[groups[i]] = np.random.randn()
31 |
32 | A = np.random.randn(n_samples, n_features)
33 | p = 0.95 # create a matrix with correlations between features
34 | for i in range(1, n_features):
35 | A[:, i] = p * A[:, i] + (1 - p) * A[:, i-1]
36 | A[:, 0] /= np.sqrt(1 - p ** 2)
37 | A = preprocessing.StandardScaler().fit_transform(A)
38 | b = A.dot(ground_truth) + np.random.randn(n_samples)
39 |
40 | # make labels in {0, 1}
41 | b = np.sign(b)
42 | b = (b + 1) // 2
43 |
44 |
45 | # .. compute the step-size ..
46 | max_iter = 5000
47 | f = copt.loss.LogLoss(A, b)
48 | step_size = 1. / f.lipschitz
49 |
50 | # .. run the solver for different values ..
51 | # .. of the regularization parameter beta ..
52 | all_betas = np.logspace(-4, -1, 4)
53 | all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
54 | all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = [], [], [], []
55 | out_img = []
56 | for i, beta in enumerate(all_betas):
57 | print('beta = %s' % beta)
58 | G1 = copt.penalty.GroupL1(beta, groups[::2])
59 | G2 = copt.penalty.GroupL1(beta, groups[1::2])
60 |
61 | def loss(x):
62 | return f(x) + G1(x) + G2(x)
63 |
64 | cb_tosls = cp.utils.Trace()
65 | x0 = np.zeros(n_features)
66 | tos_ls = cp.minimize_three_split(
67 | f.f_grad, x0, G1.prox, G2.prox, step_size=10 * step_size,
68 | max_iter=max_iter, tol=1e-14, verbose=1,
69 | callback=cb_tosls, h_Lipschitz=beta)
70 | trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
71 | all_trace_ls.append(trace_ls)
72 | all_trace_ls_time.append(cb_tosls.trace_time)
73 |
74 | cb_tos = cp.utils.Trace()
75 | x0 = np.zeros(n_features)
76 | tos = cp.minimize_three_split(
77 | f.f_grad, x0, G1.prox, G2.prox,
78 | step_size=step_size,
79 | max_iter=max_iter, tol=1e-14, verbose=1,
80 | line_search=True, callback=cb_tos)
81 | trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
82 | all_trace_nols.append(trace_nols)
83 | all_trace_nols_time.append(cb_tos.trace_time)
84 | out_img.append(tos.x)
85 |
86 | cb_pdhg = cp.utils.Trace()
87 | x0 = np.zeros(n_features)
88 | pdhg = cp.minimize_primal_dual(
89 | f.f_grad, x0, G1.prox, G2.prox,
90 | callback=cb_pdhg, max_iter=max_iter,
91 | step_size=step_size,
92 | step_size2=(1. / step_size) / 2, tol=0, line_search=True)
93 | trace_pdhg = np.array([loss(x) for x in cb_pdhg.trace_x])
94 | all_trace_pdhg.append(trace_pdhg)
95 | all_trace_pdhg_time.append(cb_pdhg.trace_time)
96 |
97 | cb_pdhg_nols = cp.utils.Trace()
98 | x0 = np.zeros(n_features)
99 | pdhg_nols = cp.minimize_primal_dual(
100 | f.f_grad, x0, G1.prox, G2.prox,
101 | callback=cb_pdhg_nols, max_iter=max_iter,
102 | step_size=step_size,
103 | step_size2=(1. / step_size) / 2, tol=0, line_search=False)
104 | trace_pdhg_nols = np.array([loss(x) for x in cb_pdhg_nols.trace_x])
105 | all_trace_pdhg_nols.append(trace_pdhg_nols)
106 | all_trace_pdhg_nols_time.append(cb_pdhg_nols.trace_time)
107 |
108 |
109 | # .. plot the results ..
110 | fig, ax = plt.subplots(2, 4, sharey=False)
111 | xlim = [2000, 2000, 1000, 2000]
112 | markevery = [x//5 for x in xlim]
113 | for i, beta in enumerate(all_betas):
114 |     ax[0, i].set_title(r'$\lambda=%s$' % beta)
116 | ax[0, i].plot(out_img[i] / np.max(out_img[i]))
117 | ax[0, i].plot(ground_truth / np.max(ground_truth))
118 | ax[0, i].set_xticks(())
119 | ax[0, i].set_yticks(())
120 | ax[0, i].set_ylim((-0.5, 1.5))
121 |
122 | fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
123 | scale = 1. # all_trace_ls[i][0] - fmin
124 | plot_tos, = ax[1, i].plot(
125 | (all_trace_ls[i] - fmin) / scale, '--',
126 | lw=2, marker='o', markevery=markevery[i],
127 | markersize=5)
128 |
129 | plot_nols, = ax[1, i].plot(
130 | (all_trace_nols[i] - fmin) / scale,
131 | lw=2, marker='h', markevery=markevery[i],
132 | markersize=5)
133 |
134 | plot_pdhg, = ax[1, i].plot(
135 | (all_trace_pdhg[i] - fmin) / scale,
136 | lw=2, marker='^', markevery=markevery[i],
137 | markersize=5)
138 |
139 | plot_pdhg_nols, = ax[1, i].plot(
140 | (all_trace_pdhg_nols[i] - fmin) / scale,
141 | lw=2, marker='d', markevery=markevery[i],
142 | markersize=5)
143 |
144 | ax[1, i].set_xlabel('Iterations')
145 | ax[1, i].set_yscale('log')
146 | ax[1, i].set_ylim((1e-10, None))
147 | ax[1, i].set_xlim((0, xlim[i]))
148 | ax[1, i].grid(True)
149 |
150 |
151 | plt.gcf().subplots_adjust(bottom=0.25)
152 | plt.figlegend(
153 | (plot_tos, plot_nols, plot_pdhg, plot_pdhg_nols),
154 | ('TOS with line search', 'TOS without line search', 'PDHG with line search', 'PDHG without line search'), 'lower center', ncol=2,
155 | scatterpoints=1, frameon=False,)
156 |
157 | ax[1, 0].set_ylabel('Objective minus optimum')
158 | plt.show()
159 |
--------------------------------------------------------------------------------
/examples/proximal_splitting/plot_sparse_nuclear_norm.py:
--------------------------------------------------------------------------------
1 | """
2 | Estimating a sparse and low rank matrix
3 | =======================================
4 |
5 | """
6 | import copt.loss
7 | import copt.penalty
8 |
9 | print(__doc__)
10 | import numpy as np
11 | from scipy.sparse import linalg as splinalg
12 | import matplotlib.pyplot as plt
13 | import copt as cp
14 |
15 | # .. Generate synthetic data ..
16 | np.random.seed(1)
17 |
18 | sigma_2 = 0.6
19 | N = 100
20 | d = 20
21 | blocks = np.array([2 * d / 10, 1 * d / 10, 1 * d / 10, 3 * d / 10, 3 * d / 10]).astype(
22 |     int
23 | )
24 | epsilon = 10 ** (-15)
25 |
26 | mu = np.zeros(d)
27 | Sigma = np.zeros((d, d))
28 | blck = 0
29 | for k in range(len(blocks)):
30 | v = 2 * np.random.rand(int(blocks[k]), 1)
31 | v = v * (abs(v) > 0.9)
32 | Sigma[blck : blck + blocks[k], blck : blck + blocks[k]] = np.dot(v, v.T)
33 | blck = blck + blocks[k]
34 | X = np.random.multivariate_normal(
35 | mu, Sigma + epsilon * np.eye(d), N
36 | ) + sigma_2 * np.random.randn(N, d)
37 | Sigma_hat = np.cov(X.T)
38 |
39 | threshold = 1e-5
40 | Sigma[np.abs(Sigma) < threshold] = 0
41 | Sigma[np.abs(Sigma) >= threshold] = 1
42 |
43 | # .. generate some data ..
44 |
45 | max_iter = 5000
46 |
47 | n_features = np.multiply(*Sigma.shape)
48 | n_samples = n_features
49 | print("#features", n_features)
50 | A = np.random.randn(n_samples, n_features)
51 |
52 | sigma = 1.0
53 | b = A.dot(Sigma.ravel()) + sigma * np.random.randn(n_samples)
54 |
55 | # .. compute the step-size ..
56 | s = splinalg.svds(A, k=1, return_singular_vectors=False, tol=1e-3, maxiter=500)[0]
57 | f = copt.loss.HuberLoss(A, b)
58 | step_size = 1.0 / f.lipschitz
59 |
60 | # .. run the solver for different values ..
61 | # .. of the regularization parameter beta ..
62 | all_betas = [0, 1e-3, 1e-2, 1e-1]
63 | all_trace_ls, all_trace_nols, all_trace_pdhg_nols, all_trace_pdhg = [], [], [], []
64 | all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_nols_time, all_trace_pdhg_time = (
65 | [],
66 | [],
67 | [],
68 | [],
69 | )
70 | out_img = []
71 | for i, beta in enumerate(all_betas):
72 | print("beta = %s" % beta)
73 | G1 = copt.penalty.TraceNorm(beta, Sigma.shape)
74 | G2 = copt.penalty.L1Norm(beta)
75 |
76 | def loss(x):
77 | return f(x) + G1(x) + G2(x)
78 |
79 | cb_tosls = cp.utils.Trace()
80 | x0 = np.zeros(n_features)
81 | tos_ls = cp.minimize_three_split(
82 | f.f_grad,
83 | x0,
84 | G2.prox,
85 | G1.prox,
86 | step_size=5 * step_size,
87 | max_iter=max_iter,
88 | tol=1e-14,
89 | verbose=1,
90 | callback=cb_tosls,
91 | h_Lipschitz=beta,
92 | )
93 | trace_ls = np.array([loss(x) for x in cb_tosls.trace_x])
94 | all_trace_ls.append(trace_ls)
95 | all_trace_ls_time.append(cb_tosls.trace_time)
96 |
97 | cb_tos = cp.utils.Trace()
98 | x0 = np.zeros(n_features)
99 | tos = cp.minimize_three_split(
100 | f.f_grad,
101 | x0,
102 | G1.prox,
103 | G2.prox,
104 | step_size=step_size,
105 | max_iter=max_iter,
106 | tol=1e-14,
107 | verbose=1,
108 | line_search=False,
109 | callback=cb_tos,
110 | )
111 | trace_nols = np.array([loss(x) for x in cb_tos.trace_x])
112 | all_trace_nols.append(trace_nols)
113 | all_trace_nols_time.append(cb_tos.trace_time)
114 | out_img.append(tos.x)
115 |
116 | # .. plot the results ..
117 | f, ax = plt.subplots(2, 4, sharey=False)
118 | xlim = [0.02, 0.02, 0.1]
119 | for i, beta in enumerate(all_betas):
120 | ax[0, i].set_title(r"$\lambda=%s$" % beta)
121 | ax[0, i].set_title(r"$\lambda=%s$" % beta)
122 | ax[0, i].imshow(
123 | out_img[i].reshape(Sigma.shape), interpolation="nearest", cmap=plt.cm.gray_r
124 | )
125 | ax[0, i].set_xticks(())
126 | ax[0, i].set_yticks(())
127 |
128 | fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_nols[i]))
129 | plot_tos, = ax[1, i].plot(
130 | all_trace_ls[i] - fmin, lw=4, marker="o", markevery=100, markersize=10
131 | )
132 |
133 | plot_nols, = ax[1, i].plot(
134 | all_trace_nols[i] - fmin, lw=4, marker="h", markevery=100, markersize=10
135 | )
136 |
137 | ax[1, i].set_xlabel("Iterations")
138 | ax[1, i].set_yscale("log")
139 | ax[1, i].set_ylim((1e-15, None))
140 | ax[1, i].set_xlim((0, 2000))
141 | ax[1, i].grid(True)
142 |
143 |
144 | plt.gcf().subplots_adjust(bottom=0.15)
145 | plt.figlegend(
146 | (plot_tos, plot_nols),
147 | ("TOS with line search", "TOS without line search"),
148 | ncol=5,
149 | scatterpoints=1,
150 | loc=(-0.00, -0.0),
151 | frameon=False,
152 | bbox_to_anchor=[0.05, 0.01],
153 | )
154 |
155 | ax[1, 0].set_ylabel("Objective minus optimum")
156 | plt.show()
157 |
--------------------------------------------------------------------------------
/examples/proximal_splitting/plot_tv_deblurring.py:
--------------------------------------------------------------------------------
1 | # python3
2 | """
3 | Total variation regularization
4 | ==============================
5 |
6 | Comparison of solvers with total variation regularization.
7 | """
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | from PIL import Image
11 | from scipy import misc
12 | from scipy import sparse
13 |
14 | import copt as cp
15 |
16 | np.random.seed(0)
17 |
18 | img = misc.face(gray=True).astype(float)
19 | # resize
20 | img = np.array(Image.fromarray(img).resize((153, 115)))
21 | img = img.astype(float) / img.max()
22 |
23 | n_rows, n_cols = img.shape
24 | n_features = n_rows * n_cols
25 | n_samples = n_features
26 | max_iter = 2000
27 |
28 | # .. compute blurred and noisy image ..
29 | A = sparse.load_npz("data/blur_matrix.npz")
30 | b = A.dot(img.ravel())
31 |
32 | np.random.seed(0)
33 |
34 | # .. compute the step-size ..
35 | f = cp.loss.SquareLoss(A, b)
36 | step_size = 1.0 / f.lipschitz
37 |
38 |
39 | def loss(x, pen):
40 | x_mat = x.reshape((n_rows, n_cols))
41 | tmp1 = np.abs(np.diff(x_mat, axis=0))
42 | tmp2 = np.abs(np.diff(x_mat, axis=1))
43 | return f(x) + pen * (tmp1.sum() + tmp2.sum())
44 |
45 |
46 | # .. run the solver for different values ..
47 | # .. of the regularization parameter beta ..
48 | all_betas = [0, 1e-7, 1e-6]
49 | all_trace_ls, all_trace_nols, all_trace_pdhg, out_img = [], [], [], []
50 | all_trace_ls_time, all_trace_nols_time, all_trace_pdhg_time = [], [], []
51 | for i, beta in enumerate(all_betas):
52 | print("Iteration %s, beta %s" % (i, beta))
53 |
54 | def g_prox(x, gamma, pen=beta):
55 | return cp.tv_prox.prox_tv1d_cols(gamma * pen, x, n_rows, n_cols)
56 |
57 | def h_prox(x, gamma, pen=beta):
58 | return cp.tv_prox.prox_tv1d_rows(gamma * pen, x, n_rows, n_cols)
59 |
60 | cb_adatos = cp.utils.Trace()
61 | adatos = cp.minimize_three_split(
62 | f.f_grad,
63 | np.zeros(n_features),
64 | g_prox,
65 | h_prox,
66 | step_size=step_size,
67 | max_iter=max_iter,
68 | tol=0,
69 | callback=cb_adatos,
70 | h_Lipschitz=beta,
71 | )
72 | trace_ls = [loss(x, beta) for x in cb_adatos.trace_x]
73 | all_trace_ls.append(trace_ls)
74 | all_trace_ls_time.append(cb_adatos.trace_time)
75 | out_img.append(adatos.x.reshape(img.shape))
76 |
77 | cb_tos = cp.utils.Trace()
78 | cp.minimize_three_split(
79 | f.f_grad,
80 | np.zeros(n_features),
81 | g_prox,
82 | h_prox,
83 | step_size=step_size,
84 | max_iter=max_iter,
85 | tol=0,
86 | callback=cb_tos,
87 | line_search=False,
88 | )
89 | trace_nols = [loss(x, beta) for x in cb_tos.trace_x]
90 | all_trace_nols.append(trace_nols)
91 | all_trace_nols_time.append(cb_tos.trace_time)
92 |
93 | cb_pdhg = cp.utils.Trace()
94 | cp.minimize_primal_dual(
95 | f.f_grad,
96 | np.zeros(n_features),
97 | g_prox,
98 | h_prox,
99 | callback=cb_pdhg,
100 | max_iter=max_iter,
101 | step_size=step_size,
102 | step_size2=(1. / step_size) / 2,
103 | line_search=False,
104 | )
105 | trace_pdhg = np.array([loss(x, beta) for x in cb_pdhg.trace_x])
106 | all_trace_pdhg.append(trace_pdhg)
107 | all_trace_pdhg_time.append(cb_pdhg.trace_time)
108 |
109 | # .. plot the results ..
110 | f, ax = plt.subplots(2, 3, sharey=False)
111 | xlim = [0.02, 0.02, 0.1]
112 | for i, beta in enumerate(all_betas):
113 | ax[0, i].set_title(r"$\lambda=%s$" % beta)
114 | ax[0, i].imshow(out_img[i], interpolation="nearest", cmap=plt.cm.gray)
115 | ax[0, i].set_xticks(())
116 | ax[0, i].set_yticks(())
117 |
118 | fmin = min(np.min(all_trace_ls[i]), np.min(all_trace_pdhg[i]))
119 | scale = all_trace_ls[i][0] - fmin
120 | plot_tos, = ax[1, i].plot(
121 | (all_trace_ls[i] - fmin) / scale,
122 | "--",
123 | lw=2,
124 | marker="o",
125 | markevery=400,
126 | markersize=7,
127 | )
128 |
129 | plot_tos_nols, = ax[1, i].plot(
130 | (all_trace_nols[i] - fmin) / scale,
131 | lw=2,
132 | marker="<",
133 | markevery=400,
134 | markersize=7,
135 | )
136 |
137 | plot_pdhg, = ax[1, i].plot(
138 | (all_trace_pdhg[i] - fmin) / scale,
139 | "--",
140 | lw=2,
141 | marker="^",
142 | markevery=400,
143 | markersize=7,
144 | )
145 |
146 | ax[1, i].set_xlabel("Iterations")
147 | ax[1, i].set_yscale("log")
148 | ax[1, i].set_ylim((1e-14, None))
149 | ax[1, i].set_xlim((0, 1500))
150 | ax[1, i].grid(True)
151 |
152 |
153 | plt.gcf().subplots_adjust(bottom=0.25)
154 | plt.figlegend(
155 | (plot_tos, plot_tos_nols, plot_pdhg),
156 | (
157 | "Adaptive three operator splitting",
158 | "Three operator splitting",
159 | "Primal-dual hybrid gradient",
160 | ),
161 | "lower center",
162 | ncol=2,
163 | scatterpoints=1,
164 | frameon=False,
165 | )
166 |
167 | ax[1, 0].set_ylabel("Objective minus optimum")
168 | plt.show()
169 |
--------------------------------------------------------------------------------
/examples/pytorch/README.txt:
--------------------------------------------------------------------------------
1 | .. _pytorch_examples:
2 |
3 | PyTorch integration
4 | -------------------
5 |
6 | Examples that optimize PyTorch functions.
--------------------------------------------------------------------------------
/examples/pytorch/adversarial_example.py:
--------------------------------------------------------------------------------
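"""Craft an adversarial example for a RobustBench CIFAR-10 classifier.

The negative cross-entropy loss is minimized over an L1 ball with
three-operator splitting, while a second proximal operator keeps the
perturbed image inside the valid [0, 1] pixel range. The original image,
the perturbation and the perturbed image are plotted together with the
model's predictions.
"""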
1 | import numpy as np
2 | import torch
3 | import copt
4 | from copt.utils_pytorch import make_func_and_grad
5 |
6 | from robustbench.data import load_cifar10
7 | from robustbench.utils import load_model
8 |
9 | import matplotlib.pyplot as plt
10 |
11 | n_examples = 20
12 | data_batch, target_batch = load_cifar10(n_examples=n_examples, data_dir='~/datasets')
13 |
14 | model = load_model("Standard")
15 | criterion = torch.nn.CrossEntropyLoss()
16 |
17 | # Define the constraint set + initial point
18 | alpha = 10.
19 | constraint = copt.constraint.L1Ball(alpha)
20 |
21 | for data, target in zip(data_batch, target_batch):
22 | data, target = data.unsqueeze(0), target.unsqueeze(0)
23 |
24 | # Define the loss function to be minimized, using Pytorch
25 | def loss_fun(delta):
26 | adv_input = data + delta
27 | return -criterion(model(adv_input), target)
28 |
29 | # Change the function to f_grad: returns loss_val, grad in flattened, numpy array
30 | f_grad = make_func_and_grad(loss_fun, data.shape, data.device, dtype=data.dtype)
31 |
32 | img_np = data.cpu().numpy().squeeze().flatten()
33 |
34 | def image_constraint_prox(delta, step_size=None):
35 | """Projects perturbation delta so that x + delta is in the set of images,
36 | i.e. the (0, 1) range."""
37 | adv_img_np = img_np + delta
38 | delta = adv_img_np.clip(0, 1) - img_np
39 | return delta
40 |
41 | callback = copt.utils.Trace(lambda delta: f_grad(delta)[0])
42 |
43 | delta0 = np.zeros(data.shape, dtype=float).flatten()
44 |
45 | sol = copt.minimize_three_split(f_grad, delta0, constraint.prox,
46 | image_constraint_prox, callback=callback,
47 | max_iter=50
48 | )
49 |
50 | fig, ax = plt.subplots()
51 | ax.plot([-loss_val for loss_val in callback.trace_fx], lw=3)
52 | ax.set_yscale("log")
53 | ax.set_xlabel("# Iterations")
54 | ax.set_ylabel("Objective value")
55 | ax.grid()
56 |
57 | plt.show()
58 |
59 | classes = ('plane', 'car', 'bird', 'cat',
60 | 'deer', 'dog', 'frog', 'horse',
61 | 'ship', 'truck')
62 |
63 | img = data.cpu().numpy().squeeze()
64 | perturbation = sol.x.reshape(img.shape)
65 | adv_img = img + perturbation
66 |
67 | img = img.transpose(1, 2, 0)
68 | perturbation = perturbation.transpose(1, 2, 0)
69 | adv_img = adv_img.transpose(1, 2, 0)
70 |
71 | # Project back so that 0 <= x + delta <= 1
72 | adv_img = np.clip(adv_img, 0, 1)
73 | perturbation = adv_img - img
74 |
75 | fig, axes = plt.subplots(ncols=3)
76 | img_ax, pert_ax, adv_img_ax = axes
77 |
78 | output = torch.nn.functional.softmax(model(data), dim=-1)
79 | label = torch.argmax(output)
80 |
81 | pert_tensor = torch.tensor(sol.x, dtype=data.dtype).to(data.device)
82 | pert_tensor = pert_tensor.reshape(data.shape)
83 | adv_output = torch.nn.functional.softmax(model(torch.clamp(data + pert_tensor, 0., 1.)), dim=-1)
84 | adv_label = torch.argmax(adv_output)
85 |
86 | img_ax.set_title(f"Original image: {classes[label]}, p={output[:, label].item():.2f}")
87 | img_ax.imshow(img)
88 |
89 | pert_ax.set_title("Perturbation")
90 | pert_ax.imshow(abs(perturbation))
91 |
92 | adv_img_ax.set_title(f"Perturbed image: {classes[adv_label]}, p={adv_output[:, adv_label].item():.2f}")
93 | adv_img_ax.imshow(adv_img)
94 | plt.tight_layout()
95 |
96 | plt.show()
97 |
--------------------------------------------------------------------------------
/examples/pytorch/adversarial_example_accuracies.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import numpy as np
3 | import torch
4 |
5 | from tqdm import tqdm
6 |
7 | import copt
8 | from copt.utils_pytorch import make_func_and_grad
9 |
10 | from robustbench.data import load_cifar10
11 | from robustbench.utils import load_model
12 |
13 |
14 | n_examples = 10000
15 | data_batch, target_batch = load_cifar10(n_examples=n_examples, data_dir='~/datasets')
16 |
17 | model_name = "Engstrom2019Robustness"
18 | model = load_model(model_name)
19 | criterion = torch.nn.CrossEntropyLoss()
20 |
21 | # Define the constraint set
22 | alpha = 0.5
23 | constraint = copt.constraint.L2Ball(alpha)
24 |
25 | n_correct = 0
26 | n_correct_adv = 0
27 |
28 |
29 | # Define the loss function to be minimized, using Pytorch
30 | def loss_fun(delta, data):
31 | adv_input = data + delta
32 | return -criterion(model(adv_input), target)
33 |
34 |
35 | print(f"Evaluating model {model_name}, on L{constraint.p}Ball({alpha}).")
36 |
37 | for k, (data, target) in tqdm(enumerate(zip(data_batch, target_batch))):
38 | data, target = data.unsqueeze(0), target.unsqueeze(0)
39 |
40 | loss_fun_data = partial(loss_fun, data=data)
41 |     # Wrap the loss into f_grad: it returns (loss value, gradient) as a flat numpy array
42 | f_grad = make_func_and_grad(loss_fun_data, data.shape, data.device, dtype=data.dtype)
43 |
44 | img_np = data.cpu().numpy().squeeze().flatten()
45 |
46 | def image_constraint_prox(delta, step_size=None):
47 | """Projects perturbation delta so that x + delta is in the set of images,
48 |         i.e. the [0, 1] range."""
49 | adv_img_np = img_np + delta
50 | delta = adv_img_np.clip(0, 1) - img_np
51 | return delta
52 |
53 | delta0 = np.zeros(data.shape, dtype=float).flatten()
54 |
55 | callback = copt.utils.Trace(lambda delta: f_grad(delta)[0])
56 |
57 | sol = copt.minimize_three_split(f_grad, delta0, constraint.prox,
58 | image_constraint_prox, callback=callback,
59 | max_iter=25
60 | )
61 | label = torch.argmax(model(data), dim=-1)
62 |
63 | pert_tensor = torch.tensor(sol.x, dtype=data.dtype).to(data.device)
64 | pert_tensor = pert_tensor.reshape(data.shape)
65 | adv_label = torch.argmax(model(torch.clamp(data + pert_tensor, 0., 1.)), dim=-1)
66 |
67 | n_correct += (label == target).item()
68 | n_correct_adv += (adv_label == target).item()
69 |
70 | accuracy = n_correct / n_examples
71 | accuracy_adv = n_correct_adv / n_examples
72 |
73 | print(f"Accuracy: {accuracy:.3f}")
74 | print(f"Robust accuracy: {accuracy_adv:.3f}")
75 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel", "numpy", "scipy", "tqdm", "scikit-learn", "six"] # PEP 518 specifications.
3 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | filterwarnings =
3 | ignore::PendingDeprecationWarning
4 | ignore::RuntimeWarning
5 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | pytest >= 3.8.0
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import io
2 | 
3 | from setuptools import setup
4 |
5 | CLASSIFIERS = """\
6 | Development Status :: 5 - Production/Stable
7 | Intended Audience :: Science/Research
8 | Intended Audience :: Developers
9 | License :: OSI Approved
10 | Programming Language :: Python
11 | Programming Language :: Python :: 2
12 | Programming Language :: Python :: 3
13 | Topic :: Software Development
14 | Operating System :: POSIX
15 | Operating System :: Unix
16 |
17 | """
18 |
19 | setup(
20 | name="copt",
21 | description="Library for composite optimization in Python",
22 | long_description=io.open("README.rst", encoding="utf-8").read(),
23 | version="0.9.1",
24 | author="Fabian Pedregosa",
25 | author_email="f@bianp.net",
26 | url="http://pypi.python.org/pypi/copt",
27 | packages=["copt"],
28 | install_requires=["numpy", "scipy", "tqdm", "scikit-learn", "six"],
29 | classifiers=[_f for _f in CLASSIFIERS.split("\n") if _f],
30 | package_data={"copt": ["data/img1.csv"]},
31 | license="New BSD License",
32 | )
33 |
--------------------------------------------------------------------------------
/tests/test_frank_wolfe.py:
--------------------------------------------------------------------------------
1 | """Tests for the Frank-Wolfe algorithm."""
2 | import numpy as np
3 | import pytest
4 | from scipy import optimize
5 | import copt as cp
6 |
7 | np.random.seed(0)
8 | n_samples, n_features = 20, 16
9 | A = np.random.randn(n_samples, n_features)
10 | w = np.random.randn(n_features)
11 | b = A.dot(w) + np.random.randn(n_samples)
12 |
13 | # we will use a logistic loss, which requires target
14 | # values in the [0, 1] range
15 | b = np.abs(b / np.max(np.abs(b)))
16 |
17 | LOSS_FUNCS = [cp.loss.LogLoss, cp.loss.SquareLoss]
18 |
19 |
20 | def test_fw_api():
21 | """Check that FW takes the right arguments and raises the right exceptions."""
22 |
23 | # test that the algorithm does not fail if x0
24 | # is a tuple
25 | f = cp.loss.LogLoss(A, b, 1.0 / n_samples)
26 | cb = cp.utils.Trace(f)
27 | alpha = 1.0
28 | l1ball = cp.constraint.L1Ball(alpha)
29 | cp.minimize_frank_wolfe(
30 | f.f_grad,
31 | [0] * n_features,
32 | l1ball.lmo,
33 | tol=0,
34 | lipschitz=f.lipschitz,
35 | callback=cb,
36 | )
37 |
38 |     # check that we raise an exception when the DR step-size is used
39 |     # but no Lipschitz constant is given
40 | with pytest.raises(ValueError):
41 | cp.minimize_frank_wolfe(f.f_grad, [0] * n_features, l1ball.lmo, step="DR")
42 |
43 |
44 | @pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
45 | @pytest.mark.parametrize("loss_grad", LOSS_FUNCS)
46 | def test_fw_l1(loss_grad, alpha):
47 | """Test result of FW algorithm with L1 constraint."""
48 | f = loss_grad(A, b, 1.0 / n_samples)
49 | cb = cp.utils.Trace(f)
50 | l1ball = cp.constraint.L1Ball(alpha)
51 | opt = cp.minimize_frank_wolfe(
52 | f.f_grad,
53 | np.zeros(n_features),
54 | l1ball.lmo,
55 | tol=1e-3,
56 | lipschitz=f.lipschitz,
57 | callback=cb,
58 | )
59 | assert np.isfinite(opt.x).sum() == n_features
60 |
61 | ss = 1 / f.lipschitz
62 | grad = f.f_grad(opt.x)[1]
63 | grad_map = (opt.x - l1ball.prox(opt.x - ss * grad, ss)) / ss
64 | assert np.linalg.norm(grad_map) < 0.3
65 |
66 |
67 | def test_callback():
68 |     """Make sure that the algorithm exits when the callback returns False."""
69 |
70 | def cb(_):
71 | return False
72 |
73 | l1ball = cp.constraint.L1Ball(1)
74 | f = cp.loss.SquareLoss(A, b)
75 | opt = cp.minimize_frank_wolfe(f.f_grad, np.zeros(n_features), l1ball.lmo, callback=cb)
76 | assert opt.nit < 2
77 |
78 |
79 | def exact_line_search(kw):
80 |
81 | def f_on_line(gamma):
82 | return kw["func_and_grad"](kw["x"] + gamma * kw["update_direction"])[0]
83 |
84 | line_sol = optimize.minimize_scalar(f_on_line, method='bounded', bounds=[0, kw["max_step_size"]])
85 | return line_sol.x
86 |
87 |
88 | @pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
89 | @pytest.mark.parametrize("obj", LOSS_FUNCS)
90 | @pytest.mark.parametrize("step", ["DR", "backtracking", "sublinear", exact_line_search])
91 | def test_fw_backtrack(obj, step, alpha):
92 | """Test FW with different options of the line-search strategy."""
93 | f = obj(A, b, 1.0 / n_samples)
94 | traceball = cp.constraint.TraceBall(alpha, (4, 4))
95 | opt = cp.minimize_frank_wolfe(
96 | f.f_grad,
97 | np.zeros(n_features),
98 | traceball.lmo,
99 | tol=0,
100 | lipschitz=f.lipschitz,
101 | step=step,
102 | max_iter=1000,
103 | )
104 | assert np.isfinite(opt.x).sum() == n_features
105 |
106 | ss = 1 / f.lipschitz
107 | grad = f.f_grad(opt.x)[1]
108 |     # this is the gradient mapping, which is zero at the optimum
109 | grad_map = (opt.x - traceball.prox(opt.x - ss * grad, ss)) / ss
110 | assert np.linalg.norm(grad_map) < 0.4
111 |
112 |
113 | @pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
114 | @pytest.mark.parametrize("obj", LOSS_FUNCS)
115 | @pytest.mark.parametrize("step", ["DR", "backtracking", exact_line_search])
116 | def test_pairwise_fw(obj, step, alpha):
117 | """Test the Pairwise FW method."""
118 | f = obj(A, b, 1.0 / n_samples)
119 |
120 | l1ball = cp.constraint.L1Ball(alpha)
121 | x0 = np.zeros(A.shape[1])
122 | x0[0] = alpha
123 | cb = cp.utils.Trace(f)
124 | opt = cp.minimize_frank_wolfe(
125 | f.f_grad, x0, l1ball.lmo_pairwise, x0_rep=(1., 0),
126 | step=step, lipschitz=f.lipschitz, callback=cb,
127 | variant='pairwise'
128 | )
129 | assert np.isfinite(opt.x).sum() == n_features
130 |
131 | ss = 1 / f.lipschitz
132 | grad = f.f_grad(opt.x)[1]
133 |     # this is the gradient mapping, which is zero at the optimum
134 | grad_map = (opt.x - l1ball.prox(opt.x - ss * grad, ss)) / ss
135 |
136 | assert np.linalg.norm(grad_map) < 0.2
137 |
--------------------------------------------------------------------------------
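Several tests above and below assert optimality through the gradient mapping G(x) = (x - prox_{ss*g}(x - ss*grad f(x))) / ss, which vanishes exactly at solutions of min f(x) + g(x). A minimal, self-contained sketch of that check on a small lasso-type problem (the data here is purely illustrative):

    import numpy as np
    import copt

    # Small least-squares + L1 problem.
    rng = np.random.RandomState(0)
    A = rng.randn(30, 5)
    b = rng.randn(30)
    f = copt.loss.SquareLoss(A, b)
    pen = copt.penalty.L1Norm(0.1)

    opt = copt.minimize_proximal_gradient(f.f_grad, np.zeros(5), prox=pen.prox, tol=1e-12)

    # Gradient mapping: zero (up to tolerance) at a minimizer of f + pen.
    ss = 1.0 / f.lipschitz
    grad = f.f_grad(opt.x)[1]
    grad_map = (opt.x - pen.prox(opt.x - ss * grad, ss)) / ss
    print(np.linalg.norm(grad_map))  # should be close to zero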
/tests/test_loss.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copt as cp
3 | from scipy import optimize
4 | from scipy import sparse
5 |
6 | import copt.loss
7 |
8 | n_samples, n_features = 100, 10
9 | A_dense = np.random.randn(n_samples, n_features)
10 | b = np.random.uniform(0, 1, size=n_samples)
11 | A_sparse = sparse.rand(n_samples, n_features, density=0.5, format="csr")
12 |
13 |
14 | def test_loss_grad():
15 | for A in (A_dense, A_sparse):
16 | for loss in [copt.loss.LogLoss, copt.loss.SquareLoss, copt.loss.HuberLoss]:
17 | f = loss(A, b)
18 | err = optimize.check_grad(
19 | f, lambda x: f.f_grad(x)[1], np.random.randn(n_features)
20 | )
21 | assert err < 1e-6
22 |
23 |
24 | def test_log_hess():
25 | for A in (A_dense, A_sparse):
26 | f = copt.loss.LogLoss(A, b)
27 | x = np.random.randn(n_features)
28 | Hs = f.hessian_mv(x)
29 |
30 | def obj(x):
31 | return f.f_grad(x)[1][0]
32 |
33 | def grad(x):
34 | return f.hessian_mv(x)(np.eye(x.size)[0])
35 |
36 | err = optimize.check_grad(obj, grad, np.random.randn(n_features))
37 | assert err < 1e-6
38 |
--------------------------------------------------------------------------------
/tests/test_matmul_speedup.py:
--------------------------------------------------------------------------------
1 | import scipy.sparse as sparse
2 | import numpy as np
3 | import copt as cp
4 |
5 | n_samples, n_features = 1000, 100
6 | n_subset = 5
7 | A_sparse = sparse.rand(n_samples, n_features, density=0.5, format="csr")
8 | x = np.random.rand(n_features)
9 | u = np.random.rand(n_subset)
10 | idx = np.random.choice(n_samples, n_subset)
11 |
12 |
13 | def test_fast_csr_vm():
14 | res = cp.utils.fast_csr_vm(u,
15 | A_sparse.data, A_sparse.indptr, A_sparse.indices,
16 | n_features, idx)
17 | assert np.allclose(res, cp.utils.safe_sparse_dot(u, A_sparse[idx]))
18 |
19 |
20 | def test_fast_csr_mv():
21 | res = cp.utils.fast_csr_mv(A_sparse.data, A_sparse.indptr, A_sparse.indices,
22 | x, idx)
23 | assert np.allclose(res, cp.utils.safe_sparse_dot(A_sparse[idx], x))
24 |
--------------------------------------------------------------------------------
/tests/test_penalties.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copt as cp
3 | import copt.constraint
4 | import copt.penalty
5 | from copt import tv_prox
6 | from numpy import testing
7 | import pytest
8 |
9 | proximal_penalties = [
10 | copt.penalty.L1Norm(1.0),
11 | copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
12 | copt.penalty.TraceNorm(1.0, (4, 4)),
13 | copt.constraint.TraceBall(1.0, (4, 4)),
14 | copt.penalty.TotalVariation2D(1.0, (4, 4)),
15 | copt.penalty.FusedLasso(1.0),
16 | ]
17 |
18 |
19 | def test_GroupL1():
20 | groups = [(0, 1), (2, 3)]
21 | g1 = copt.penalty.GroupL1(1.0, groups)
22 | _, B = g1.prox_factory(5)
23 | assert np.all(
24 | B.toarray()
25 | == np.array(
26 | [
27 | [1.0, 1.0, 0.0, 0.0, 0.0],
28 | [0.0, 0.0, 1.0, 1.0, 0.0],
29 | [0.0, 0.0, 0.0, 0.0, -1.0],
30 | ]
31 | )
32 | )
33 |
34 | groups = [(0, 1), (3, 4)]
35 | g2 = copt.penalty.GroupL1(1.0, groups)
36 | _, B = g2.prox_factory(5)
37 | assert np.all(
38 | B.toarray()
39 | == np.array(
40 | [
41 | [1.0, 1.0, 0.0, 0.0, 0.0],
42 | [0.0, 0.0, -1.0, 0.0, 0.0],
43 | [0.0, 0.0, 0.0, 1.0, 1.0],
44 | ]
45 | )
46 | )
47 |
48 |
49 | #
50 | # for blocks in [[(0, 1), (2, 3)], ]:
51 | # pen = cp.utils.GroupL1(1., blocks)
52 | # counter = 0
53 | # for g in pen.groups:
54 | # for j in g:
55 | # counter += 1
56 | # assert counter == blocks.size
57 | # assert pen.groups
58 | # for g in pen.groups:
59 | # assert np.unique(blocks[g]).size == 1
60 |
61 |
62 | def test_tv1_prox():
63 |     """
64 |     Use the properties of strongly convex functions to test the implementation
65 |     of the TV1D proximal operator. In particular, we apply to the proximal
66 |     objective f(y) = gamma * TV(y) + ||y - x||^2 / 2 the inequality
67 | 
68 |     f(x) - f(x^*) >= (mu / 2) ||x - x^*||^2
69 | 
70 |     valid for any mu-strongly convex f with optimum x^*; here mu = 1.
71 |     """
72 | n_features = 10
73 | gamma = np.random.rand()
74 | epsilon = 1e-10 # account for some numerical errors
75 |
76 | tv_norm = lambda x: np.sum(np.abs(np.diff(x)))
77 | for _ in range(1000):
78 | x = np.random.randn(n_features)
79 | x_next = tv_prox.prox_tv1d(x, gamma)
80 | diff_obj = tv_norm(x) - tv_norm(x_next)
81 | testing.assert_array_less(
82 | ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
83 | )
84 |
85 |
86 | def test_tv2_prox():
87 | """
88 |     Similar test, but for the 2D total variation penalty.
89 | """
90 | np.random.seed(0)
91 | n_rows, n_cols = 6, 8
92 | n_features = n_rows * n_cols
93 | gamma = np.random.rand()
94 | epsilon = 0.1 # account for some numerical errors
95 |
96 | def tv_norm(x, n_rows, n_cols):
97 | X = x.reshape((n_rows, n_cols))
98 |         return np.sum(np.abs(np.diff(X, axis=0))) + np.sum(np.abs(np.diff(X, axis=1)))
99 |
100 | for nrun in range(20):
101 | x = np.random.randn(n_features)
102 | x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000)
103 | diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols)
104 | testing.assert_array_less(
105 | ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
106 | )
107 |
108 |
109 | def test_tv2d_linear_operator():
110 | n_rows, n_cols = 20, 10
111 |
112 | def TV(w):
113 | img = w.reshape((n_rows, n_cols))
114 | tmp1 = np.abs(np.diff(img, axis=0))
115 | tmp2 = np.abs(np.diff(img, axis=1))
116 | return tmp1.sum() + tmp2.sum()
117 |
118 | L = tv_prox.tv2d_linear_operator(n_rows, n_cols)
119 | x = np.random.randn(n_rows * n_cols)
120 | testing.assert_almost_equal(np.abs(L.dot(x)).sum(), TV(x))
121 |
122 |
123 | @pytest.mark.parametrize("pen", proximal_penalties)
124 | def test_three_inequality(pen):
125 |     """Test proximal operators using the three-point inequality: for xi = prox_g(z)
126 |     and any u, 2 (g(xi) - g(u)) <= ||u - z||^2 - ||u - xi||^2 - ||xi - z||^2.
127 | 
128 |     The inequality is described, e.g., in Lemma 1.4 of "Gradient-Based Algorithms
129 |     with Applications to Signal Recovery Problems", Amir Beck and Marc Teboulle.
130 |     """
131 | n_features = 16
132 |
133 | for _ in range(10):
134 | z = np.random.randn(n_features)
135 | u = np.random.randn(n_features)
136 | xi = pen.prox(z, 1.0)
137 |
138 | lhs = 2 * (pen(xi) - pen(u))
139 | rhs = (
140 | np.linalg.norm(u - z) ** 2
141 | - np.linalg.norm(u - xi) ** 2
142 | - np.linalg.norm(xi - z) ** 2
143 | )
144 | assert lhs <= rhs, pen
145 |
--------------------------------------------------------------------------------
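For reference, a worked version (in LaTeX) of the strong-convexity argument that test_tv1_prox above relies on, writing x^+ for prox_{gamma*TV}(x):

    % F(y) = \gamma \,\mathrm{TV}(y) + \tfrac{1}{2}\|y - x\|^2 is 1-strongly convex and is
    % minimized at x^+ = \mathrm{prox}_{\gamma \mathrm{TV}}(x), so F(x) - F(x^+) \ge \tfrac{1}{2}\|x - x^+\|^2:
    \[
      \gamma\,\mathrm{TV}(x) - \gamma\,\mathrm{TV}(x^+) - \tfrac{1}{2}\|x^+ - x\|^2
      \;\ge\; \tfrac{1}{2}\|x - x^+\|^2
      \quad\Longleftrightarrow\quad
      \frac{\|x - x^+\|^2}{\gamma} \;\le\; \mathrm{TV}(x) - \mathrm{TV}(x^+),
    \]
    % which is exactly the inequality asserted (up to the epsilon slack) in the test.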
/tests/test_proximal_gradient.py:
--------------------------------------------------------------------------------
1 | """Tests for gradient-based methods."""
2 | import copt as cp
3 | import numpy as np
4 | import pytest
5 | from scipy import optimize
6 |
7 | import copt.loss
8 | import copt.penalty
9 |
10 | np.random.seed(0)
11 | n_samples, n_features = 20, 10
12 | A = np.random.randn(n_samples, n_features)
13 | w = np.random.randn(n_features)
14 | b = A.dot(w) + np.random.randn(n_samples)
15 |
16 | # we will use a logistic loss, which requires target
17 | # values in the [0, 1] range
18 | b = np.abs(b / np.max(np.abs(b)))
19 |
20 |
21 | # the accelerated variant, to pass it as a method parameter
22 | def minimize_accelerated(*args, **kw):
23 | kw["accelerated"] = True
24 | return cp.minimize_proximal_gradient(*args, **kw)
25 |
26 |
27 | loss_funcs = [copt.loss.LogLoss, copt.loss.SquareLoss, copt.loss.HuberLoss]
28 | penalty_funcs = [None, copt.penalty.L1Norm]
29 |
30 |
31 | def test_gradient():
32 | for _ in range(20):
33 | A = np.random.randn(10, 5)
34 | b = np.random.rand(10)
35 | for loss in loss_funcs:
36 | f_grad = loss(A, b).f_grad
37 | f = lambda x: f_grad(x)[0]
38 | grad = lambda x: f_grad(x)[1]
39 | eps = optimize.check_grad(f, grad, np.random.randn(5))
40 | assert eps < 0.001
41 |
42 |
43 | def certificate(x, grad_x, prox):
44 | if prox is None:
45 |
46 | def prox_(x, _):
47 | return x
48 |
49 | else:
50 | prox_ = prox
51 |
52 | return np.linalg.norm(x - prox_(x - grad_x, 1))
53 |
54 |
55 | @pytest.mark.parametrize("accelerated", [True, False])
56 | @pytest.mark.parametrize("loss", loss_funcs)
57 | @pytest.mark.parametrize("penalty", penalty_funcs)
58 | def test_optimize(accelerated, loss, penalty):
59 |     """Test a method on both the line-search and fixed step-size strategies."""
60 | max_iter = 200
61 | for alpha in np.logspace(-1, 3, 3):
62 | obj = loss(A, b, alpha)
63 | if penalty is not None:
64 | prox = penalty(1e-3).prox
65 | else:
66 | prox = None
67 | opt = cp.minimize_proximal_gradient(
68 | obj.f_grad,
69 | np.zeros(n_features),
70 | prox=prox,
71 | jac=True,
72 | step="backtracking",
73 | max_iter=max_iter,
74 | accelerated=accelerated,
75 | )
76 | grad_x = obj.f_grad(opt.x)[1]
77 | assert certificate(opt.x, grad_x, prox) < 1e-5
78 |
79 | opt_2 = cp.minimize_proximal_gradient(
80 | obj.f_grad,
81 | np.zeros(n_features),
82 | prox=prox,
83 | jac=True,
84 | max_iter=max_iter,
85 | step=lambda x: 1 / obj.lipschitz,
86 | accelerated=accelerated,
87 | )
88 | grad_2x = obj.f_grad(opt_2.x)[1]
89 | assert certificate(opt_2.x, grad_2x, prox) < 1e-5
90 |
91 |
92 | @pytest.mark.parametrize(
93 | "solver",
94 | [
95 | cp.minimize_proximal_gradient,
96 | cp.minimize_three_split,
97 | cp.minimize_primal_dual,
98 | minimize_accelerated,
99 | ],
100 | )
101 | def test_callback(solver):
102 |     """Make sure that the algorithm exits when the callback returns False."""
103 |
104 | def cb(_):
105 | return False
106 |
107 | f = copt.loss.SquareLoss(A, b)
108 | opt = solver(f.f_grad, np.zeros(n_features), callback=cb)
109 | assert opt.nit < 2
110 |
111 |
112 | @pytest.mark.parametrize(
113 | "solver", [cp.minimize_proximal_gradient, minimize_accelerated]
114 | )
115 | def test_line_search(solver):
116 | """Test the custom line search option."""
117 |
118 | def ls_wrong(_):
119 | return -10
120 |
121 | ls_loss = copt.loss.SquareLoss(A, b)
122 |
123 | # define a function with unused arguments for the API
124 | def f_grad(x, r1, r2):
125 | return ls_loss.f_grad(x)
126 |
127 | opt = solver(
128 | f_grad, np.zeros(n_features), step=ls_wrong, args=(None, None), jac=True
129 | )
130 | assert not opt.success
131 |
132 | # Define an exact line search strategy
133 | def exact_ls(kw):
134 | def f_ls(gamma):
135 | x_next = kw["prox"](kw["x"] - gamma * kw["grad_fk"], gamma)
136 | return kw["func_and_grad"](x_next)[0]
137 |
138 | ls_sol = optimize.minimize_scalar(f_ls, bounds=[0, 1], method="bounded")
139 | return ls_sol.x
140 |
141 | opt = solver(
142 | f_grad, np.zeros(n_features), step=exact_ls, args=(None, None), jac=True
143 | )
144 | assert opt.success
145 |
--------------------------------------------------------------------------------
/tests/test_randomized.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy import sparse
3 | import copt as cp
4 | import copt.loss
5 | import copt.penalty
6 | from copt import randomized
7 | import pytest
8 |
9 | np.random.seed(0)
10 | n_samples, n_features = 20, 10
11 | density = 0.5
12 | A = sparse.random(n_samples, n_features, density=density)
13 | A2 = sparse.random(n_samples, n_features + 1, density=density)
14 | w = np.random.randn(n_features)
15 | b = A.dot(w) + np.random.randn(n_samples)
16 |
17 | # we will use a logistic loss, which requires target
18 | # values in the [0, 1] range
19 | b = np.abs(b / np.max(np.abs(b)))
20 |
21 | all_solvers_unconstrained = (
22 | ["SAGA", cp.minimize_saga, 1e-3],
23 | ["SVRG", cp.minimize_svrg, 1e-3],
24 | ["VRTOS", cp.minimize_vrtos, 1e-3],
25 | )
26 |
27 |
28 | @pytest.mark.parametrize("name_solver, solver, tol", all_solvers_unconstrained)
29 | def test_optimize(name_solver, solver, tol):
30 | f = copt.loss.LogLoss(A, b)
31 | for alpha in np.logspace(-3, 3, 3):
32 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
33 | opt = solver(
34 | f.partial_deriv,
35 | A,
36 | b,
37 | np.zeros(n_features),
38 | 1 / (3 * L),
39 | alpha=alpha,
40 | max_iter=200,
41 | tol=1e-10,
42 | )
43 | grad = copt.loss.LogLoss(A, b, alpha).f_grad(opt.x)[1]
44 | assert np.linalg.norm(grad) < tol, name_solver
45 |
46 |
47 | def test_saga_l1():
48 | alpha = 1.0 / n_samples
49 | f = copt.loss.LogLoss(A, b, alpha)
50 | for beta in np.logspace(-3, 3, 3):
51 | pen = copt.penalty.L1Norm(beta)
52 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
53 |
54 | for solver in [cp.minimize_saga, cp.minimize_svrg]:
55 | opt = solver(
56 | f.partial_deriv,
57 | A,
58 | b,
59 | np.zeros(n_features),
60 | 1 / (3 * L),
61 | alpha=alpha,
62 | max_iter=500,
63 | tol=1e-8,
64 | prox=pen.prox_factory(n_features),
65 | )
66 | grad = copt.loss.LogLoss(A, b, alpha).f_grad(opt.x)[1]
67 | x = opt.x
68 | ss = 1.0 / L
69 | # check that the gradient mapping vanishes
70 | grad_map = (x - pen.prox(x - ss * grad, ss)) / ss
71 | assert np.linalg.norm(grad_map) < 1e-6
72 |
73 |
74 | def test_vrtos():
75 | """Test VRTOS with no penalty."""
76 | alpha = 1.0 / n_samples
77 | f = copt.loss.LogLoss(A, b)
78 | for beta in np.logspace(-3, 3, 3):
79 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
80 |
81 | opt = cp.minimize_vrtos(
82 | f.partial_deriv,
83 | A,
84 | b,
85 | np.zeros(n_features),
86 | 1 / (3 * L),
87 | alpha=alpha,
88 | max_iter=200,
89 | )
90 |
91 | grad = copt.loss.LogLoss(A, b, alpha).f_grad(opt.x)[1]
92 | assert np.linalg.norm(grad) < 1e-6
93 |
94 |
95 | def test_vrtos_l1():
96 | alpha = 1.0 / n_samples
97 | f = copt.loss.LogLoss(A, b, alpha)
98 | for beta in np.logspace(-3, 3, 3):
99 | p_1 = copt.penalty.L1Norm(beta)
100 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
101 |
102 | # blocks = np.arange(n_features)
103 | opt_1 = cp.minimize_vrtos(
104 | f.partial_deriv,
105 | A,
106 | b,
107 | np.zeros(n_features),
108 | 1 / (3 * L),
109 | alpha=alpha,
110 | max_iter=200,
111 | prox_1=p_1.prox_factory(n_features),
112 | )
113 |
114 | opt_2 = cp.minimize_vrtos(
115 | f.partial_deriv,
116 | A,
117 | b,
118 | np.zeros(n_features),
119 | 1 / (3 * L),
120 | alpha=alpha,
121 | max_iter=200,
122 | prox_2=p_1.prox_factory(n_features),
123 | )
124 |
125 | for x in [opt_1.x, opt_2.x]:
126 | full_prox = copt.penalty.L1Norm(beta)
127 | grad = f.f_grad(x)[1]
128 | ss = 1.0 / L
129 | # check that the gradient mapping vanishes
130 | grad_map = (x - full_prox.prox(x - ss * grad, ss)) / ss
131 | assert np.linalg.norm(grad_map) < 1e-6
132 |
133 |
134 | all_groups = [
135 | [np.arange(5)],
136 | np.arange(5).reshape((-1, 1)),
137 | [np.arange(5), [5], [6], [7], [8], [9]],
138 | [np.arange(5), np.arange(5, 10)],
139 | ]
140 |
141 |
142 | @pytest.mark.parametrize("groups", all_groups)
143 | def test_gl(groups):
144 | alpha = 1.0 / n_samples
145 | f = copt.loss.LogLoss(A, b, alpha)
146 | for beta in np.logspace(-3, 3, 3):
147 | p_1 = copt.penalty.GroupL1(beta, groups)
148 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
149 |
150 | opt_1 = cp.minimize_vrtos(
151 | f.partial_deriv,
152 | A,
153 | b,
154 | np.zeros(n_features),
155 | 1 / (3 * L),
156 | alpha=alpha,
157 | max_iter=200,
158 | prox_1=p_1.prox_factory(n_features),
159 | )
160 |
161 | opt_2 = cp.minimize_vrtos(
162 | f.partial_deriv,
163 | A,
164 | b,
165 | np.zeros(n_features),
166 | 1 / (3 * L),
167 | alpha=alpha,
168 | max_iter=200,
169 | prox_2=p_1.prox_factory(n_features),
170 | )
171 |
172 | opt_3 = cp.minimize_saga(
173 | f.partial_deriv,
174 | A,
175 | b,
176 | np.zeros(n_features),
177 | 1 / (3 * L),
178 | alpha=alpha,
179 | max_iter=200,
180 | prox=p_1.prox_factory(n_features),
181 | )
182 |
183 | for x in [opt_1.x, opt_2.x, opt_3.x]:
184 | grad = copt.loss.LogLoss(A, b, alpha).f_grad(x)[1]
185 | ss = 1.0 / L
186 | # check that the gradient mapping vanishes
187 | grad_map = (x - p_1.prox(x - ss * grad, ss)) / ss
188 | assert np.linalg.norm(grad_map) < 1e-6
189 |
190 |
191 | def test_vrtos_ogl():
192 | """Test on overlapping group lasso"""
193 | alpha = 1.0 / n_samples
194 | groups_1 = [np.arange(8)]
195 | groups_2 = [np.arange(5, 10)]
196 | f = copt.loss.LogLoss(A, b, alpha)
197 | for beta in np.logspace(-3, 3, 3):
198 | p_1 = copt.penalty.GroupL1(beta, groups_1)
199 | p_2 = copt.penalty.GroupL1(beta, groups_2)
200 | L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
201 |
202 | opt_vrtos = cp.minimize_vrtos(
203 | f.partial_deriv,
204 | A,
205 | b,
206 | np.zeros(n_features),
207 | 1 / (3 * L),
208 | alpha=alpha,
209 | max_iter=200,
210 | prox_1=p_1.prox_factory(n_features),
211 | prox_2=p_2.prox_factory(n_features),
212 | )
213 |
214 | opt_tos = cp.minimize_three_split(
215 | f.f_grad, np.zeros(n_features), prox_1=p_1.prox, prox_2=p_2.prox
216 | )
217 |
218 | norm = np.linalg.norm(opt_tos.x)
219 | if norm < 1e-10:
220 | norm = 1
221 | assert np.linalg.norm(opt_vrtos.x - opt_tos.x) / norm < 1e-4
222 |
223 |
224 | @pytest.mark.parametrize("A_data", [A, A2])
225 | def test_vrtos_fl(A_data):
226 |     """Test VRTOS on the fused lasso penalty."""
227 | n_samples, n_features = A_data.shape
228 | alpha = 1.0 / n_samples
229 | f = copt.loss.LogLoss(A_data, b, alpha)
230 | for beta in np.logspace(-3, 3, 3):
231 | pen = copt.penalty.FusedLasso(beta)
232 | L = cp.utils.get_max_lipschitz(A_data, "logloss") + alpha / density
233 |
234 | opt_vrtos = cp.minimize_vrtos(
235 | f.partial_deriv,
236 | A_data,
237 | b,
238 | np.zeros(n_features),
239 | 1 / (3 * L),
240 | alpha=alpha,
241 | max_iter=2000,
242 | prox_1=pen.prox_1_factory(n_features),
243 | prox_2=pen.prox_2_factory(n_features),
244 | tol=0,
245 | )
246 |
247 | opt_pgd = cp.minimize_proximal_gradient(
248 | f.f_grad, np.zeros(n_features), prox=pen.prox, max_iter=2000, tol=0
249 | )
250 |
251 | norm = np.linalg.norm(opt_pgd.x)
252 | if norm < 1e-10:
253 | norm = 1
254 | assert np.linalg.norm(opt_vrtos.x - opt_pgd.x) / norm < 1e-4
255 |
256 | # check also the gradient mapping
257 | ss = 1.0 / L
258 | grad = f.f_grad(opt_vrtos.x)[1]
259 | grad_map = (opt_vrtos.x - pen.prox(opt_vrtos.x - ss * grad, ss)) / ss
260 | assert np.linalg.norm(grad_map) < 1e-6
261 |
--------------------------------------------------------------------------------
/tests/test_splitting.py:
--------------------------------------------------------------------------------
1 | """Tests for gradient-based methods."""
2 | import copt as cp
3 | import numpy as np
4 | import pytest
5 |
6 | import copt.loss
7 | import copt.penalty
8 |
9 | np.random.seed(0)
10 | n_samples, n_features = 20, 10
11 | A = np.random.randn(n_samples, n_features)
12 | w = np.random.randn(n_features)
13 | b = A.dot(w) + np.random.randn(n_samples)
14 |
15 | # we will use a logistic loss, which requires target
16 | # values in the [0, 1] range
17 | b = np.abs(b / np.max(np.abs(b)))
18 |
19 | all_solvers = (
20 | ["TOS", cp.minimize_three_split, 1e-12],
21 | ["PDHG", cp.minimize_primal_dual, 1e-5],
22 | )
23 |
24 | loss_funcs = [copt.loss.LogLoss, copt.loss.SquareLoss, copt.loss.HuberLoss]
25 | penalty_funcs = [(None, None), (copt.penalty.L1Norm, None), (None, copt.penalty.L1Norm)]
26 |
27 |
28 | def _get_prox(penalty):
29 | if penalty is not None:
30 | prox = penalty(1e-3).prox
31 | else:
32 | prox = None
33 | return prox
34 |
35 |
36 | @pytest.mark.parametrize("name_solver, solver, tol", all_solvers)
37 | @pytest.mark.parametrize("loss", loss_funcs)
38 | @pytest.mark.parametrize("penalty", penalty_funcs)
39 | def test_primal_dual_certificate(name_solver, solver, tol, loss, penalty):
40 |     """Test a method on both the backtracking and fixed step-size strategies."""
41 | max_iter = 1000
42 | for alpha in np.logspace(-1, 3, 3):
43 | obj = loss(A, b, alpha)
44 | prox_1 = _get_prox(penalty[0])
45 | prox_2 = _get_prox(penalty[1])
46 | trace = cp.utils.Trace(obj)
47 | opt = solver(
48 | obj.f_grad,
49 | np.zeros(n_features),
50 | prox_1=prox_1,
51 | prox_2=prox_2,
52 | tol=1e-12,
53 | max_iter=max_iter,
54 | callback=trace,
55 | )
56 | assert opt.certificate < tol, name_solver
57 |
58 | opt_2 = solver(
59 | obj.f_grad,
60 | np.zeros(n_features),
61 | prox_1=prox_1,
62 | prox_2=prox_2,
63 | max_iter=max_iter,
64 | tol=1e-12,
65 | line_search=False,
66 | step_size=1.0 / obj.lipschitz,
67 | )
68 | assert opt.certificate < tol, name_solver
69 | assert opt_2.certificate < tol, name_solver
70 |
71 |
72 | @pytest.mark.parametrize("line_search", [False, True])
73 | def test_PDHG_Lasso(line_search):
74 |     # test PDHG on a Lasso problem (L is the identity matrix)
75 | loss = copt.loss.SquareLoss(A, b)
76 | alpha = 0.1
77 | L = np.eye(A.shape[1]) # (np.diag(np.ones(A.shape[1]), k=0))[:-1]
78 | opt1 = copt.minimize_primal_dual(
79 | loss.f_grad,
80 | np.zeros(n_features),
81 | prox_1=None,
82 | prox_2=copt.penalty.L1Norm(alpha).prox,
83 | L=L,
84 | tol=1e-14,
85 | line_search=line_search,
86 | step_size=0.4,
87 | )
88 |
89 | opt2 = copt.minimize_proximal_gradient(
90 | loss.f_grad,
91 | np.zeros(n_features),
92 | prox=copt.penalty.L1Norm(alpha).prox,
93 | tol=1e-12,
94 | )
95 |
96 | assert np.linalg.norm(opt1.x - opt2.x) / np.linalg.norm(opt1.x) < 1e-3
97 |
98 |
99 | @pytest.mark.parametrize("line_search", [False, True])
100 | def test_PDHG_FusedLasso(line_search):
101 | """PDHG on a 1d-TV problem (aka FusedLasso)."""
102 | loss = copt.loss.SquareLoss(A, b)
103 | alpha = 0.1
104 | L = (np.diag(np.ones(A.shape[1]), k=0) - np.diag(np.ones(A.shape[1] - 1), k=1))[:-1]
105 | opt1 = copt.minimize_primal_dual(
106 | loss.f_grad,
107 | np.zeros(n_features),
108 | prox_1=None,
109 | prox_2=copt.penalty.L1Norm(alpha).prox,
110 | L=L,
111 | tol=1e-14,
112 | line_search=line_search,
113 | step_size=0.4,
114 | )
115 |
116 | opt2 = copt.minimize_proximal_gradient(
117 | loss.f_grad,
118 | np.zeros(n_features),
119 | prox=copt.penalty.FusedLasso(alpha).prox,
120 | tol=1e-12,
121 | )
122 |
123 | assert np.linalg.norm(opt1.x - opt2.x) / np.linalg.norm(opt1.x) < 1e-3
124 |
125 |
126 | @pytest.mark.parametrize("regularization", np.logspace(-5, 1, 4))
127 | @pytest.mark.parametrize("line_search", [False, True])
128 | def test_PDHG_TV2D(regularization, line_search):
129 | """PDHG on a 2d-TV problem."""
130 |
131 | img = np.random.randn(10, 10)
132 | n_rows, n_cols = img.shape
133 | n_feat = n_rows * n_cols
134 | loss = copt.loss.SquareLoss(np.eye(n_feat), img.ravel())
135 |
136 | def g_prox(x, gamma, pen=regularization):
137 | return cp.tv_prox.prox_tv1d_cols(gamma * pen, x, n_rows, n_cols)
138 |
139 | def h_prox(x, gamma, pen=regularization):
140 | return cp.tv_prox.prox_tv1d_rows(gamma * pen, x, n_rows, n_cols)
141 |
142 | opt1 = copt.minimize_primal_dual(
143 | loss.f_grad,
144 | np.zeros(n_feat),
145 | prox_1=g_prox,
146 | prox_2=h_prox,
147 | tol=1e-14,
148 | line_search=line_search,
149 | #step_size=0.4,
150 | )
151 |
152 | opt2 = copt.minimize_three_split(
153 | loss.f_grad,
154 | np.zeros(n_feat),
155 | prox_1=g_prox,
156 | prox_2=h_prox,
157 | tol=1e-12,
158 | )
159 |
160 | assert np.linalg.norm(opt1.x - opt2.x) / np.linalg.norm(opt1.x) < 1e-2
161 |
--------------------------------------------------------------------------------
/tests/test_stochastic_fw.py:
--------------------------------------------------------------------------------
1 | """Tests for the Stochastic Frank-Wolfe algorithms."""
2 | import numpy as np
3 | import pytest
4 | from scipy import optimize, sparse
5 | import copt as cp
6 | import copt.constraint
7 | import copt.loss
8 |
9 | np.random.seed(0)
10 | n_samples, n_features = 20, 16
11 | A = np.random.randn(n_samples, n_features)
12 | w = np.random.randn(n_features)
13 | b = A.dot(w) + np.random.randn(n_samples)
14 |
15 | # we will use a logistic loss, which requires target
16 | # values in the [0, 1] range
17 | b = np.abs(b / np.max(np.abs(b)))
18 |
19 | LOSS_FUNCS = [copt.loss.LogLoss]
20 | VARIANTS = ['SAGA', 'SAG', 'MHK', 'LF']
21 | BATCH_SIZES = [1, 10, n_samples]
22 |
23 |
24 | @pytest.mark.parametrize("variant", VARIANTS)
25 | @pytest.mark.parametrize("batch_size", BATCH_SIZES)
26 | def test_fw_api(variant, batch_size):
27 |     """Check that SFW algorithms take the right arguments and raise the right exceptions."""
28 |
29 | # test that the algorithm does not fail if x0
30 | # is a tuple
31 | f = copt.loss.LogLoss(A, b, 1.0 / n_samples)
32 | cb = cp.utils.Trace(f)
33 | alpha = 1.0
34 | l1ball = copt.constraint.L1Ball(alpha)
35 | cp.randomized.minimize_sfw(
36 | f.partial_deriv,
37 | A,
38 | b,
39 | [0] * n_features,
40 | l1ball.lmo,
41 | batch_size=batch_size,
42 | tol=0,
43 | callback=cb,
44 | variant=variant
45 | )
46 |
47 |
48 | @pytest.mark.parametrize("variant", VARIANTS)
49 | @pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
50 | @pytest.mark.parametrize("loss_grad", LOSS_FUNCS)
51 | def test_sfw_l1(variant, loss_grad, alpha):
52 | """Test SFW algorithms with L1 constraint."""
53 | f = loss_grad(A, b, 1.0 / n_samples)
54 | cb = cp.utils.Trace(f)
55 | l1ball = copt.constraint.L1Ball(alpha)
56 | opt = cp.randomized.minimize_sfw(
57 | f.partial_deriv,
58 | A,
59 | b,
60 | np.zeros(n_features),
61 | l1ball.lmo,
62 | tol=1e-3,
63 | callback=cb,
64 | variant=variant
65 | )
66 |
67 |
68 | @pytest.mark.parametrize("variant", VARIANTS)
69 | @pytest.mark.parametrize("alpha", [0.1, 1.0, 10.0, 100.0])
70 | @pytest.mark.parametrize("loss_grad", LOSS_FUNCS)
71 | def test_sfw_gap_traceback(variant, loss_grad, alpha):
72 | """Test outputting the FW gap for SFW algorithms."""
73 | f = loss_grad(A, b, 1.0 / n_samples)
74 | l1ball = copt.constraint.L1Ball(alpha)
75 |
76 | def fw_gap(x):
77 | _, grad = f.f_grad(x)
78 | return l1ball.lmo(-grad, x)[0].dot(-grad)
79 |
80 | class TraceGaps(cp.utils.Trace):
81 | def __init__(self, f=None, freq=1):
82 | super(TraceGaps, self).__init__(f, freq)
83 | self.trace_gaps = []
84 |
85 | def __call__(self, dl):
86 | self.trace_gaps.append(fw_gap(dl['x']))
87 | super(TraceGaps, self).__call__(dl)
88 |
89 | cb = TraceGaps(f)
90 |
91 | opt = cp.randomized.minimize_sfw(
92 | f.partial_deriv,
93 | A,
94 | b,
95 | np.zeros(n_features),
96 | l1ball.lmo,
97 | tol=1e-3,
98 | callback=cb,
99 | variant=variant
100 | )
101 |
102 |
103 | @pytest.mark.parametrize("variant", VARIANTS)
104 | @pytest.mark.parametrize("A", [sparse.random(n_samples, n_features, 0.1,
105 | fmt)
106 | for fmt in ['coo', 'csr', 'csc', 'lil']])
107 | def test_sfw_sparse(variant, A):
108 | """Check that SFW algorithms run on sparse data matrices."""
109 |
110 | f = copt.loss.LogLoss(A, b, 1.0 / n_samples)
111 | cb = cp.utils.Trace(f)
112 | alpha = 1.0
113 | l1ball = copt.constraint.L1Ball(alpha)
114 | cp.randomized.minimize_sfw(
115 | f.partial_deriv,
116 | A,
117 | b,
118 | np.zeros(n_features),
119 | l1ball.lmo,
120 | tol=0,
121 | callback=cb,
122 | variant=variant
123 | )
124 |
125 |
--------------------------------------------------------------------------------