├── .gitignore ├── LICENSE ├── README.md ├── examples ├── demo_simple_set.ipynb └── demo_unconstr_solvers.ipynb ├── liboptpy ├── __init__.py ├── base_optimizer.py ├── constr_solvers │ ├── __init__.py │ ├── _frank_wolfe.py │ └── _proj_gd.py ├── restarts.py ├── step_size.py └── unconstr_solvers │ ├── __init__.py │ ├── fo │ ├── __init__.py │ ├── _acc_gd.py │ ├── _cg.py │ ├── _dual_average.py │ ├── _gd.py │ ├── _quasi_newton.py │ └── _subgrad.py │ └── so │ ├── __init__.py │ ├── _inexact_newton.py │ └── _newton.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | /doc 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | .DS_Store 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Alexandr Katrutsa 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # liboptpy 2 | 3 | Library with implementations of optimization methods in Python 3 4 | 5 | ## Installing from source 6 | 7 | - ```git clone https://github.com/amkatrutsa/liboptpy.git``` 8 | - ```cd liboptpy``` 9 | - ```python setup.py install``` 10 | 11 | or 12 | 13 | ```pip install git+https://github.com/amkatrutsa/liboptpy``` 14 | 15 | ## Examples 16 | 17 | 1. [Unconstrained smooth and non-smooth optimization](./examples/demo_unconstr_solvers.ipynb) 18 | 2. [Comparison of projected gradient descent and Frank-Wolfe method](./examples/demo_simple_set.ipynb) 19 | 20 | ## Available optimization methods 21 | 22 | ### Unconstrained optimization problem 23 | 24 | #### Smooth objective functon 25 | 1. Gradient descent 26 | 2. Nesterov accelerated gradient descent 27 | 3. Newton method and inexact (truncated) Newton method with CG as linear solver 28 | 4. Conjugate gradient method 29 | - for convex quadratic function 30 | - for non-quadratic function (Fletcher-Reeves method) 31 | 5. Barzilai-Borwein method 32 | 33 | #### Non-smooth objective function 34 | 35 | 1. Subgradient method 36 | 2. Dual averaging method 37 | 38 | ### Constrained optimization problem 39 | 40 | 1. Projected gradient method 41 | 2. Frank-Wolfe method 42 | 3. Primal barrier method 43 | 44 | ### Available step size 45 | 46 | 1. Constant 47 | 2. Inverse number on iteration and scaled by gradient norm version 48 | 3. Inverse square root of number of iterationas and scaled by gradient norm version 49 | 4. Backtracking 50 | - Armijo rule 51 | - Wolfe rule 52 | - Strong Wolfe rule 53 | - Goldstein rule 54 | 5. Exact line search for quadratic function 55 | 56 | ## Contributing 57 | 58 | If you find any bugs, please fix them and send pull-request. 59 | If you want add some enhancement or something new, please open an issue for discussion. 60 | 61 | To send pull-request, you should make the following steps 62 | 63 | 1. Fork this repository 64 | 2. Clone the forked repository 65 | 3. Add original repositore as remote one 66 | 4. Create a branch in your local repository with specific name for your changes, e.g. ```bugfix``` 67 | 5. Switch to this branch 68 | 6. Change something that you assume make this repository better 69 | 7. Commit your changes in the branch ```bugfix``` with a meaningful comment, e.g. ```Fix typo``` 70 | 8. Switch to the branch ```master``` 71 | 9. Pull new commits to the branch ```master``` from this repository, not forked one 72 | 10. Switch to branch ```bugfix``` 73 | 11. Make ```git rebase master``` to take all new commits from original repository to branch ```bugfix``` 74 | 12. Make push to your forked repository in new remote branch ```bugfix``` 75 | 13. Send pull-request from your remote branch ```bugfix``` to ```master``` branch of the original repository 76 | -------------------------------------------------------------------------------- /examples/demo_simple_set.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import liboptpy.base_optimizer as base\n", 11 | "import liboptpy.constr_solvers as cs\n", 12 | "import liboptpy.step_size as ss\n", 13 | "import matplotlib.pyplot as plt\n", 14 | "%matplotlib inline\n", 15 | "plt.rc(\"text\", usetex=True)\n", 16 | "fontsize = 24\n", 17 | "figsize = (8, 6)\n", 18 | "import seaborn as sns\n", 19 | "sns.set_context(\"talk\")\n", 20 | "from tqdm import tqdm" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Box constrained least-squares problem\n", 28 | "\n", 29 | "\\begin{align*}\n", 30 | "& \\min \\frac{1}{2} \\|Ax - b\\|^2_2\\\\\n", 31 | "\\text{s.t. } & 0 \\leq x_i \\leq 1\n", 32 | "\\end{align*}" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "def func(x, A, b):\n", 42 | " return 0.5 * np.linalg.norm(A.dot(x) - b)**2\n", 43 | "\n", 44 | "f = lambda x: func(x, A, b)\n", 45 | "\n", 46 | "def grad_f(x, A, b):\n", 47 | " grad = -A.T.dot(b)\n", 48 | " grad = grad + A.T.dot(A).dot(x)\n", 49 | " return grad\n", 50 | "\n", 51 | "grad = lambda x: grad_f(x, A, b)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def linsolver(gradient):\n", 61 | " x = np.zeros(gradient.shape[0])\n", 62 | " pos_grad = gradient > 0\n", 63 | " neg_grad = gradient < 0\n", 64 | " x[pos_grad] = np.zeros(np.sum(pos_grad == True))\n", 65 | " x[neg_grad] = np.ones(np.sum(neg_grad == True))\n", 66 | " return x" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "def projection(y):\n", 76 | " return np.clip(y, 0, 1)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 10, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "m = 50\n", 86 | "n = 100\n", 87 | "A = np.random.randn(m, n)\n", 88 | "x_true = np.random.rand(n)\n", 89 | "b = A.dot(x_true) + 0.01 * np.random.randn(m)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 11, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "methods = {\"FW\": cs.FrankWolfe(f, grad, linsolver, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.)),\n", 99 | " \"PGD\": cs.ProjectedGD(f, grad, projection, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.))\n", 100 | " }" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 12, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "x0 = np.random.randn(n)\n", 110 | "max_iter = 300\n", 111 | "tol = 1e-5" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 13, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "name": "stdout", 121 | "output_type": "stream", 122 | "text": [ 123 | "\t FW\n", 124 | "Convergence in 75 iterations\n", 125 | "Function value = 5.503265417124177e-05\n", 126 | "Difference in function values = 7.738903387263182e-06\n", 127 | "Difference in argument = 0.0005312599507423358\n", 128 | "\t PGD\n", 129 | "Convergence in 32 iterations\n", 130 | "Function value = 4.9117890657412715e-05\n", 131 | "Difference in function values = 8.574133898022826e-06\n", 132 | "Difference in argument = 0.00043779727486897987\n" 133 | ] 134 | } 135 | ], 136 | "source": [ 137 | "for m_name in methods:\n", 138 | " print(\"\\t\", m_name)\n", 139 | " x = methods[m_name].solve(x0=x0, max_iter=max_iter, tol=tol, disp=1)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 14, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "image/png": "\n", 150 | "text/plain": [ 151 | "" 152 | ] 153 | }, 154 | "metadata": {}, 155 | "output_type": "display_data" 156 | } 157 | ], 158 | "source": [ 159 | "plt.figure(figsize=figsize)\n", 160 | "for m_name in methods:\n", 161 | " plt.semilogy([f(x) for x in methods[m_name].get_convergence()], label=m_name)\n", 162 | "plt.legend(fontsize=fontsize)\n", 163 | "plt.xlabel(\"Number of iteration, $k$\", fontsize=fontsize)\n", 164 | "plt.ylabel(r\"$f(x_k)$\", fontsize=fontsize)\n", 165 | "plt.xticks(fontsize=fontsize)\n", 166 | "_ = plt.yticks(fontsize=fontsize)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 10, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "\t FW\n", 179 | "31 ms ± 2.87 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 180 | "\t PGD\n", 181 | "8.89 ms ± 458 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 182 | ] 183 | } 184 | ], 185 | "source": [ 186 | "for key in methods:\n", 187 | " print(\"\\t {}\".format(key))\n", 188 | " %timeit methods[key].solve(x0, max_iter, tol)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "markdown", 193 | "metadata": {}, 194 | "source": [ 195 | "## Dependance accuracy and number of iterations on the required accuarcy " 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": 11, 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stderr", 205 | "output_type": "stream", 206 | "text": [ 207 | "8it [01:27, 10.88s/it]\n" 208 | ] 209 | } 210 | ], 211 | "source": [ 212 | "eps = [10**(-i) for i in range(8)]\n", 213 | "time_pg = np.zeros(len(eps))\n", 214 | "time_cg = np.zeros(len(eps))\n", 215 | "iter_pg = np.zeros(len(eps))\n", 216 | "iter_cg = np.zeros(len(eps))\n", 217 | "pg = cs.ProjectedGD(f, grad, projection)\n", 218 | "cg = cs.FrankWolfe(f, grad, linsolver, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.))\n", 219 | "for i, tol in tqdm(enumerate(eps)):\n", 220 | " res = %timeit -o -q pg.solve(x0=x0, tol=tol, max_iter=100000)\n", 221 | " time_pg[i] = res.average\n", 222 | " iter_pg[i] = len(pg.get_convergence())\n", 223 | " res = %timeit -o -q cg.solve(x0=x0, tol=tol, max_iter=100000)\n", 224 | " time_cg[i] = res.average\n", 225 | " iter_cg[i] = len(cg.get_convergence())" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 12, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "Text(0,0.5,'Time, s')" 237 | ] 238 | }, 239 | "execution_count": 12, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | }, 243 | { 244 | "data": { 245 | "image/png": "\n", 246 | "text/plain": [ 247 | "" 248 | ] 249 | }, 250 | "metadata": {}, 251 | "output_type": "display_data" 252 | } 253 | ], 254 | "source": [ 255 | "plt.figure(figsize=figsize)\n", 256 | "plt.loglog(eps, time_cg, label=\"FW\")\n", 257 | "plt.loglog(eps, time_pg, label=\"PGD\")\n", 258 | "plt.legend(fontsize=fontsize)\n", 259 | "plt.xticks(fontsize=fontsize)\n", 260 | "plt.yticks(fontsize=fontsize)\n", 261 | "plt.xlabel(r\"Accuracy, $\\varepsilon$\", fontsize=fontsize)\n", 262 | "plt.ylabel(r\"Time, s\", fontsize=fontsize)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 13, 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "Text(0,0.5,'Number of iterations')" 274 | ] 275 | }, 276 | "execution_count": 13, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | }, 280 | { 281 | "data": { 282 | "image/png": "\n", 283 | "text/plain": [ 284 | "" 285 | ] 286 | }, 287 | "metadata": {}, 288 | "output_type": "display_data" 289 | } 290 | ], 291 | "source": [ 292 | "plt.figure(figsize=figsize)\n", 293 | "plt.loglog(eps, iter_cg, label=\"FW\")\n", 294 | "plt.loglog(eps, iter_pg, label=\"PGD\")\n", 295 | "plt.legend(fontsize=fontsize)\n", 296 | "plt.xticks(fontsize=fontsize)\n", 297 | "plt.yticks(fontsize=fontsize)\n", 298 | "plt.xlabel(r\"Accuracy, $\\varepsilon$\", fontsize=fontsize)\n", 299 | "plt.ylabel(r\"Number of iterations\", fontsize=fontsize)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "## Least-squares problem on simplex\n", 307 | "\\begin{align*}\n", 308 | "& \\min \\frac{1}{2} \\|Ax - b\\|^2_2\\\\\n", 309 | "\\text{s.t. } & \\|x\\|_1 \\leq 1\\\\\n", 310 | "& x_i \\geq 0,\n", 311 | "\\end{align*}\n", 312 | "$A \\in \\mathbb{R}^{m \\times n}$, $n \\gg m$" 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": 15, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "def func(x, A, b):\n", 322 | " return 0.5 * np.linalg.norm(A.dot(x) - b)**2\n", 323 | "\n", 324 | "f = lambda x: func(x, A, b)\n", 325 | "\n", 326 | "def grad_f(x, A, b):\n", 327 | " grad = -A.T.dot(b)\n", 328 | " grad = grad + A.T.dot(A).dot(x)\n", 329 | " return grad\n", 330 | "\n", 331 | "grad = lambda x: grad_f(x, A, b)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 16, 337 | "metadata": {}, 338 | "outputs": [], 339 | "source": [ 340 | "m = 50\n", 341 | "n = 100\n", 342 | "A = np.random.randn(m, n)\n", 343 | "x_true = np.random.rand(n)\n", 344 | "b = A.dot(x_true) + 0.01 * np.random.randn(m)" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 17, 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "def linsolver(gradient):\n", 354 | " x = np.zeros(gradient.shape[0])\n", 355 | " idx_min = np.argmin(gradient)\n", 356 | " if gradient[idx_min] > 0:\n", 357 | " x[idx_min] = 0\n", 358 | " else:\n", 359 | " x[idx_min] = 1\n", 360 | " return x" 361 | ] 362 | }, 363 | { 364 | "cell_type": "code", 365 | "execution_count": 18, 366 | "metadata": {}, 367 | "outputs": [], 368 | "source": [ 369 | "def projection(y):\n", 370 | " x = y.copy()\n", 371 | " if np.all(x >= 0) and np.sum(x) <= 1:\n", 372 | " return x\n", 373 | " x = np.clip(x, 0, np.max(x))\n", 374 | " if np.sum(x) <= 1:\n", 375 | " return x\n", 376 | " n = x.shape[0]\n", 377 | " bget = False\n", 378 | " x.sort()\n", 379 | " x = x[::-1]\n", 380 | " temp_sum = 0\n", 381 | " t_hat = 0\n", 382 | " for i in range(n - 1):\n", 383 | " temp_sum += x[i]\n", 384 | " t_hat = (temp_sum - 1.0) / (i + 1)\n", 385 | " if t_hat >= x[i + 1]:\n", 386 | " bget = True\n", 387 | " break\n", 388 | " if not bget:\n", 389 | " t_hat = (temp_sum + x[n - 1] - 1.0) / n\n", 390 | " return np.maximum(y - t_hat, 0)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 20, 396 | "metadata": {}, 397 | "outputs": [], 398 | "source": [ 399 | "methods = {\n", 400 | " \"FW\": cs.FrankWolfe(f, grad, linsolver, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.)),\n", 401 | " \"PGD\": cs.ProjectedGD(f, grad, projection, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.))\n", 402 | " }" 403 | ] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "execution_count": 21, 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "x0 = np.random.randn(n)\n", 412 | "max_iter = 300\n", 413 | "tol = 1e-5" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 22, 419 | "metadata": {}, 420 | "outputs": [ 421 | { 422 | "name": "stdout", 423 | "output_type": "stream", 424 | "text": [ 425 | "\t FW\n", 426 | "Convergence in 32 iterations\n", 427 | "Function value = 619.1425573630355\n", 428 | "Difference in function values = 5.300833890942158e-06\n", 429 | "Difference in argument = 0.0003707118312833002\n", 430 | "\t PGD\n", 431 | "Convergence in 11 iterations\n", 432 | "Function value = 619.1425429905358\n", 433 | "Difference in function values = 2.5743413516465807e-06\n", 434 | "Difference in argument = 0.0003181933658590257\n" 435 | ] 436 | } 437 | ], 438 | "source": [ 439 | "for m_name in methods:\n", 440 | " print(\"\\t\", m_name)\n", 441 | " x = methods[m_name].solve(x0=x0, max_iter=max_iter, tol=tol, disp=1)" 442 | ] 443 | }, 444 | { 445 | "cell_type": "code", 446 | "execution_count": 23, 447 | "metadata": {}, 448 | "outputs": [ 449 | { 450 | "data": { 451 | "image/png": "\n", 452 | "text/plain": [ 453 | "" 454 | ] 455 | }, 456 | "metadata": {}, 457 | "output_type": "display_data" 458 | } 459 | ], 460 | "source": [ 461 | "plt.figure(figsize=figsize)\n", 462 | "for m_name in methods:\n", 463 | " plt.semilogy([f(x) for x in methods[m_name].get_convergence()], label=m_name)\n", 464 | "plt.legend(fontsize=fontsize)\n", 465 | "plt.xlabel(\"Number of iteration, $k$\", fontsize=fontsize)\n", 466 | "plt.ylabel(r\"$f(x_k)$\", fontsize=fontsize)\n", 467 | "plt.xticks(fontsize=fontsize)\n", 468 | "_ = plt.yticks(fontsize=fontsize)" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 21, 474 | "metadata": {}, 475 | "outputs": [ 476 | { 477 | "name": "stdout", 478 | "output_type": "stream", 479 | "text": [ 480 | "\t FW\n", 481 | "3.33 ms ± 327 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n", 482 | "\t PGD\n", 483 | "6.58 ms ± 222 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" 484 | ] 485 | } 486 | ], 487 | "source": [ 488 | "for key in methods:\n", 489 | " print(\"\\t {}\".format(key))\n", 490 | " %timeit methods[key].solve(x0, max_iter, tol)" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": 22, 496 | "metadata": {}, 497 | "outputs": [ 498 | { 499 | "name": "stderr", 500 | "output_type": "stream", 501 | "text": [ 502 | "8it [01:13, 9.13s/it]\n" 503 | ] 504 | } 505 | ], 506 | "source": [ 507 | "eps = [10**(-i) for i in range(8)]\n", 508 | "time_pg = np.zeros(len(eps))\n", 509 | "time_cg = np.zeros(len(eps))\n", 510 | "iter_pg = np.zeros(len(eps))\n", 511 | "iter_cg = np.zeros(len(eps))\n", 512 | "pg = cs.ProjectedGD(f, grad, projection)\n", 513 | "cg = cs.FrankWolfe(f, grad, linsolver, ss.Backtracking(rule_type=\"Armijo\", rho=0.5, beta=0.1, init_alpha=1.))\n", 514 | "for i, tol in tqdm(enumerate(eps)):\n", 515 | " res = %timeit -o -q pg.solve(x0=x0, tol=tol, max_iter=100000)\n", 516 | " time_pg[i] = res.average\n", 517 | " iter_pg[i] = len(pg.get_convergence())\n", 518 | " res = %timeit -o -q cg.solve(x0=x0, tol=tol, max_iter=100000)\n", 519 | " time_cg[i] = res.average\n", 520 | " iter_cg[i] = len(cg.get_convergence())" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 23, 526 | "metadata": {}, 527 | "outputs": [ 528 | { 529 | "data": { 530 | "text/plain": [ 531 | "Text(0,0.5,'Time, s')" 532 | ] 533 | }, 534 | "execution_count": 23, 535 | "metadata": {}, 536 | "output_type": "execute_result" 537 | }, 538 | { 539 | "data": { 540 | "image/png": "\n", 541 | "text/plain": [ 542 | "" 543 | ] 544 | }, 545 | "metadata": {}, 546 | "output_type": "display_data" 547 | } 548 | ], 549 | "source": [ 550 | "plt.figure(figsize=figsize)\n", 551 | "plt.loglog(eps, time_cg, label=\"FW\")\n", 552 | "plt.loglog(eps, time_pg, label=\"PGD\")\n", 553 | "plt.legend(fontsize=fontsize)\n", 554 | "plt.xticks(fontsize=fontsize)\n", 555 | "plt.yticks(fontsize=fontsize)\n", 556 | "plt.xlabel(r\"Accuracy, $\\varepsilon$\", fontsize=fontsize)\n", 557 | "plt.ylabel(r\"Time, s\", fontsize=fontsize)" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 24, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "data": { 567 | "text/plain": [ 568 | "Text(0,0.5,'Number of iterations')" 569 | ] 570 | }, 571 | "execution_count": 24, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | }, 575 | { 576 | "data": { 577 | "image/png": "\n", 578 | "text/plain": [ 579 | "" 580 | ] 581 | }, 582 | "metadata": {}, 583 | "output_type": "display_data" 584 | } 585 | ], 586 | "source": [ 587 | "plt.figure(figsize=figsize)\n", 588 | "plt.loglog(eps, iter_cg, label=\"FW\")\n", 589 | "plt.loglog(eps, iter_pg, label=\"PGD\")\n", 590 | "plt.legend(fontsize=fontsize)\n", 591 | "plt.xticks(fontsize=fontsize)\n", 592 | "plt.yticks(fontsize=fontsize)\n", 593 | "plt.xlabel(r\"Accuracy, $\\varepsilon$\", fontsize=fontsize)\n", 594 | "plt.ylabel(r\"Number of iterations\", fontsize=fontsize)" 595 | ] 596 | } 597 | ], 598 | "metadata": { 599 | "kernelspec": { 600 | "display_name": "Python 3 (cvxpy)", 601 | "language": "python", 602 | "name": "cvxpy" 603 | }, 604 | "language_info": { 605 | "codemirror_mode": { 606 | "name": "ipython", 607 | "version": 3 608 | }, 609 | "file_extension": ".py", 610 | "mimetype": "text/x-python", 611 | "name": "python", 612 | "nbconvert_exporter": "python", 613 | "pygments_lexer": "ipython3", 614 | "version": "3.6.4" 615 | } 616 | }, 617 | "nbformat": 4, 618 | "nbformat_minor": 2 619 | } 620 | -------------------------------------------------------------------------------- /liboptpy/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amkatrutsa/liboptpy/a40d8839dd12edb07fb6860d1337f034a2ea4754/liboptpy/__init__.py -------------------------------------------------------------------------------- /liboptpy/base_optimizer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import deque 3 | 4 | class LineSearchOptimizer(object): 5 | def __init__(self, f, grad, step_size, memory_size=1, **kwargs): 6 | self.convergence = [] 7 | self._f = f 8 | self._grad = grad 9 | if step_size is not None: 10 | step_size.assign_function(f, grad, self._f_update_x_next) 11 | self._step_size = step_size 12 | self._par = kwargs 13 | self._grad_mem = deque(maxlen=memory_size) 14 | 15 | def get_convergence(self): 16 | return self.convergence 17 | 18 | def solve(self, x0, max_iter=100, tol=1e-6, disp=False): 19 | self.convergence = [] 20 | self._x_current = x0.copy() 21 | self.convergence.append(self._x_current) 22 | iteration = 0 23 | self._current_grad = None 24 | while True: 25 | self._h = self.get_direction(self._x_current) 26 | if self._current_grad is None: 27 | raise ValueError("Variable self._current_grad has to be initialized in method get_direction()!") 28 | self._grad_mem.append(self._current_grad) 29 | if self.check_convergence(tol): 30 | if disp > 0: 31 | print("Required tolerance achieved!") 32 | break 33 | if disp > 1: 34 | print("Iteration {}/{}".format(iteration, max_iter)) 35 | print("Current function val =", self._f(self._x_current)) 36 | self._print_info() 37 | self._alpha = self.get_stepsize() 38 | self._update_x_next() 39 | self._update_x_current() 40 | self._append_conv() 41 | iteration += 1 42 | if iteration >= max_iter: 43 | if disp > 0: 44 | print("Maximum iteration exceeds!") 45 | break 46 | if disp: 47 | print("Convergence in {} iterations".format(iteration)) 48 | print("Function value = {}".format(self._f(self._x_current))) 49 | self._print_info() 50 | return self._get_result_x() 51 | 52 | def get_direction(self, x): 53 | raise NotImplementedError("You have to provide method for finding direction!") 54 | 55 | def _update_x_current(self): 56 | self._x_current = self._x_next 57 | 58 | def _update_x_next(self): 59 | self._x_next = self._f_update_x_next(self._x_current, self._alpha, self._h) 60 | 61 | def _f_update_x_next(self, x, alpha, h): 62 | return x + alpha * h 63 | 64 | def check_convergence(self, tol): 65 | return np.linalg.norm(self._current_grad) < tol 66 | 67 | def get_stepsize(self): 68 | raise NotImplementedError("You have to provide method for finding step size!") 69 | 70 | def _print_info(self): 71 | print("Norm of gradient = {}".format(np.linalg.norm(self._current_grad))) 72 | 73 | def _append_conv(self): 74 | self.convergence.append(self._x_next) 75 | 76 | def _get_result_x(self): 77 | return self._x_current 78 | 79 | class TrustRegionOptimizer(object): 80 | def __init__(self): 81 | raise NotImplementedError("Trust region methods are not implemented yet") -------------------------------------------------------------------------------- /liboptpy/constr_solvers/__init__.py: -------------------------------------------------------------------------------- 1 | from ._frank_wolfe import FrankWolfe 2 | from ._proj_gd import ProjectedGD -------------------------------------------------------------------------------- /liboptpy/constr_solvers/_frank_wolfe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..base_optimizer import LineSearchOptimizer 3 | 4 | class FrankWolfe(LineSearchOptimizer): 5 | 6 | ''' 7 | Class represents conditional gradient descent method aka Frank Wolfe algorithm 8 | ''' 9 | 10 | def __init__(self, f, grad, linsolver, step_size): 11 | super().__init__(f, grad, step_size) 12 | self._linsolver = linsolver 13 | self._h = None 14 | 15 | def get_direction(self, x): 16 | s = self._linsolver(self._grad(x)) 17 | self._current_grad = self._grad(x) 18 | self._h = s - x 19 | return self._h 20 | 21 | def check_convergence(self, tol): 22 | if len(self.convergence) == 1: 23 | return False 24 | if self._f(self.convergence[-2]) - self._f(self.convergence[-1]) < tol: 25 | return True 26 | else: 27 | return False 28 | 29 | def get_stepsize(self): 30 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 31 | 32 | def _print_info(self): 33 | print("Difference in function values = {}".format(self._f(self.convergence[-2]) - self._f(self.convergence[-1]))) 34 | print("Difference in argument = {}".format(np.linalg.norm(self.convergence[-1] - self.convergence[-2]))) 35 | -------------------------------------------------------------------------------- /liboptpy/constr_solvers/_proj_gd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ..base_optimizer import LineSearchOptimizer 3 | 4 | class ProjectedGD(LineSearchOptimizer): 5 | 6 | ''' 7 | Class represents projected gradient method 8 | ''' 9 | 10 | def __init__(self, f, grad, projector, step_size): 11 | super().__init__(f, grad, step_size) 12 | self._projector = projector 13 | 14 | def get_direction(self, x): 15 | self._current_grad = self._grad(x) 16 | return -self._current_grad 17 | 18 | def _f_update_x_next(self, x, alpha, h): 19 | return self._projector(x + alpha * h) 20 | 21 | def check_convergence(self, tol): 22 | if len(self.convergence) == 1: 23 | return False 24 | if self._f(self.convergence[-2]) - self._f(self.convergence[-1]) < tol: 25 | return True 26 | else: 27 | return False 28 | 29 | def get_stepsize(self): 30 | return self._step_size.get_stepsize(-self._grad_mem[-1], self.convergence[-1], len(self.convergence)) 31 | 32 | def _print_info(self): 33 | print("Difference in function values = {}".format(self._f(self.convergence[-2]) - self._f(self.convergence[-1]))) 34 | print("Difference in argument = {}".format(np.linalg.norm(self.convergence[-1] - self.convergence[-2]))) 35 | -------------------------------------------------------------------------------- /liboptpy/restarts.py: -------------------------------------------------------------------------------- 1 | class Restart(object): 2 | def __init__(self, limit_dim=None): 3 | self._dim = limit_dim 4 | 5 | def __call__(self, num_iter, x): 6 | if num_iter % self._dim == 0: 7 | return True 8 | else: 9 | return False 10 | 11 | def assign_function(self, f, grad): 12 | self._f = f 13 | self._grad = grad 14 | -------------------------------------------------------------------------------- /liboptpy/step_size.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ["ConstantStepSize", "Backtracking", "ExactLineSearch4Quad", "InvIterStepSize", "ScaledInvIterStepSize"] 4 | 5 | class StepSize(object): 6 | ''' 7 | Base class for all classes for defining step size 8 | ''' 9 | def __init__(self): 10 | pass 11 | def get_stepsize(self, *args, **kwargs): 12 | raise NotImplementedError("Method to get current step size has to be implemented!") 13 | 14 | def assign_function(self, f, grad, *args): 15 | pass 16 | 17 | class ConstantStepSize(StepSize): 18 | ''' 19 | Class represents interface for constant step size 20 | ''' 21 | def __init__(self, stepsize): 22 | self.stepsize = stepsize 23 | 24 | def get_stepsize(self, h, x, num_iter, *args): 25 | return self.stepsize 26 | 27 | class ScaledConstantStepSize(StepSize): 28 | def __init__(self, stepsize): 29 | self.stepsize = stepsize 30 | 31 | def get_stepsize(self, h, x, num_iter, *args): 32 | return self.stepsize / np.linalg.norm(h) 33 | 34 | class InvIterStepSize(StepSize): 35 | def __init__(self): 36 | pass 37 | 38 | def get_stepsize(self, h, x, num_iter, *args): 39 | return 1. / num_iter 40 | 41 | class ScaledInvIterStepSize(StepSize): 42 | def __init__(self): 43 | pass 44 | 45 | def get_stepsize(self, h, x, num_iter, *args): 46 | s = 1. / num_iter 47 | return s / np.linalg.norm(h) 48 | 49 | class InvSqrootIterStepSize(StepSize): 50 | def __init__(self): 51 | pass 52 | 53 | def get_stepsize(self, h, x, num_iter, *args): 54 | return 1. / np.sqrt(num_iter) 55 | 56 | class Backtracking(StepSize): 57 | ''' 58 | Class represents different rules for backtracking search of step size 59 | ''' 60 | def __init__(self, rule_type, **kwargs): 61 | self.rule = rule_type 62 | self.par = kwargs 63 | if self.rule == "Lipschitz" and "eps" not in self.par: 64 | self.par["eps"] = 0. 65 | if "disp" not in self.par: 66 | self.par["disp"] = False 67 | if self.rule == "Lipschitz": 68 | self._alpha = None 69 | 70 | def assign_function(self, f, grad, update_x_next): 71 | self._f = f 72 | self._grad = grad 73 | self._update_x_next = update_x_next 74 | 75 | def get_stepsize(self, h, x, num_iter, *args): 76 | alpha = self.par["init_alpha"] 77 | if self.rule == "Armijo": 78 | rho = self.par["rho"] 79 | beta = self.par["beta"] 80 | assert beta < 0.5, "Armijo rule is applicable for beta less than 0.5" 81 | assert rho < 1, "Decay factor has to be less than 1" 82 | current_grad = self._grad(x) 83 | current_f = self._f(x) 84 | x_next = self._update_x_next(x, alpha, h) 85 | while True: 86 | if np.isnan(self._f(x_next)): 87 | alpha *= rho 88 | else: 89 | if self._f(x_next) >= current_f + beta * current_grad.dot(x_next - x): 90 | alpha *= rho 91 | else: 92 | break 93 | if alpha < 1e-16: 94 | raise ValueError("Step size is too small!") 95 | x_next = self._update_x_next(x, alpha, h) 96 | return alpha 97 | elif self.rule == "Wolfe": 98 | # https://sites.math.washington.edu/~burke/crs/408/notes/nlp/line.pdf 99 | rho = self.par["rho"] 100 | lb = 0 101 | ub = np.inf 102 | assert rho < 1, "Decay factor has to be less than 1" 103 | beta1 = self.par["beta1"] 104 | beta2 = self.par["beta2"] 105 | assert 0 < beta1 < beta2 < 1, "Wolfe rule is applicable for betas such that 0 < beta1 < beta2 < 1" 106 | current_grad = self._grad(x) 107 | current_f = self._f(x) 108 | while True: 109 | if np.isnan(self._f(x + alpha * h)): 110 | alpha *= rho 111 | else: 112 | if self._f(x + alpha * h) > current_f + beta1 * alpha * current_grad.dot(h): 113 | ub = alpha 114 | alpha = 0.5 * (lb + ub) 115 | elif h.dot(self._grad(x + alpha * h)) < beta2 * h.dot(current_grad): 116 | lb = alpha 117 | if np.isinf(ub): 118 | alpha = 2 * lb 119 | else: 120 | alpha = 0.5 * (lb + ub) 121 | else: 122 | break 123 | if alpha < 1e-16: 124 | raise ValueError("Step size is too small!") 125 | return alpha 126 | elif self.rule == "Goldstein": 127 | pass 128 | elif self.rule == "Wolfe strong": 129 | rho = self.par["rho"] 130 | assert rho < 1, "Decay factor has to be less than 1" 131 | beta1 = self.par["beta1"] 132 | beta2 = self.par["beta2"] 133 | lb = 0 134 | ub = np.inf 135 | assert 0 < beta1 < beta2 < 1, "Wolfe rule is applicable for betas such that 0 < beta1 < beta2 < 1" 136 | current_grad = self._grad(x) 137 | current_f = self._f(x) 138 | while True: 139 | if np.isnan(self._f(x + alpha * h)): 140 | alpha *= rho 141 | else: 142 | if self._f(x + alpha * h) > current_f + beta1 * alpha * current_grad.dot(h): 143 | ub = alpha 144 | alpha = 0.5 * (lb + ub) 145 | elif np.abs(h.dot(self._grad(x + alpha * h))) > beta2 * np.abs(h.dot(current_grad)): 146 | lb = alpha 147 | if np.isinf(ub): 148 | alpha = 2 * lb 149 | else: 150 | alpha = 0.5 * (lb + ub) 151 | else: 152 | break 153 | if alpha < 1e-16: 154 | raise ValueError("Step size is too small!") 155 | return alpha 156 | elif self.rule == "Lipschitz": 157 | rho = self.par["rho"] 158 | assert rho < 1, "Decay factor has to be less than 1" 159 | current_grad = self._grad(x) 160 | current_f = self._f(x) 161 | eps = self.par["eps"] 162 | if self._alpha is None: 163 | self._alpha = alpha 164 | else: 165 | self._alpha /= rho 166 | x_next = self._update_x_next(x, self._alpha, h) 167 | while True: 168 | if self.par["disp"]: 169 | print("Current test alpha = {}".format(self._alpha)) 170 | if np.isnan(self._f(x_next)): 171 | self._alpha *= rho 172 | else: 173 | if self._f(x_next) > current_f + current_grad.dot(x_next - x) + np.linalg.norm(x_next - x)**2 / (2 * self._alpha) + eps: 174 | self._alpha *= rho 175 | else: 176 | if self.par["disp"]: 177 | print("Found alpha = {}".format(self._alpha)) 178 | break 179 | if self._alpha < 1e-16: 180 | raise ValueError("Step size is too small!") 181 | x_next = self._update_x_next(x, self._alpha, h) 182 | return self._alpha 183 | else: 184 | raise NotImplementedError("Available rules for backtracking are 'Armijo', 'Goldstein', 'Wolfe', 'Wolfe strong' and 'Lipschitz'") 185 | 186 | class ExactLineSearch4Quad(StepSize): 187 | def __init__(self, A, b=None): 188 | self._A = A 189 | if b is None: 190 | self._b = np.zeros(A.shape[0]) 191 | else: 192 | self._b = b 193 | 194 | def get_stepsize(self, h, x, num_iter): 195 | return h.dot(self._b - self._A.dot(x)) / h.dot(self._A.dot(h)) 196 | -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/__init__.py: -------------------------------------------------------------------------------- 1 | from . import so 2 | from . import fo -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/__init__.py: -------------------------------------------------------------------------------- 1 | from ._gd import GradientDescent 2 | from ._cg import ConjugateGradientFR, ConjugateGradientPR 3 | from ._cg import ConjugateGradientQuad 4 | from ._acc_gd import AcceleratedGD 5 | from ._subgrad import SubgradientMethod 6 | from ._dual_average import DualAveraging 7 | from ._quasi_newton import BFGS, LBFGS, DFP, BarzilaiBorweinMethod 8 | 9 | __all__ = ["BarzilaiBorweinMethod", 10 | "AcceleratedGD", 11 | "GradientDescent", 12 | "ConjugateGradientFR", 13 | "ConjugateGradientPR", 14 | "ConjugateGradientQuad", 15 | "SubgradientMethod", 16 | "DualAveraging", 17 | "BFGS", "LBFGS", "DFP"] -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_acc_gd.py: -------------------------------------------------------------------------------- 1 | from ... import base_optimizer as _base 2 | import numpy as _np 3 | from ... import step_size as ss 4 | 5 | class AcceleratedGD(_base.LineSearchOptimizer): 6 | def __init__(self, f, grad, step_size, momentum_size=None, **kwargs): 7 | super().__init__(f, grad, step_size, **kwargs) 8 | if momentum_size is not None: 9 | momentum_size.assign(f, grad) 10 | self._momentum_size = momentum_size 11 | self._lam0 = 0 12 | self._lam1 = 1 13 | 14 | def get_direction(self, x): 15 | self._current_grad = self._grad(x) 16 | return -self._current_grad 17 | 18 | def _update_x_current(self): 19 | if self._momentum_size is None: 20 | beta = (self._lam0 - 1) / self._lam1 21 | t = self._lam0 22 | self._lam0 = self._lam1 23 | self._lam1 = (1 + _np.sqrt(1 + 4 * t**2)) / 2. 24 | self._x_current = self._x_next + beta * (self._x_next - self.convergence[-1]) 25 | 26 | def get_stepsize(self): 27 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_cg.py: -------------------------------------------------------------------------------- 1 | from ... import base_optimizer as _base 2 | import numpy as _np 3 | 4 | class ConjugateGradientFR(_base.LineSearchOptimizer): 5 | def __init__(self, f, grad, step_size, restart=None, **kwargs): 6 | super().__init__(f, grad, step_size, **kwargs) 7 | if restart is not None: 8 | restart.assign_function(f, grad) 9 | self._restart = restart 10 | 11 | def get_direction(self, x): 12 | if (len(self.convergence) == 1) or (self._restart is not None and 13 | self._restart(len(self.convergence), x)): 14 | self._current_grad = self._grad(x) 15 | h = -self._current_grad 16 | else: 17 | self._current_grad = self._grad(self.convergence[-1]) 18 | beta = self._current_grad.dot(self._current_grad) / self._grad_mem[-1].dot(self._grad_mem[-1]) 19 | h = -self._current_grad + beta * self._h 20 | return h 21 | 22 | def get_stepsize(self): 23 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 24 | 25 | class ConjugateGradientPR(_base.LineSearchOptimizer): 26 | def __init__(self, f, grad, step_size, restart=None, **kwargs): 27 | super().__init__(f, grad, step_size, **kwargs) 28 | if restart is not None: 29 | restart.assign_function(f, grad) 30 | self._restart = restart 31 | 32 | def get_direction(self, x): 33 | if (len(self.convergence) == 1): 34 | self._current_grad = self._grad(x) 35 | h = -self._current_grad 36 | else: 37 | self._current_grad = self._grad(self.convergence[-1]) 38 | beta = self._current_grad.dot(self._current_grad - self._grad_mem[-1]) / self._grad_mem[-1].dot(self._grad_mem[-1]) 39 | h = -self._current_grad + beta * self._h 40 | return h 41 | 42 | def get_stepsize(self): 43 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 44 | 45 | class ConjugateGradientQuad(_base.LineSearchOptimizer): 46 | def __init__(self, A, b=None): 47 | if b is None: 48 | b = _np.zeros(A.shape[0]) 49 | f = lambda x: 0.5 * x.dot(A.dot(x)) - b.dot(x) 50 | grad = lambda x: A.dot(x) - b 51 | super().__init__(f, grad, None) 52 | self._A = A 53 | self._b = b 54 | 55 | def get_direction(self, x): 56 | if (len(self.convergence) == 1): 57 | self._current_grad = self._grad(x) 58 | h = -self._current_grad 59 | self._r = -h 60 | else: 61 | r_next = self._r + self._alpha * self._A.dot(self._h) 62 | beta = r_next.dot(r_next) / self._r.dot(self._r) 63 | h = -r_next + beta * self._h 64 | self._r = r_next 65 | return h 66 | 67 | def get_stepsize(self): 68 | # h = self._grad_mem[-1] 69 | self._alpha = self._r.dot(self._r) / self._h.dot(self._A.dot(self._h)) 70 | return self._alpha 71 | 72 | def check_convergence(self, tol): 73 | return _np.linalg.norm(self._grad(self.convergence[-1])) < tol 74 | -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_dual_average.py: -------------------------------------------------------------------------------- 1 | from ... import base_optimizer as _base 2 | import numpy as _np 3 | 4 | class DualAveraging(_base.LineSearchOptimizer): 5 | def __init__(self, f, subgrad, primal_step_size, dual_step_size): 6 | super().__init__(f, subgrad, primal_step_size) 7 | self._dual_step_size = dual_step_size 8 | self._sum_lam = 0 9 | 10 | def get_direction(self, x): 11 | self._current_grad = self._grad(x) 12 | if len(self.convergence) == 1: 13 | self._s = _np.zeros(x.shape[0]) 14 | self._lam = self._dual_step_size.get_stepsize(x, self._current_grad, len(self.convergence)) 15 | self._s = (self._sum_lam * self._s + self._lam * self._current_grad) / (self._sum_lam + self._lam) 16 | self._sum_lam += self._lam 17 | return -self._s 18 | 19 | def get_stepsize(self): 20 | return self._step_size.get_stepsize(self._h, self._x_current, len(self.convergence)) 21 | 22 | def _f_update_x_next(self, x, alpha, h): 23 | return self.convergence[0] + alpha * h 24 | 25 | def _append_conv(self): 26 | self.convergence.append(self._x_current) 27 | 28 | def _update_x_current(self): 29 | self._x_current = self._x_next -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_gd.py: -------------------------------------------------------------------------------- 1 | from ... import base_optimizer as _base 2 | 3 | class GradientDescent(_base.LineSearchOptimizer): 4 | def __init__(self, f, grad, step_size, **kwargs): 5 | super().__init__(f, grad, step_size, **kwargs) 6 | 7 | def get_direction(self, x): 8 | self._current_grad = self._grad(x) 9 | return -self._current_grad 10 | 11 | def get_stepsize(self): 12 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_quasi_newton.py: -------------------------------------------------------------------------------- 1 | import numpy as _np 2 | from ... import base_optimizer as _base 3 | from ... import step_size as _ss 4 | from collections import deque 5 | 6 | 7 | class BFGS(_base.LineSearchOptimizer): 8 | 9 | def __init__(self, f, grad, step_size=None, 10 | H=None, **kwargs): 11 | if step_size is None: 12 | step_size = _ss.Backtracking("Wolfe", rho=0.5, beta1=1e-3, beta2=0.9, init_alpha=1.) 13 | super().__init__(f, grad, step_size, memory_size=1, **kwargs) 14 | self._H0 = H 15 | self._H = H 16 | 17 | def get_direction(self, x): 18 | if self._H is None: 19 | self._current_grad = self._grad(x) 20 | return -self._current_grad 21 | else: 22 | return -self._H.dot(self._current_grad) 23 | 24 | def get_stepsize(self): 25 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 26 | 27 | def _update_x_current(self): 28 | self._current_grad = self._grad(self._x_next) 29 | s = self._x_next - self._x_current 30 | y = self._current_grad - self._grad_mem[-1] 31 | rho = 1. / y.dot(s) 32 | if self._H is None: 33 | self._H = _np.eye(self._x_current.shape[0]) / y.dot(y) / rho 34 | Hy = self._H.dot(y) 35 | Hys = _np.outer(Hy, s) 36 | ss = _np.outer(s, s) 37 | self._H = rho * ss + self._H - rho * Hys - rho * Hys.T + \ 38 | rho**2 * y.dot(Hy) * ss 39 | self._x_current = self._x_next 40 | 41 | def _get_result_x(self): 42 | self._H = self._H0 43 | return self._x_current 44 | 45 | 46 | class LBFGS(_base.LineSearchOptimizer): 47 | 48 | def __init__(self, f, grad, step_size=None, 49 | H=None, hist_size=10, **kwargs): 50 | if step_size is None: 51 | step_size = _ss.Backtracking("Wolfe", rho=0.5, beta1=1e-3, beta2=0.9, init_alpha=1.) 52 | super().__init__(f, grad, step_size, memory_size=1, **kwargs) 53 | self._H0 = H 54 | self._H = H 55 | self._s_hist = deque(maxlen=hist_size) 56 | self._y_hist = deque(maxlen=hist_size) 57 | 58 | def get_direction(self, x): 59 | if self._H is None: 60 | self._current_grad = self._grad(x) 61 | return -self._current_grad 62 | else: 63 | q = self._current_grad 64 | alpha = _np.zeros(len(self._s_hist)) 65 | rho = _np.zeros(len(self._s_hist)) 66 | for i in range(len(self._s_hist) - 1, -1, -1): 67 | rho[i] = 1. / self._s_hist[i].dot(self._y_hist[i]) 68 | alpha[i] = self._s_hist[i].dot(q) * rho[i] 69 | q = q - alpha[i] * self._y_hist[i] 70 | r = q * self._H 71 | for i in range(len(self._s_hist)): 72 | beta = rho[i] * self._y_hist[i].dot(r) 73 | r = r + self._s_hist[i] * (alpha[i] - beta) 74 | return -r 75 | 76 | def get_stepsize(self): 77 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 78 | 79 | def _update_x_current(self): 80 | self._current_grad = self._grad(self._x_next) 81 | s = self._x_next - self._x_current 82 | y = self._current_grad - self._grad_mem[-1] 83 | self._s_hist.append(s) 84 | self._y_hist.append(y) 85 | if self._H is None or self._s_hist.maxlen <= len(self.convergence) - 1: 86 | self._H = y.dot(s) / y.dot(y) 87 | self._x_current = self._x_next 88 | 89 | def _get_result_x(self): 90 | self._H = self._H0 91 | return self._x_current 92 | 93 | 94 | class DFP(_base.LineSearchOptimizer): 95 | 96 | def __init__(self, f, grad, step_size=None, 97 | H=None, **kwargs): 98 | if step_size is None: 99 | step_size = _ss.Backtracking("Wolfe", rho=0.5, beta1=1e-3, beta2=0.9, init_alpha=1.) 100 | super().__init__(f, grad, step_size, memory_size=2, **kwargs) 101 | self._H0 = H 102 | self._H = H 103 | 104 | def get_direction(self, x): 105 | if self._H is None: 106 | self._current_grad = self._grad(x) 107 | return -self._current_grad 108 | else: 109 | return -self._H.dot(self._current_grad) 110 | 111 | def get_stepsize(self): 112 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 113 | 114 | def _update_x_current(self): 115 | self._current_grad = self._grad(self._x_next) 116 | s = self._x_next - self._x_current 117 | y = self._current_grad - self._grad_mem[-1] 118 | rho = 1. / y.dot(s) 119 | if self._H is None: 120 | self._H = _np.eye(self._x_current.shape[0]) / y.dot(y) / rho 121 | Hy = self._H.dot(y) 122 | self._H = self._H - ((_np.outer(Hy, Hy)) / (y.dot(Hy))) + (_np.outer(s, s) * rho) 123 | self._x_current = self._x_next 124 | 125 | def _get_result_x(self): 126 | self._H = self._H0 127 | return self._x_current 128 | 129 | class BarzilaiBorweinMethod(_base.LineSearchOptimizer): 130 | def __init__(self, f, grad, **kwargs): 131 | super().__init__(f, grad, None, memory_size=2, **kwargs) 132 | 133 | def get_direction(self, x): 134 | self._current_grad = self._grad(x) 135 | return -self._current_grad 136 | 137 | def get_stepsize(self): 138 | if len(self.convergence) == 1: 139 | return self._par["init_alpha"] 140 | else: 141 | g = self._grad_mem[-1] - self._grad_mem[-2] 142 | s = self.convergence[-1] - self.convergence[-2] 143 | if self._par["type"] == 1: 144 | alpha = g.dot(s) / g.dot(g) 145 | elif self._par["type"] == 2: 146 | alpha = s.dot(s) / g.dot(s) 147 | return alpha -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/fo/_subgrad.py: -------------------------------------------------------------------------------- 1 | from ... import base_optimizer as _base 2 | import numpy as np 3 | 4 | class SubgradientMethod(_base.LineSearchOptimizer): 5 | def __init__(self, f, subgrad, step_size): 6 | super().__init__(f, subgrad, step_size) 7 | self._x_best = None 8 | self._f_best = np.inf 9 | 10 | def get_direction(self, x): 11 | self._current_grad = self._grad(x) 12 | return -self._current_grad 13 | 14 | def check_convergence(self, tol): 15 | current_f = self._f(self._x_current) 16 | if current_f < self._f_best: 17 | self._x_best = self._x_current 18 | self._f_best = current_f 19 | return False 20 | 21 | def get_stepsize(self): 22 | return self._step_size.get_stepsize(self._h, self._x_current, len(self.convergence)) 23 | 24 | def _print_info(self): 25 | pass 26 | 27 | def _get_result_x(self): 28 | return self._x_best -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/so/__init__.py: -------------------------------------------------------------------------------- 1 | from ._newton import NewtonMethod 2 | from ._inexact_newton import InexactNewtonMethod 3 | 4 | __all__ = ["NewtonMethod", "InexactNewtonMethod"] -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/so/_inexact_newton.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from ... import base_optimizer as base 3 | from ..fo import _cg as cg 4 | 5 | class InexactNewtonMethod(base.LineSearchOptimizer): 6 | def __init__(self, f, grad, hess_matvec, step_size, **kwargs): 7 | super().__init__(f, grad, step_size, **kwargs) 8 | self._hess_matvec = hess_matvec 9 | 10 | def get_direction(self, x): 11 | self._current_grad = self._grad(x) 12 | hess = self._hess_matvec(x) 13 | lin_cg = cg.ConjugateGradientQuad(hess, -self._current_grad) 14 | eta = np.minimum(0.5, np.sqrt(np.linalg.norm(self._current_grad))) 15 | h = np.zeros(self._current_grad.shape[0]) 16 | while True: 17 | h = lin_cg.solve(x0=h, tol=eta) 18 | if h.dot(self._current_grad) < 0: 19 | break 20 | else: 21 | eta = eta / 10. 22 | return h 23 | 24 | def get_stepsize(self): 25 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) -------------------------------------------------------------------------------- /liboptpy/unconstr_solvers/so/_newton.py: -------------------------------------------------------------------------------- 1 | import numpy as _np 2 | from ... import base_optimizer as _base 3 | 4 | class NewtonMethod(_base.LineSearchOptimizer): 5 | def __init__(self, f, grad, hess, step_size, linsolver=None, **kwargs): 6 | super().__init__(f, grad, step_size, **kwargs) 7 | self._hess = hess 8 | self._linsolver = linsolver 9 | 10 | def get_direction(self, x): 11 | self._current_grad = self._grad(x) 12 | hess = self._hess(x) 13 | if self._linsolver: 14 | h = self._linsolver(hess, -self._current_grad) 15 | else: 16 | h = _np.linalg.solve(hess, -self._current_grad) 17 | return h 18 | 19 | def get_stepsize(self): 20 | return self._step_size.get_stepsize(self._h, self.convergence[-1], len(self.convergence)) 21 | 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='liboptpy', 5 | version='0.0.1', 6 | description='Implementation of various optimization methods for research and study purposes', 7 | author='Alexandr Katrutsa', 8 | author_email="aleksandr.katrutsa@phystech.edu", 9 | packages=['liboptpy', 'liboptpy.unconstr_solvers','liboptpy.unconstr_solvers.fo', 'liboptpy.unconstr_solvers.so', 10 | 'liboptpy.constr_solvers'], 11 | install_requires=['numpy>=1.12', 'scipy>=1.0'], 12 | keywords=[ 'Convex optimization', 'numerical optimization', 13 | 'Python', 'Numpy', 'Scipy'], 14 | url='https://github.com/amkatrutsa/liboptpy', 15 | license='MIT', 16 | classifiers=['License :: OSI Approved :: MIT License', 17 | 'Programming Language :: Python :: 3.5'], 18 | ) 19 | --------------------------------------------------------------------------------