├── .editorconfig ├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── LICENSE ├── README.md ├── images ├── rastrigin_BasinHoppingWrapper.png ├── rastrigin_DifferentialEvolutionWrapper.png ├── rastrigin_DualAnnealingWrapper.png └── rastrigin_SHGOWrapper.png ├── mnist ├── hessian_logistic_regression.py └── logistic_regression.py ├── pytorch_minimize ├── __init__.py └── optim.py ├── requirements_dev.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_basinhopping.py ├── test_differential_evolution.py ├── test_dual_annealing.py ├── test_pytorch_minimize.py ├── test_shgo.py ├── test_shgo_example.py └── test_unpack_unravel.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * PyTorch Minimize version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # If you download MNIST data, don't commit it 108 | data 109 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.com 2 | 3 | language: python 4 | python: 5 | - 3.8 6 | - 3.7 7 | - 3.6 8 | 9 | # Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 10 | install: 11 | - sudo apt-get update 12 | # We do this conditionally because it saves us some downloading if the 13 | # version is the same. 14 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 15 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; 16 | else 17 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 18 | fi 19 | - bash miniconda.sh -b -p $HOME/miniconda 20 | - source "$HOME/miniconda/etc/profile.d/conda.sh" 21 | - hash -r 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | # Useful for debugging any issues with conda 25 | - conda info -a 26 | # create environment 27 | - conda create -q -n test-environment -c pytorch python=$TRAVIS_PYTHON_VERSION pytorch cpuonly scipy scikit-learn pytest 28 | - conda activate test-environment 29 | - python -m pip install . 30 | 31 | # Command to run tests, e.g. python setup.py test 32 | script: pytest 33 | 34 | # Assuming you have installed the travis-ci CLI tool, after you 35 | # create the Github repo and add it to Travis, run the 36 | # following command to finish PyPI deployment setup: 37 | # $ travis encrypt --add deploy.password 38 | deploy: 39 | provider: pypi 40 | distributions: sdist bdist_wheel 41 | user: gngdb 42 | password: 43 | secure: PLEASE_REPLACE_ME 44 | on: 45 | tags: true 46 | repo: gngdb/pytorch_minimize 47 | python: 3.8 48 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Gavin Gray 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 
14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021, Gavin Gray 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | > [!IMPORTANT] 3 | > This project is not in active development. [Functional updates to PyTorch would 4 | > make everything here cleaner and more reliable][func_issue]. Also, I haven't 5 | > tried it but [`rfeinman`'s re-implementation][rfeinman] of 6 | > `scipy.optimize` in PyTorch may be what you're looking for as it should have 7 | > the same functionality as this project in most cases. 8 | 9 | [func_issue]: https://github.com/gngdb/pytorch-minimize/issues/3 10 | 11 | PyTorch Minimize 12 | ================ 13 | 14 | A wrapper for [`scipy.optimize.minimize`][scipy] to make it a PyTorch 15 | Optimizer implementing Conjugate Gradients, BFGS, l-BFGS, SLSQP, Newton 16 | Conjugate Gradient, Trust Region methods and others in PyTorch. 17 | 18 | *Warning*: this project is a proof of concept and is not necessarily 19 | reliable, although [the code](./pytorch_minimize/optim.py) (that's all of 20 | it) is small enough to be readable. 21 | 22 | * [Quickstart](#quickstart) 23 | * [Install](#install) 24 | * [Using The Optimizer](#using-the-optimizer) 25 | * [Which Algorithms Are Supported?](#which-algorithms-are-supported) 26 | * [Methods that require Hessian evaluations](#methods-that-require-hessian-evaluations) 27 | * [Algorithms without gradients](#algorithms-without-gradients) 28 | * [Algorithms you can choose but don't work](#algorithms-you-can-choose-but-dont-work) 29 | * [Global Optimizers](#global-optimizers) 30 | * [How Does it Work?](#how-does-it-work) 31 | * [Other Implmentations](#other-implementations) 32 | * [How Does This Evaluate the Hessian?](#how-does-this-evaluate-the-hessian) 33 | * [Credits](#credits) 34 | 35 | Quickstart 36 | ---------- 37 | 38 | ### Install 39 | 40 | Dependencies: 41 | 42 | * `pytorch` 43 | * `scipy` 44 | 45 | The following install procedure isn't going to check these are installed. 
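A quick way to check that both are importable before you install (a one-line sanity check, nothing specific to this package):

```
python -c "import torch, scipy; print(torch.__version__, scipy.__version__)"
```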
46 | 47 | This package can be installed with `pip` directly from Github: 48 | 49 | ``` 50 | pip install git+https://github.com/gngdb/pytorch-minimize.git 51 | ``` 52 | 53 | Or by cloning the repository and then installing: 54 | 55 | ``` 56 | git clone https://github.com/gngdb/pytorch-minimize.git 57 | cd pytorch-minimize 58 | python -m pip install . 59 | ``` 60 | 61 | ### Using The Optimizer 62 | 63 | The Optimizer class is `MinimizeWrapper` in `pytorch_minimize.optim`. It 64 | has the same interface as a [PyTorch Optimizer][optimizer], taking 65 | `model.parameters()`, and is configured by passing a dictionary of 66 | arguments, here called `minimizer_args`, that will later be passed to 67 | [`scipy.optimize.minimize`][scipy]: 68 | 69 | ``` 70 | from pytorch_minimize.optim import MinimizeWrapper 71 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100}) 72 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 73 | ``` 74 | 75 | The main difference when using this optimizer as opposed to most PyTorch 76 | optimizers is that a [closure][] ([`torch.optim.LBFGS`][torch_lbfgs] also 77 | requires this) must be defined: 78 | 79 | ``` 80 | def closure(): 81 | optimizer.zero_grad() 82 | output = model(input) 83 | loss = loss_fn(output, target) 84 | loss.backward() 85 | return loss 86 | optimizer.step(closure) 87 | ``` 88 | 89 | This optimizer is intended for **deterministic optimisation problems**, 90 | such as [full batch learning problems][batch]. Because of this, 91 | `optimizer.step(closure)` should only need to be called **once**. 92 | 93 | Can `.step(closure)` be called more than once? Technically yes, but it 94 | shouldn't be necessary because multiple steps are run internally up to the 95 | `maxiter` option in `minimizer_args` and multiple calls are not 96 | recommended. Each call to `optimizer.step(closure)` is an independent 97 | evaluation of `scipy.optimize.minimize`, so the internal state of any 98 | optimization algorithm will be interrupted. 99 | 100 | [torch_lbfgs]: https://pytorch.org/docs/stable/optim.html#torch.optim.LBFGS 101 | 102 | 103 | Which Algorithms Are Supported? 104 | ------------------------------- 105 | 106 | Using PyTorch to calculate the Jacobian, the following algorithms are 107 | supported: 108 | 109 | * [Conjugate Gradients][conjugate]: `'CG'` 110 | * [Broyden-Fletcher-Goldfarb-Shanno (BFGS)][bfgs]: `'BFGS'` 111 | * [Limited-memory BFGS][lbfgs]: `'L-BFGS-B'` but **requires double precision**: 112 | * `nn.Module` containing parameters must be cast to double, example: 113 | `model = model.double()` 114 | * [Sequential Least Squares Programming][slsqp]: `'SLSQP'` 115 | * [Truncated Newton][tnc]: `'TNC'` but **also requires double precision** 116 | 117 | The method name string is given on the right, corresponding to the names 118 | used by [scipy.optimize.minimize][scipy]. 119 | 120 | ### Methods that require Hessian evaluations 121 | 122 | **Warning**: this is experimental and probably unpredictable. 
123 | 
124 | To use the methods that require evaluating the Hessian, a `Closure` object
125 | with the following methods is required (full MNIST example
126 | [here](./mnist/hessian_logistic_regression.py)):
127 | 
128 | ```
129 | class Closure():
130 |     def __init__(self, model):
131 |         self.model = model
132 | 
133 |     @staticmethod
134 |     def loss(model):
135 |         output = model(data)
136 |         return loss_fn(output, target)
137 | 
138 |     def __call__(self):
139 |         optimizer.zero_grad()
140 |         loss = self.loss(self.model)
141 |         loss.backward()
142 |         return loss
143 | closure = Closure(model)
144 | ```
145 | 
146 | The following methods can then be used:
147 | 
148 | * [Newton Conjugate Gradient](https://youtu.be/0qUAb94CpOw?t=30m41s): `'Newton-CG'`
149 | * [Newton Conjugate Gradient Trust-Region][trust]: `'trust-ncg'`
150 | * [Krylov Subspace Trust-Region][krylov]: `'trust-krylov'`
151 | * [Nearly Exact Trust-Region][trust]: `'trust-exact'`
152 | * [Constrained Trust-Region][trust]: `'trust-constr'`
153 | 
154 | The code contains hacks to make it possible to call
155 | [torch.autograd.functional.hessian][torchhessian] (which is itself only
156 | supplied in PyTorch as a beta feature).
157 | 
158 | ### Algorithms without gradients
159 | 
160 | If using the `scipy.optimize.minimize` algorithms that don't require
161 | gradients (such as `'Nelder-Mead'`, `'COBYLA'` or `'Powell'`), ensure that
162 | `minimizer_args['jac'] = False` when instantiating `MinimizeWrapper`.
163 | 
164 | ### Algorithms you can choose but don't work
165 | 
166 | The following algorithms either didn't converge on a toy problem or hit
167 | errors when I tested them. You can still select them, but they may not work:
168 | 
169 | * [Dogleg][]: `'dogleg'`
170 | 
171 | All the other methods that require gradients converged on a toy problem
172 | that is tested in Travis-CI.
173 | 
174 | Global Optimizers
175 | -----------------
176 | 
177 | There are a few [global optimization algorithms in
178 | `scipy.optimize`][global]. The following are supported via their own
179 | wrapper classes:
180 | 
181 | * Basin Hopping via `BasinHoppingWrapper`
182 | * Differential Evolution via `DifferentialEvolutionWrapper`
183 | * Simplicial Homology Global Optimization via `SHGOWrapper`
184 | * Dual Annealing via `DualAnnealingWrapper`
185 | 
186 | An example of how to use one of these wrappers:
187 | 
188 | ```
189 | from pytorch_minimize.optim import BasinHoppingWrapper
190 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100})
191 | basinhopping_kwargs = dict(niter=200)
192 | optimizer = BasinHoppingWrapper(model.parameters(), minimizer_args, basinhopping_kwargs)
193 | ```
194 | 
195 | These are also illustrated in [this colab notebook][colab], where the
196 | following plots were generated:
197 | 
198 | ![Basin Hopping](images/rastrigin_BasinHoppingWrapper.png)
199 | 
200 | ![Differential Evolution](images/rastrigin_DifferentialEvolutionWrapper.png)
201 | 
202 | ![Dual Annealing](images/rastrigin_DualAnnealingWrapper.png)
203 | 
204 | ![Simplicial Homology Global Optimization](images/rastrigin_SHGOWrapper.png)
205 | 
206 | [colab]: https://colab.research.google.com/drive/19hZSxw3ZT3IgWGD9ZOuOYryeJoOGenJU?usp=sharing
207 | [global]: https://docs.scipy.org/doc/scipy/reference/optimize.html#global-optimization
208 | 
209 | How Does it Work?
210 | -----------------
211 | 
212 | [`scipy.optimize.minimize`][scipy] expects to receive a function `fun` that
213 | returns a scalar and an array of gradients the same size as the initial
214 | input array `x0`.
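For reference, this is the calling convention being targeted when `jac=True`, shown on a toy Numpy-only quadratic (plain scipy usage, independent of this package):

```
import numpy as np
from scipy.optimize import minimize

def fun(x):
    # return (scalar loss, gradient array with the same shape as x)
    return np.sum(x ** 2), 2 * x

res = minimize(fun, x0=np.ones(3), jac=True, method='CG')
print(res.x)  # approximately [0. 0. 0.]
```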
To accommodate this, `MinimizeWrapper` does the following:
215 | 
216 | 1. Create a wrapper function that will be passed as `fun`
217 | 2. In that function:
218 |     1. Unpack the Numpy array into parameter tensors
219 |     2. Substitute each parameter in place with these tensors
220 |     3. Evaluate `closure`, which will now use these parameter values
221 |     4. Extract the gradients
222 |     5. Pack the gradients back into one 1D Numpy array
223 |     6. Return the loss value and the gradient array
224 | 
225 | Then, all that's left is to call `scipy.optimize.minimize` and unpack the
226 | optimal parameters found back into the model.
227 | 
228 | This procedure involves unpacking and packing arrays, along with moving
229 | back and forth between Numpy and PyTorch, which may incur some overhead. I
230 | haven't done any profiling to find out if it's likely to be a big problem,
231 | but it completes in seconds when optimizing a logistic regression on MNIST
232 | by conjugate gradients.
233 | 
234 | ### Other Implementations
235 | 
236 | There are a few other projects that incorporate `scipy.optimize` and
237 | PyTorch:
238 | 
239 | * [This gist][mygist] I wrote in 2018 and then forgot about creates an
240 |   Objective object to pass into `scipy.optimize` but packs the arrays and
241 |   gradients in approximately the same way.
242 | * [botorch's `gen_candidates_scipy`][botorch] wraps
243 |   `scipy.optimize.minimize` and uses it to optimize acquisition functions as
244 |   part of Bayesian Optimization.
245 | * [autograd-minimize][agmin] wraps the `minimize` function itself, allowing
246 |   PyTorch or TensorFlow objectives to be passed directly to a function with
247 |   the same interface as `scipy.optimize.minimize`.
248 | 
249 | [agmin]: https://github.com/brunorigal/autograd-minimize
250 | [botorch]: https://github.com/pytorch/botorch/blob/main/botorch/generation/gen.py
251 | [mygist]: https://gist.github.com/gngdb/a9f912df362a85b37c730154ef3c294b
252 | 
253 | ### Pure PyTorch Minimization
254 | 
255 | `rfeinman` has implemented some of the algorithms available in `scipy.optimize`
256 | in a repository with [the same name as this repository][rfeinman]. That
257 | implementation is much more efficient and avoids switching between
258 | 32 and 64 bit floats between Numpy and PyTorch.
259 | 
260 | That repository also contains [a wrapper around scipy.optimize.minimize][rfeinmanwrapper].
261 | 
262 | [rfeinman]: https://github.com/rfeinman/pytorch-minimize
263 | [rfeinmanwrapper]: https://github.com/rfeinman/pytorch-minimize/blob/15742bbc17999976e7e3268c9181dadad772698b/torchmin/optim/scipy_minimizer.py#L93-L291
264 | 
265 | How Does This Evaluate the Hessian?
266 | -----------------------------------
267 | 
268 | To evaluate the Hessian in PyTorch,
269 | [`torch.autograd.functional.hessian`][torchhessian] takes two arguments:
270 | 
271 | * `func`: function that returns a scalar
272 | * `inputs`: variables to take the derivative with respect to
273 | 
274 | In most PyTorch code, `inputs` is a list of tensors embedded as parameters
275 | in the Modules that make up the `model`. They can't be passed as `inputs`
276 | because we typically don't have a `func` that will take the parameters as
277 | input, build a network from these parameters and then produce a scalar
278 | output.
279 | 
280 | From a [discussion on the PyTorch forum][forum], the only way to calculate
281 | these derivatives with respect to the parameters is to monkey patch
282 | `inputs` into the model and then calculate the loss.
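As a toy sketch of that idea (a single hypothetical `nn.Linear` with only its weight patched, random data, and not this package's actual implementation):

```
import torch
import torch.nn.functional as F

model = torch.nn.Linear(3, 1)
data, target = torch.randn(8, 3), torch.randn(8, 1)

def f(flat_weight):
    # monkey patch: replace the weight Parameter with a plain tensor built
    # from the input, so the loss becomes a function of `flat_weight`
    del model.weight
    model.weight = flat_weight.view(1, 3)
    return F.mse_loss(model(data), target)

x = torch.zeros(3)
H = torch.autograd.functional.hessian(f, x)
print(H.shape)  # torch.Size([3, 3])
```

The actual wrapper has to handle arbitrary modules, which is what the recursive monkey patch described next is for.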
I wrote a [recursive
283 | monkey patch][monkey] that operates on a [deepcopy][] of the original
284 | `model`. This involves copying everything in the model, so it's not very
285 | efficient.
286 | 
287 | The function passed to `scipy.optimize.minimize` as `hess` does the
288 | following:
289 | 
290 | 1. [`copy.deepcopy`][deepcopy] the entire `model` Module
291 | 2. The input `x` is a Numpy array, so cast it to a float tensor matching
292 |    the parameters' dtype and enable `requires_grad`
293 | 3. Define a function `f` that unpacks this 1D tensor into parameter
294 |    tensors
295 |     * [Recursively navigate][re_attr] the module object
296 |         - Deleting all existing parameters
297 |         - Replacing them with the tensors unpacked from `x`
298 |     * Calculate the loss using the static method stored in the `closure` object
299 | 4. Pass `f` and `x` to `torch.autograd.functional.hessian`, then cast the
300 |    result back into a Numpy array
301 | 
302 | Credits
303 | -------
304 | 
305 | If you use this in your work, please cite this repository using the
306 | following BibTeX entry, along with [Numpy][numpycite], [Scipy][scipycite]
307 | and [PyTorch][pytorchcite].
308 | 
309 | ```
310 | @misc{gray2021minimize,
311 |   author = {Gray, Gavin},
312 |   title = {PyTorch Minimize},
313 |   year = {2021},
314 |   publisher = {GitHub},
315 |   journal = {GitHub repository},
316 |   howpublished = {\url{https://github.com/gngdb/pytorch-minimize}}
317 | }
318 | ```
319 | 
320 | This package was created with [Cookiecutter][] and the
321 | [`audreyr/cookiecutter-pypackage`][audreyr] project template.
322 | 
323 | [pytorchcite]: https://github.com/pytorch/pytorch/blob/master/CITATION
324 | [numpycite]: https://www.scipy.org/citing.html#numpy
325 | [scipycite]: https://www.scipy.org/citing.html#scipy-the-library
326 | [re_attr]: https://stackoverflow.com/a/31174427/6937913
327 | [deepcopy]: https://docs.python.org/3/library/copy.html#copy.deepcopy
328 | [monkey]: https://github.com/gngdb/pytorch-minimize/blob/master/pytorch_minimize/optim.py#L106-L122
329 | [forum]: https://discuss.pytorch.org/t/using-autograd-functional-jacobian-hessian-with-respect-to-nn-module-parameters/103994/3
330 | [dogleg]: https://en.wikipedia.org/wiki/Powell%27s_dog_leg_method
331 | [tnc]: https://en.wikipedia.org/wiki/Truncated_Newton_method
332 | [krylov]: https://epubs.siam.org/doi/abs/10.1137/1.9780898719857.ch5
333 | [trust]: https://en.wikipedia.org/wiki/Trust_region
334 | [torchhessian]: https://pytorch.org/docs/stable/autograd.html#torch.autograd.functional.hessian
335 | [slsqp]: https://en.wikipedia.org/wiki/Sequential_quadratic_programming
336 | [conjugate]: https://en.wikipedia.org/wiki/Conjugate_gradient_method
337 | [lbfgs]: https://en.wikipedia.org/wiki/Limited-memory_BFGS
338 | [bfgs]: https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm
339 | [batch]: https://towardsdatascience.com/batch-mini-batch-stochastic-gradient-descent-7a62ecba642a
340 | [closure]: https://pytorch.org/docs/stable/optim.html#optimizer-step-closure
341 | [optimizer]: https://pytorch.org/docs/stable/optim.html
342 | [scipy]: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
343 | [Cookiecutter]: https://github.com/audreyr/cookiecutter
344 | [audreyr]: https://github.com/audreyr/cookiecutter-pypackage
345 | 
346 | 
-------------------------------------------------------------------------------- /images/rastrigin_BasinHoppingWrapper.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_BasinHoppingWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_DifferentialEvolutionWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_DifferentialEvolutionWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_DualAnnealingWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_DualAnnealingWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_SHGOWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_SHGOWrapper.png -------------------------------------------------------------------------------- /mnist/hessian_logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torchvision import datasets, transforms 7 | from pytorch_minimize.optim import MinimizeWrapper 8 | 9 | 10 | class LogReg(nn.Module): 11 | def __init__(self): 12 | super(LogReg, self).__init__() 13 | self.fc = nn.Linear(28*28, 10) 14 | 15 | def forward(self, x): 16 | n = x.size(0) 17 | x = self.fc(x.view(n,-1)) 18 | output = F.log_softmax(x, dim=1) 19 | return output 20 | 21 | 22 | def train(args, model, device, dataset, optimizer): 23 | model.train() 24 | data, target = dataset 25 | data, target = data.to(device), target.to(device) 26 | class Closure(): 27 | def __init__(self, model): 28 | self.model = model 29 | 30 | @staticmethod 31 | def loss(model): 32 | output = model(data) 33 | return F.nll_loss(output, target) 34 | 35 | def __call__(self): 36 | optimizer.zero_grad() 37 | loss = self.loss(self.model) 38 | loss.backward() 39 | self._loss = loss.item() 40 | return loss 41 | closure = Closure(model) 42 | optimizer.step(closure) 43 | print(f"Train Loss: {closure._loss:.2f}") 44 | 45 | def test(model, device, dataset): 46 | model.eval() 47 | test_loss = 0 48 | correct = 0 49 | with torch.no_grad(): 50 | data, target = dataset 51 | data, target = data.to(device), target.to(device) 52 | output = model(data) 53 | test_loss += F.nll_loss(output, target, reduction='mean').item() # sum up batch loss 54 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 55 | correct += pred.eq(target.view_as(pred)).sum().item() 56 | 57 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 58 | test_loss, correct, len(data), 59 | 100. 
* correct / len(data))) 60 | 61 | 62 | def main(): 63 | # Training settings 64 | parser = argparse.ArgumentParser(description='Logistic Regression' 65 | ' Example Optimization with Hessian') 66 | parser.add_argument('--method', type=str, default='Newton-CG', 67 | choices=["Newton-CG", "dogleg", "trust-ncg", 68 | "trust-krylov", "trust-exact", "trust-constr"], 69 | help='Which scipy.optimize.minimize method to use.') 70 | parser.add_argument('--no-cuda', action='store_true', default=False, 71 | help='disables CUDA training') 72 | parser.add_argument('--seed', type=int, default=1, metavar='S', 73 | help='random seed (default: 1)') 74 | parser.add_argument('--save-model', action='store_true', default=False, 75 | help='For Saving the current Model') 76 | args = parser.parse_args() 77 | use_cuda = not args.no_cuda and torch.cuda.is_available() 78 | 79 | torch.manual_seed(args.seed) 80 | 81 | device = torch.device("cuda" if use_cuda else "cpu") 82 | 83 | # train_kwargs = {'batch_size': 50000} # all of MNIST 84 | # test_kwargs = {'batch_size': 10000} # all of MNIST 85 | train_kwargs = {'batch_size': 500} 86 | test_kwargs = {'batch_size': 100} 87 | if use_cuda: 88 | cuda_kwargs = {'num_workers': 1, 89 | 'pin_memory': True, 90 | 'shuffle': True} 91 | train_kwargs.update(cuda_kwargs) 92 | test_kwargs.update(cuda_kwargs) 93 | 94 | transform=transforms.Compose([ 95 | transforms.ToTensor(), 96 | transforms.Normalize((0.1307,), (0.3081,)) 97 | ]) 98 | dataset1 = datasets.MNIST('../data', train=True, download=True, 99 | transform=transform) 100 | dataset2 = datasets.MNIST('../data', train=False, 101 | transform=transform) 102 | train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) 103 | test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) 104 | train_dataset = next(iter(train_loader)) 105 | test_dataset = next(iter(test_loader)) 106 | 107 | model = LogReg().to(device) 108 | minimizer_args = dict(method=args.method, options={'disp':True, 'maxiter':100}) 109 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 110 | 111 | train(args, model, device, train_dataset, optimizer) 112 | test(model, device, test_dataset) 113 | 114 | if args.save_model: 115 | torch.save(model.state_dict(), "mnist_logreg.pt") 116 | 117 | if __name__ == '__main__': 118 | main() 119 | -------------------------------------------------------------------------------- /mnist/logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torchvision import datasets, transforms 7 | from pytorch_minimize.optim import MinimizeWrapper 8 | 9 | 10 | class LogReg(nn.Module): 11 | def __init__(self): 12 | super(LogReg, self).__init__() 13 | self.fc = nn.Linear(28*28, 10) 14 | 15 | def forward(self, x): 16 | n = x.size(0) 17 | x = self.fc(x.view(n,-1)) 18 | output = F.log_softmax(x, dim=1) 19 | return output 20 | 21 | 22 | def train(args, model, device, dataset, optimizer): 23 | model.train() 24 | data, target = dataset 25 | data, target = data.to(device), target.to(device) 26 | class Closure(): 27 | def __call__(self): 28 | optimizer.zero_grad() 29 | output = model(data) 30 | loss = F.nll_loss(output, target) 31 | loss.backward() 32 | self.loss = loss.item() 33 | return loss 34 | closure = Closure() 35 | optimizer.step(closure) 36 | print(f"Train Loss: {closure.loss:.2f}") 37 | 38 | def test(model, device, dataset): 39 
| model.eval() 40 | test_loss = 0 41 | correct = 0 42 | with torch.no_grad(): 43 | data, target = dataset 44 | data, target = data.to(device), target.to(device) 45 | output = model(data) 46 | test_loss += F.nll_loss(output, target, reduction='mean').item() # sum up batch loss 47 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 48 | correct += pred.eq(target.view_as(pred)).sum().item() 49 | 50 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 51 | test_loss, correct, len(data), 52 | 100. * correct / len(data))) 53 | 54 | 55 | def main(): 56 | # Training settings 57 | parser = argparse.ArgumentParser(description='Logistic Regression' 58 | ' Example Optimization without Hessian') 59 | parser.add_argument('--method', type=str, default='CG', 60 | choices=["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"], 61 | help='Which scipy.optimize.minimize method to use.') 62 | parser.add_argument('--no-cuda', action='store_true', default=False, 63 | help='disables CUDA training') 64 | parser.add_argument('--seed', type=int, default=1, metavar='S', 65 | help='random seed (default: 1)') 66 | parser.add_argument('--save-model', action='store_true', default=False, 67 | help='For Saving the current Model') 68 | args = parser.parse_args() 69 | use_cuda = not args.no_cuda and torch.cuda.is_available() 70 | 71 | torch.manual_seed(args.seed) 72 | 73 | device = torch.device("cuda" if use_cuda else "cpu") 74 | 75 | # train_kwargs = {'batch_size': 50000} # all of MNIST 76 | # test_kwargs = {'batch_size': 10000} # all of MNIST 77 | train_kwargs = {'batch_size': 500} 78 | test_kwargs = {'batch_size': 100} 79 | if use_cuda: 80 | cuda_kwargs = {'num_workers': 1, 81 | 'pin_memory': True, 82 | 'shuffle': True} 83 | train_kwargs.update(cuda_kwargs) 84 | test_kwargs.update(cuda_kwargs) 85 | 86 | transform=transforms.Compose([ 87 | transforms.ToTensor(), 88 | transforms.Normalize((0.1307,), (0.3081,)) 89 | ]) 90 | dataset1 = datasets.MNIST('../data', train=True, download=True, 91 | transform=transform) 92 | dataset2 = datasets.MNIST('../data', train=False, 93 | transform=transform) 94 | train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) 95 | test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) 96 | train_dataset = next(iter(train_loader)) 97 | test_dataset = next(iter(test_loader)) 98 | 99 | model = LogReg().to(device) 100 | minimizer_args = dict(method=args.method, options={'disp':True, 'maxiter':100}) 101 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 102 | 103 | train(args, model, device, train_dataset, optimizer) 104 | test(model, device, test_dataset) 105 | 106 | if args.save_model: 107 | torch.save(model.state_dict(), "mnist_logreg.pt") 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /pytorch_minimize/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for PyTorch Minimize.""" 2 | 3 | __author__ = """Gavin Gray""" 4 | __email__ = 'gngdb.labs@gmail.com' 5 | __version__ = '0.1.0' 6 | -------------------------------------------------------------------------------- /pytorch_minimize/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.optimize import ( 4 | minimize, 5 | basinhopping, 6 | brute, 7 | differential_evolution, 8 | shgo, 9 | dual_annealing 10 | ) 11 | import 
functools 12 | from copy import deepcopy 13 | 14 | 15 | # thanks to https://stackoverflow.com/a/31174427/6937913 16 | # recursively set attributes 17 | def rsetattr(obj, attr, val): 18 | pre, _, post = attr.rpartition('.') 19 | return setattr(rgetattr(obj, pre) if pre else obj, post, val) 20 | 21 | def rgetattr(obj, attr, *args): 22 | def _getattr(obj, attr): 23 | return getattr(obj, attr, *args) 24 | return functools.reduce(_getattr, [obj] + attr.split('.')) 25 | 26 | def rdelattr(obj, attr): 27 | pre, _, post = attr.rpartition('.') 28 | return delattr(rgetattr(obj, pre) if pre else obj, post) 29 | 30 | # generic float casting 31 | def floatX(x, np_to, torch_to): 32 | if isinstance(x, np.ndarray): 33 | return x.astype(np_to) 34 | elif isinstance(x, torch.Tensor): 35 | return x.to(torch_to) 36 | elif isinstance(x, float): 37 | return np_to(x) 38 | else: 39 | raise ValueError('Only numpy arrays and torch tensors can be cast to' 40 | f'float, not {x} of type {type(x)}') 41 | 42 | float32 = functools.partial(floatX, np_to=np.float32, torch_to=torch.float32) 43 | float64 = functools.partial(floatX, np_to=np.float64, torch_to=torch.float64) 44 | 45 | 46 | class MinimizeWrapper(torch.optim.Optimizer): 47 | def __init__(self, params, minimizer_args): 48 | assert type(minimizer_args) is dict 49 | if 'jac' not in minimizer_args: 50 | minimizer_args['jac'] = True 51 | assert minimizer_args['jac'] in [True, False], \ 52 | "separate jac function not supported" 53 | params = self.set_floatX(params) 54 | self.jac_methods = ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"] 55 | self.hess_methods = ["Newton-CG", "dogleg", "trust-ncg", 56 | "trust-krylov", "trust-exact", "trust-constr"] 57 | self.gradfree_methods = ["Nelder-Mead", "Powell", "COBYLA"] 58 | method = minimizer_args['method'] 59 | if method in self.jac_methods: 60 | self.use_hess = False 61 | elif method in self.hess_methods: 62 | self.use_hess = True 63 | elif method in self.gradfree_methods: 64 | self.use_hess = False 65 | assert minimizer_args['jac'] == False, \ 66 | "set minimizer_args['jac']=False to use gradient free algorithms" 67 | else: 68 | raise ValueError(f"Method {method} not supported or does not exist") 69 | self.minimizer_args = minimizer_args 70 | if 'options' not in self.minimizer_args: 71 | self.minimizer_args.update({'options':{}}) 72 | if 'maxiter' not in self.minimizer_args['options']: 73 | self.minimizer_args['options'].update({'maxiter':2}) 74 | super(MinimizeWrapper, self).__init__(params, self.minimizer_args) 75 | assert len(self.param_groups) == 1, "only supports one group" 76 | 77 | def set_floatX(self, params): 78 | params = [p for p in params] 79 | if all(p.dtype == torch.float32 for p in params): 80 | self.floatX = float32 81 | elif all(p.dtype == torch.float64 for p in params): 82 | self.floatX = float64 83 | else: 84 | raise ValueError('Only float or double parameters permitted') 85 | return params 86 | 87 | def ravel_pack(self, tensors): 88 | # pack tensors into a numpy array 89 | def numpyify(tensor): 90 | if tensor.device != torch.device('cpu'): 91 | tensor = tensor.cpu() 92 | return tensor.detach().numpy() 93 | x = np.concatenate([numpyify(tensor).ravel() for tensor in tensors], 0) 94 | x = self.floatX(x) 95 | return x 96 | 97 | def np_unravel_unpack(self, x): 98 | x = torch.from_numpy(self.floatX(x)) 99 | return self.unravel_unpack(x) 100 | 101 | def unravel_unpack(self, x): 102 | # unpack parameters from a numpy array 103 | _group = next(iter(self.param_groups)) 104 | _params = _group['params'] # use params as 
shape reference 105 | i = 0 106 | params = [] 107 | for _p in _params: 108 | j = _p.numel() 109 | p = x[i:i+j].view(_p.size()) 110 | p = p.to(_p.device) 111 | params.append(p) 112 | i += j 113 | return params 114 | 115 | def minimize(self, func, x0, **minimizer_args): 116 | return minimize(func, x0, **minimizer_args) 117 | 118 | @torch.no_grad() 119 | def step(self, closure): 120 | group = next(iter(self.param_groups)) 121 | params = group['params'] 122 | 123 | def torch_wrapper(x, return_grad=False, *args): 124 | # monkey patch set parameter values 125 | _params = self.np_unravel_unpack(x) 126 | for p, _p in zip(params, _params): 127 | p.data = _p 128 | with torch.enable_grad(): 129 | loss = closure() 130 | loss = self.floatX(loss.item()) 131 | if return_grad: 132 | grads = self.ravel_pack([p.grad for p in params]) 133 | return loss, grads 134 | else: 135 | return loss 136 | if self.minimizer_args['jac']: 137 | torch_wrapper = functools.partial(torch_wrapper, return_grad=True) 138 | 139 | if hasattr(closure, 'model') and self.use_hess: 140 | def hess(x): 141 | model = deepcopy(closure.model) 142 | with torch.enable_grad(): 143 | x = self.floatX(torch.tensor(x)).requires_grad_() 144 | def f(x): 145 | _params = self.unravel_unpack(x) 146 | # monkey patch substitute variables 147 | named_params = list(model.named_parameters()) 148 | for _p, (n, _) in zip(_params, named_params): 149 | rdelattr(model, n) 150 | rsetattr(model, n, _p) 151 | return closure.loss(model) 152 | def numpyify(x): 153 | if x.device != torch.device('cpu'): 154 | x = x.cpu() 155 | #return x.numpy().astype(np.float64) 156 | return self.floatX(x.numpy()) 157 | return numpyify(torch.autograd.functional.hessian(f, x)) 158 | else: 159 | hess = None 160 | 161 | # run the minimizer 162 | x0 = self.ravel_pack(params) 163 | self.res = self.minimize(torch_wrapper, x0, hess=hess, **self.minimizer_args) 164 | 165 | # set the final parameters 166 | _params = self.np_unravel_unpack(self.res.x) 167 | for p, _p in zip(params, _params): 168 | p.data = _p 169 | 170 | 171 | class BasinHoppingWrapper(MinimizeWrapper): 172 | def __init__(self, params, minimizer_args, basinhopping_kwargs): 173 | self.basinhopping_kwargs = basinhopping_kwargs 174 | super().__init__(params, minimizer_args) 175 | 176 | def minimize(self, func, x0, **minimizer_args): 177 | return basinhopping(func, x0, minimizer_kwargs=minimizer_args, 178 | **self.basinhopping_kwargs) 179 | 180 | 181 | class DifferentialEvolutionWrapper(MinimizeWrapper): 182 | def __init__(self, params, de_kwargs): 183 | self.minimizer_args = {'jac': False} 184 | self.de_kwargs = de_kwargs 185 | params = self.set_floatX(params) 186 | super(MinimizeWrapper, self).__init__(params, self.minimizer_args) 187 | 188 | def minimize(self, func, x0, hess, **kwargs): 189 | return differential_evolution(func, **self.de_kwargs) 190 | 191 | 192 | class SHGOWrapper(MinimizeWrapper): 193 | def __init__(self, params, minimizer_args, shgo_kwargs): 194 | minimizer_args.update({'jac': False}) 195 | self.shgo_kwargs = shgo_kwargs 196 | super().__init__(params, minimizer_args) 197 | 198 | def minimize(self, func, x0, **minimizer_args): 199 | def jac_fun(x, *args): 200 | return func(x, True)[1] 201 | def obj_fun(x, *args): 202 | return func(x, False) 203 | minimizer_args['jac'] = jac_fun 204 | return shgo(obj_fun, minimizer_kwargs=minimizer_args, 205 | args=[False], 206 | **self.shgo_kwargs) 207 | 208 | 209 | class DualAnnealingWrapper(MinimizeWrapper): 210 | def __init__(self, params, minimizer_args, da_kwargs): 211 | 
minimizer_args.update({'jac': False}) 212 | self.da_kwargs = da_kwargs 213 | super().__init__(params, minimizer_args) 214 | 215 | def minimize(self, func, x0, **minimizer_args): 216 | jac_fun = lambda x: func(x, True)[1] 217 | minimizer_args['jac'] = jac_fun 218 | return dual_annealing(func, local_search_options=minimizer_args, 219 | args=[False], 220 | **self.da_kwargs) 221 | 222 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip==19.2.3 2 | bump2version==0.5.11 3 | wheel==0.33.6 4 | watchdog==0.9.0 5 | flake8==3.7.8 6 | tox==3.14.0 7 | coverage==4.5.4 8 | Sphinx==1.8.5 9 | twine==1.14.0 10 | 11 | pytest==4.6.5 12 | pytest-runner==5.1 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pytorch_minimize/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | # Define setup.py command aliases here 22 | test = pytest 23 | 24 | [tool:pytest] 25 | collect_ignore = ['setup.py'] 26 | 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4 | 5 | from setuptools import setup, find_packages 6 | 7 | with open('README.md') as readme_file: 8 | readme = readme_file.read() 9 | 10 | requirements = ['scipy'] 11 | 12 | setup_requirements = ['pytest-runner', ] 13 | 14 | test_requirements = ['pytest>=3', ] 15 | 16 | setup( 17 | author="Gavin Gray", 18 | author_email='gngdb.labs@gmail.com', 19 | python_requires='>=3.5', 20 | classifiers=[ 21 | 'Development Status :: 2 - Pre-Alpha', 22 | 'Intended Audience :: Developers', 23 | 'License :: OSI Approved :: MIT License', 24 | 'Natural Language :: English', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.5', 27 | 'Programming Language :: Python :: 3.6', 28 | 'Programming Language :: Python :: 3.7', 29 | 'Programming Language :: Python :: 3.8', 30 | ], 31 | description="Use scipy.optimize.minimize as a PyTorch Optimizer.", 32 | install_requires=requirements, 33 | license="MIT license", 34 | long_description=readme + '\n', 35 | include_package_data=True, 36 | keywords='pytorch_minimize', 37 | name='pytorch_minimize', 38 | packages=find_packages(include=['pytorch_minimize', 'pytorch_minimize.*']), 39 | setup_requires=setup_requirements, 40 | test_suite='tests', 41 | tests_require=test_requirements, 42 | url='https://github.com/gngdb/pytorch_minimize', 43 | version='0.2.0', 44 | zip_safe=False, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for pytorch_minimize.""" 2 | -------------------------------------------------------------------------------- /tests/test_basinhopping.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from pytorch_minimize.optim import BasinHoppingWrapper 7 | import numpy as np 8 | from sklearn.datasets import make_classification 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.metrics import log_loss 12 | 13 | 14 | n_samples = 120 15 | n_features = 20 16 | n_classes = 10 17 | 18 | 19 | class LogReg(nn.Module): 20 | def __init__(self): 21 | super(LogReg, self).__init__() 22 | self.fc = nn.Linear(n_features, n_classes) 23 | 24 | def forward(self, x): 25 | n = x.size(0) 26 | x = self.fc(x.view(n,-1)) 27 | output = F.log_softmax(x, dim=1) 28 | return output 29 | 30 | def main(method, disp=True, floatX='float32'): 31 | # only run tests on CPU 32 | device = torch.device('cpu') 33 | 34 | # seed everything 35 | torch.manual_seed(0) 36 | np.random.seed(0) 37 | 38 | # generate classification dataset 39 | X, y = make_classification(n_samples=n_samples, 40 | n_informative=10, 41 | n_features=n_features, 42 | n_classes=n_classes) 43 | # split into training and test 44 | X_train, X_test, y_train, y_test = train_test_split(X, y, 45 | test_size=(2./12.), random_state=0) 46 | def torchify(X, y): 47 | return torch.from_numpy(X).float(), torch.from_numpy(y).long() 48 | train_dataset = torchify(X_train, y_train) 49 | test_dataset = torchify(X_test, y_test) 50 | 51 | # test sklearn 52 | # clf = LogisticRegression(penalty='none').fit(X_train, y_train) 53 | # print(clf.score(X_train, y_train)) 54 | # print(log_loss(y_train, clf.predict_proba(X_train))) 55 | 56 | # instance model 57 | model = LogReg().to(device) 58 | 59 | # instance optimizer 60 | minimizer_args = dict(method=method, options={'disp':True, 'maxiter':10000}) 61 | basinhopping_kwargs = {'niter':4} 62 | if floatX == 'float64': 63 | model = model.double() 64 | optimizer = BasinHoppingWrapper(model.parameters(), minimizer_args, basinhopping_kwargs) 65 | 66 | # train 67 | model.train() 68 | data, target = train_dataset 69 | data, target = data.to(device), target.to(device) 70 | if floatX == 'float64': 71 | data = data.double() 72 | class Closure(): 73 | def __init__(self, model): 74 | self.model = model 75 | 76 | @staticmethod 77 | def loss(model): 78 | output = model(data) 79 | return F.nll_loss(output, target) 80 | 81 | def __call__(self): 82 | optimizer.zero_grad() 83 | loss = self.loss(self.model) 84 | loss.backward() 85 | self._loss = loss.item() 86 | return loss 87 | closure = Closure(model) 88 | optimizer.step(closure) 89 | 90 | # check if train loss is zero (overfitting) 91 | assert abs(closure._loss) < 1e-1, f"Train loss not near zero with {method}: {closure._loss}" 92 | return optimizer.res, closure._loss 93 | 94 | def test_jac_methods(): 95 | # test methods that require only the jacobian and not the hessian 96 | methods = ["CG", "BFGS", "L-BFGS-B", "SLSQP", "TNC"] 97 | failing_combinations = [("L-BFGS-B", "float32"), ("TNC", "float32")] 98 | for method in methods: 99 | for floatX in ["float32", "float64"]: 100 | if (method, floatX) not in failing_combinations: 101 | _ = main(method, disp=False, floatX=floatX) 102 | 103 | def test_hess_methods(): 104 | methods = ["Newton-CG", "trust-ncg", "trust-krylov", "trust-exact", "trust-constr"] 105 | failing_methods = ["dogleg"] 106 | for method in methods: 107 | for floatX in ['float32', 
'float64']: 108 | _ = main(method, disp=False, floatX=floatX) 109 | 110 | if __name__ == "__main__": 111 | res, loss = main("L-BFGS-B", floatX='float64') 112 | #res, loss = main("TNC", floatX='float32') 113 | # print(res) 114 | print(f"Train Loss: {loss:.2f}") 115 | 116 | -------------------------------------------------------------------------------- /tests/test_differential_evolution.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import DifferentialEvolutionWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_differential_evolution(double=True, disp=False): 8 | def ackley(x): 9 | arg1 = -0.2 * np.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)) 10 | arg2 = 0.5 * (np.cos(2. * np.pi * x[0]) + np.cos(2. * np.pi * x[1])) 11 | return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e 12 | 13 | class Ackley(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.x = nn.Parameter(torch.zeros(2)) 17 | 18 | def forward(self): 19 | x = self.x 20 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)) 21 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 22 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. + math.e 23 | 24 | bounds = [(-5, 5), (-5, 5)] 25 | de_kwargs = dict(bounds=bounds, disp=disp) 26 | #result = differential_evolution(ackley, bounds, disp=disp) 27 | ackley = Ackley() 28 | if double: 29 | ackley = ackley.double() 30 | optimizer = DifferentialEvolutionWrapper(ackley.parameters(), de_kwargs) 31 | 32 | def closure(): 33 | with torch.no_grad(): 34 | return ackley() 35 | 36 | optimizer.step(closure) 37 | 38 | print(optimizer.res.x, optimizer.res.fun) 39 | 40 | if __name__ == '__main__': 41 | test_differential_evolution(disp=True) 42 | -------------------------------------------------------------------------------- /tests/test_dual_annealing.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import DualAnnealingWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_da(double=True, disp=False): 8 | class Ackley(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | self.x = nn.Parameter(torch.ones(2)) 12 | 13 | def forward(self): 14 | x = self.x 15 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)+1e-3) 16 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 17 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. 
+ math.e 18 | 19 | bounds = [(-5, 5), (-5, 5)] 20 | da_kwargs = dict(bounds=bounds) 21 | minimizer_args = dict(method='SLSQP', options={'disp':disp, 'maxiter':10000}) 22 | ackley = Ackley() 23 | if double: 24 | ackley = ackley.double() 25 | optimizer = DualAnnealingWrapper(ackley.parameters(), minimizer_args, da_kwargs) 26 | 27 | def closure(): 28 | optimizer.zero_grad() 29 | loss = ackley() 30 | loss.backward() 31 | return loss 32 | 33 | optimizer.step(closure) 34 | 35 | print(optimizer.res.x, optimizer.res.fun) 36 | 37 | if __name__ == '__main__': 38 | test_da(disp=True) 39 | -------------------------------------------------------------------------------- /tests/test_pytorch_minimize.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from pytorch_minimize.optim import MinimizeWrapper 7 | import numpy as np 8 | from sklearn.datasets import make_classification 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.metrics import log_loss 12 | 13 | 14 | n_samples = 120 15 | n_features = 20 16 | n_classes = 10 17 | 18 | 19 | class LogReg(nn.Module): 20 | def __init__(self): 21 | super(LogReg, self).__init__() 22 | self.fc = nn.Linear(n_features, n_classes) 23 | 24 | def forward(self, x): 25 | n = x.size(0) 26 | x = self.fc(x.view(n,-1)) 27 | output = F.log_softmax(x, dim=1) 28 | return output 29 | 30 | def main(method, disp=True, floatX='float32', cuda=False): 31 | # only run tests on CPU 32 | if cuda: 33 | device = torch.device('cuda') 34 | else: 35 | device = torch.device('cpu') 36 | 37 | # seed everything 38 | torch.manual_seed(0) 39 | np.random.seed(0) 40 | 41 | # generate classification dataset 42 | X, y = make_classification(n_samples=n_samples, 43 | n_informative=10, 44 | n_features=n_features, 45 | n_classes=n_classes) 46 | # split into training and test 47 | X_train, X_test, y_train, y_test = train_test_split(X, y, 48 | test_size=(2./12.), random_state=0) 49 | def torchify(X, y): 50 | return torch.from_numpy(X).float(), torch.from_numpy(y).long() 51 | train_dataset = torchify(X_train, y_train) 52 | test_dataset = torchify(X_test, y_test) 53 | 54 | # test sklearn 55 | # clf = LogisticRegression(penalty='none').fit(X_train, y_train) 56 | # print(clf.score(X_train, y_train)) 57 | # print(log_loss(y_train, clf.predict_proba(X_train))) 58 | 59 | # instance model 60 | model = LogReg().to(device) 61 | 62 | # instance optimizer 63 | minimizer_args = dict(method=method, options={'disp':True, 'maxiter':10000}) 64 | if floatX == 'float64': 65 | model = model.double() 66 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 67 | 68 | # train 69 | model.train() 70 | data, target = train_dataset 71 | data, target = data.to(device), target.to(device) 72 | if floatX == 'float64': 73 | data = data.double() 74 | class Closure(): 75 | def __init__(self, model): 76 | self.model = model 77 | 78 | @staticmethod 79 | def loss(model): 80 | output = model(data) 81 | return F.nll_loss(output, target) 82 | 83 | def __call__(self): 84 | optimizer.zero_grad() 85 | loss = self.loss(self.model) 86 | loss.backward() 87 | self._loss = loss.item() 88 | return loss 89 | closure = Closure(model) 90 | optimizer.step(closure) 91 | 92 | # check if train loss is zero (overfitting) 93 | assert abs(closure._loss) < 1e-1, f"Train loss not near zero with 
{method}: {closure._loss}" 94 | return optimizer.res, closure._loss 95 | 96 | def test_jac_methods(): 97 | # test methods that require only the jacobian and not the hessian 98 | methods = ["CG", "BFGS", "L-BFGS-B", "SLSQP", "TNC"] 99 | failing_combinations = [("L-BFGS-B", "float32"), ("TNC", "float32")] 100 | for method in methods: 101 | for floatX in ["float32", "float64"]: 102 | if (method, floatX) not in failing_combinations: 103 | _ = main(method, disp=False, floatX=floatX) 104 | 105 | def test_hess_methods(): 106 | methods = ["Newton-CG", "trust-ncg", "trust-krylov", "trust-exact", "trust-constr"] 107 | failing_methods = ["dogleg"] 108 | for method in methods: 109 | for floatX in ['float32', 'float64']: 110 | _ = main(method, disp=False, floatX=floatX) 111 | 112 | def test_gpu(): 113 | # if there's a GPU, run this test (so this won't run on travis) 114 | if torch.cuda.is_available(): 115 | for method in ["CG", "Newtom-CG"]: 116 | main(method, disp=False, floatX='float32', cuda=True) 117 | 118 | if __name__ == "__main__": 119 | res, loss = main("Newton-CG", floatX='float64', cuda=True) 120 | #res, loss = main("TNC", floatX='float32') 121 | # print(res) 122 | print(f"Train Loss: {loss:.2f}") 123 | 124 | -------------------------------------------------------------------------------- /tests/test_shgo.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import SHGOWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_shgo(double=True, disp=False): 8 | class Ackley(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | self.x = nn.Parameter(torch.ones(2)) 12 | 13 | def forward(self): 14 | x = self.x 15 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)+1e-3) 16 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 17 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. 
+ math.e 18 | 19 | bounds = [(-5, 5), (-5, 5)] 20 | shgo_kwargs = dict(bounds=bounds, options={'disp':disp}) 21 | minimizer_args = dict(method='SLSQP', options={'disp':disp, 'maxiter':10000}) 22 | ackley = Ackley() 23 | if double: 24 | ackley = ackley.double() 25 | optimizer = SHGOWrapper(ackley.parameters(), minimizer_args, shgo_kwargs) 26 | 27 | def closure(): 28 | optimizer.zero_grad() 29 | loss = ackley() 30 | loss.backward() 31 | return loss 32 | 33 | optimizer.step(closure) 34 | 35 | print(optimizer.res.x, optimizer.res.fun) 36 | 37 | if __name__ == '__main__': 38 | test_shgo(disp=True) 39 | -------------------------------------------------------------------------------- /tests/test_shgo_example.py: -------------------------------------------------------------------------------- 1 | from scipy.optimize import rosen, shgo, brute 2 | 3 | if __name__ == '__main__': 4 | bounds = [(0,2), (0, 2), (0, 2), (0, 2), (0, 2)] 5 | 6 | minimizer_args = dict(method='SLSQP', options={'disp':True, 'maxiter':10000}) 7 | shgo_kwargs = dict(bounds=bounds, options={'disp':True}) 8 | 9 | result = shgo(rosen, minimizer_kwargs=minimizer_args, **shgo_kwargs) 10 | ranges=[slice(a,b,0.25) for a,b in bounds] 11 | brute_kwargs = {'ranges': ranges} 12 | #result = brute(rosen, **brute_kwargs) 13 | 14 | # print(result) 15 | print(result.x, result.fun) 16 | -------------------------------------------------------------------------------- /tests/test_unpack_unravel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from pytorch_minimize.optim import MinimizeWrapper 5 | 6 | def test_index_bug(): 7 | torch.manual_seed(0) 8 | params = {'a': torch.randn(10), 'b': torch.randn(9), 'c': torch.randn(8)} 9 | params = list(params.values()) 10 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100}) 11 | optimizer = MinimizeWrapper(params, minimizer_args) 12 | 13 | _params = optimizer.np_unravel_unpack(optimizer.ravel_pack(params)) 14 | for p, _p in zip(params, _params): 15 | assert torch.abs(p-_p).max() < 1e-5 16 | 17 | if __name__ == '__main__': 18 | test_index_bug() 19 | --------------------------------------------------------------------------------