├── .editorconfig ├── .github └── ISSUE_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── LICENSE ├── README.md ├── images ├── rastrigin_BasinHoppingWrapper.png ├── rastrigin_DifferentialEvolutionWrapper.png ├── rastrigin_DualAnnealingWrapper.png └── rastrigin_SHGOWrapper.png ├── mnist ├── hessian_logistic_regression.py └── logistic_regression.py ├── pytorch_minimize ├── __init__.py └── optim.py ├── requirements_dev.txt ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── test_basinhopping.py ├── test_differential_evolution.py ├── test_dual_annealing.py ├── test_pytorch_minimize.py ├── test_shgo.py ├── test_shgo_example.py └── test_unpack_unravel.py /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | * PyTorch Minimize version: 2 | * Python version: 3 | * Operating System: 4 | 5 | ### Description 6 | 7 | Describe what you were trying to get done. 8 | Tell us what happened, what went wrong, and what you expected to happen. 9 | 10 | ### What I Did 11 | 12 | ``` 13 | Paste the command(s) you ran and the output. 14 | If there was a crash, please include the traceback here. 15 | ``` 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | docs/_build/ 67 | 68 | # PyBuilder 69 | target/ 70 | 71 | # Jupyter Notebook 72 | .ipynb_checkpoints 73 | 74 | # pyenv 75 | .python-version 76 | 77 | # celery beat schedule file 78 | celerybeat-schedule 79 | 80 | # SageMath parsed files 81 | *.sage.py 82 | 83 | # dotenv 84 | .env 85 | 86 | # virtualenv 87 | .venv 88 | venv/ 89 | ENV/ 90 | 91 | # Spyder project settings 92 | .spyderproject 93 | .spyproject 94 | 95 | # Rope project settings 96 | .ropeproject 97 | 98 | # mkdocs documentation 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # IDE settings 105 | .vscode/ 106 | 107 | # If you download MNIST data, don't commit it 108 | data 109 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Config file for automatic testing at travis-ci.com 2 | 3 | language: python 4 | python: 5 | - 3.8 6 | - 3.7 7 | - 3.6 8 | 9 | # Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors 10 | install: 11 | - sudo apt-get update 12 | # We do this conditionally because it saves us some downloading if the 13 | # version is the same. 14 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 15 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh; 16 | else 17 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 18 | fi 19 | - bash miniconda.sh -b -p $HOME/miniconda 20 | - source "$HOME/miniconda/etc/profile.d/conda.sh" 21 | - hash -r 22 | - conda config --set always_yes yes --set changeps1 no 23 | - conda update -q conda 24 | # Useful for debugging any issues with conda 25 | - conda info -a 26 | # create environment 27 | - conda create -q -n test-environment -c pytorch python=$TRAVIS_PYTHON_VERSION pytorch cpuonly scipy scikit-learn pytest 28 | - conda activate test-environment 29 | - python -m pip install . 30 | 31 | # Command to run tests, e.g. python setup.py test 32 | script: pytest 33 | 34 | # Assuming you have installed the travis-ci CLI tool, after you 35 | # create the Github repo and add it to Travis, run the 36 | # following command to finish PyPI deployment setup: 37 | # $ travis encrypt --add deploy.password 38 | deploy: 39 | provider: pypi 40 | distributions: sdist bdist_wheel 41 | user: gngdb 42 | password: 43 | secure: PLEASE_REPLACE_ME 44 | on: 45 | tags: true 46 | repo: gngdb/pytorch_minimize 47 | python: 3.8 48 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Gavin Gray 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first? 
14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021, Gavin Gray 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | > [!IMPORTANT] 3 | > This project is not in active development. [Functional updates to PyTorch would 4 | > make everything here cleaner and more reliable][func_issue]. Also, I haven't 5 | > tried it but [`rfeinman`'s re-implementation][rfeinman] of 6 | > `scipy.optimize` in PyTorch may be what you're looking for as it should have 7 | > the same functionality as this project in most cases. 8 | 9 | [func_issue]: https://github.com/gngdb/pytorch-minimize/issues/3 10 | 11 | PyTorch Minimize 12 | ================ 13 | 14 | A wrapper for [`scipy.optimize.minimize`][scipy] to make it a PyTorch 15 | Optimizer implementing Conjugate Gradients, BFGS, l-BFGS, SLSQP, Newton 16 | Conjugate Gradient, Trust Region methods and others in PyTorch. 17 | 18 | *Warning*: this project is a proof of concept and is not necessarily 19 | reliable, although [the code](./pytorch_minimize/optim.py) (that's all of 20 | it) is small enough to be readable. 21 | 22 | * [Quickstart](#quickstart) 23 | * [Install](#install) 24 | * [Using The Optimizer](#using-the-optimizer) 25 | * [Which Algorithms Are Supported?](#which-algorithms-are-supported) 26 | * [Methods that require Hessian evaluations](#methods-that-require-hessian-evaluations) 27 | * [Algorithms without gradients](#algorithms-without-gradients) 28 | * [Algorithms you can choose but don't work](#algorithms-you-can-choose-but-dont-work) 29 | * [Global Optimizers](#global-optimizers) 30 | * [How Does it Work?](#how-does-it-work) 31 | * [Other Implmentations](#other-implementations) 32 | * [How Does This Evaluate the Hessian?](#how-does-this-evaluate-the-hessian) 33 | * [Credits](#credits) 34 | 35 | Quickstart 36 | ---------- 37 | 38 | ### Install 39 | 40 | Dependencies: 41 | 42 | * `pytorch` 43 | * `scipy` 44 | 45 | The following install procedure isn't going to check these are installed. 
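A quick way to check that both are importable before you install (a one-line sanity check, nothing specific to this package):

```
python -c "import torch, scipy; print(torch.__version__, scipy.__version__)"
```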
46 | 47 | This package can be installed with `pip` directly from Github: 48 | 49 | ``` 50 | pip install git+https://github.com/gngdb/pytorch-minimize.git 51 | ``` 52 | 53 | Or by cloning the repository and then installing: 54 | 55 | ``` 56 | git clone https://github.com/gngdb/pytorch-minimize.git 57 | cd pytorch-minimize 58 | python -m pip install . 59 | ``` 60 | 61 | ### Using The Optimizer 62 | 63 | The Optimizer class is `MinimizeWrapper` in `pytorch_minimize.optim`. It 64 | has the same interface as a [PyTorch Optimizer][optimizer], taking 65 | `model.parameters()`, and is configured by passing a dictionary of 66 | arguments, here called `minimizer_args`, that will later be passed to 67 | [`scipy.optimize.minimize`][scipy]: 68 | 69 | ``` 70 | from pytorch_minimize.optim import MinimizeWrapper 71 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100}) 72 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 73 | ``` 74 | 75 | The main difference when using this optimizer as opposed to most PyTorch 76 | optimizers is that a [closure][] ([`torch.optim.LBFGS`][torch_lbfgs] also 77 | requires this) must be defined: 78 | 79 | ``` 80 | def closure(): 81 | optimizer.zero_grad() 82 | output = model(input) 83 | loss = loss_fn(output, target) 84 | loss.backward() 85 | return loss 86 | optimizer.step(closure) 87 | ``` 88 | 89 | This optimizer is intended for **deterministic optimisation problems**, 90 | such as [full batch learning problems][batch]. Because of this, 91 | `optimizer.step(closure)` should only need to be called **once**. 92 | 93 | Can `.step(closure)` be called more than once? Technically yes, but it 94 | shouldn't be necessary because multiple steps are run internally up to the 95 | `maxiter` option in `minimizer_args` and multiple calls are not 96 | recommended. Each call to `optimizer.step(closure)` is an independent 97 | evaluation of `scipy.optimize.minimize`, so the internal state of any 98 | optimization algorithm will be interrupted. 99 | 100 | [torch_lbfgs]: https://pytorch.org/docs/stable/optim.html#torch.optim.LBFGS 101 | 102 | 103 | Which Algorithms Are Supported? 104 | ------------------------------- 105 | 106 | Using PyTorch to calculate the Jacobian, the following algorithms are 107 | supported: 108 | 109 | * [Conjugate Gradients][conjugate]: `'CG'` 110 | * [Broyden-Fletcher-Goldfarb-Shanno (BFGS)][bfgs]: `'BFGS'` 111 | * [Limited-memory BFGS][lbfgs]: `'L-BFGS-B'` but **requires double precision**: 112 | * `nn.Module` containing parameters must be cast to double, example: 113 | `model = model.double()` 114 | * [Sequential Least Squares Programming][slsqp]: `'SLSQP'` 115 | * [Truncated Newton][tnc]: `'TNC'` but **also requires double precision** 116 | 117 | The method name string is given on the right, corresponding to the names 118 | used by [scipy.optimize.minimize][scipy]. 119 | 120 | ### Methods that require Hessian evaluations 121 | 122 | **Warning**: this is experimental and probably unpredictable. 
123 | 
124 | To use the methods that require evaluating the Hessian, a `Closure` object
125 | with the following methods is required (full MNIST example
126 | [here](./mnist/hessian_logistic_regression.py)):
127 | 
128 | ```
129 | class Closure():
130 |     def __init__(self, model):
131 |         self.model = model
132 | 
133 |     @staticmethod
134 |     def loss(model):
135 |         output = model(data)
136 |         return loss_fn(output, target)
137 | 
138 |     def __call__(self):
139 |         optimizer.zero_grad()
140 |         loss = self.loss(self.model)
141 |         loss.backward()
142 |         return loss
143 | closure = Closure(model)
144 | ```
145 | 
146 | The following methods can then be used:
147 | 
148 | * [Newton Conjugate Gradient](https://youtu.be/0qUAb94CpOw?t=30m41s): `'Newton-CG'`
149 | * [Newton Conjugate Gradient Trust-Region][trust]: `'trust-ncg'`
150 | * [Krylov Subspace Trust-Region][krylov]: `'trust-krylov'`
151 | * [Nearly Exact Trust-Region][trust]: `'trust-exact'`
152 | * [Constrained Trust-Region][trust]: `'trust-constr'`
153 | 
154 | The code contains hacks to make it possible to call
155 | [torch.autograd.functional.hessian][torchhessian] (which is itself only
156 | supplied in PyTorch as a beta feature).
157 | 
158 | ### Algorithms without gradients
159 | 
160 | If using the `scipy.optimize.minimize` algorithms that don't require
161 | gradients (such as `'Nelder-Mead'`, `'COBYLA'` or `'Powell'`), ensure that
162 | `minimizer_args['jac'] = False` when instantiating `MinimizeWrapper`.
163 | 
164 | ### Algorithms you can choose but don't work
165 | 
166 | The following algorithms either didn't converge on a toy problem or hit
167 | errors when I tested them. You can still select them, but they may not work:
168 | 
169 | * [Dogleg][]: `'dogleg'`
170 | 
171 | All the other methods that require gradients converged on a toy problem
172 | that is tested in Travis-CI.
173 | 
174 | Global Optimizers
175 | -----------------
176 | 
177 | There are a few [global optimization algorithms in
178 | `scipy.optimize`][global]. The following are supported via their own
179 | wrapper classes:
180 | 
181 | * Basin Hopping via `BasinHoppingWrapper`
182 | * Differential Evolution via `DifferentialEvolutionWrapper`
183 | * Simplicial Homology Global Optimization via `SHGOWrapper`
184 | * Dual Annealing via `DualAnnealingWrapper`
185 | 
186 | An example of how to use one of these wrappers:
187 | 
188 | ```
189 | from pytorch_minimize.optim import BasinHoppingWrapper
190 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100})
191 | basinhopping_kwargs = dict(niter=200)
192 | optimizer = BasinHoppingWrapper(model.parameters(), minimizer_args, basinhopping_kwargs)
193 | ```
194 | 
195 | These are also illustrated in [this colab notebook][colab], where the
196 | following plots were generated:
197 | 
198 | ![Basin Hopping](images/rastrigin_BasinHoppingWrapper.png)
199 | 
200 | ![Differential Evolution](images/rastrigin_DifferentialEvolutionWrapper.png)
201 | 
202 | ![Dual Annealing](images/rastrigin_DualAnnealingWrapper.png)
203 | 
204 | ![Simplicial Homology Global Optimization](images/rastrigin_SHGOWrapper.png)
205 | 
206 | [colab]: https://colab.research.google.com/drive/19hZSxw3ZT3IgWGD9ZOuOYryeJoOGenJU?usp=sharing
207 | [global]: https://docs.scipy.org/doc/scipy/reference/optimize.html#global-optimization
208 | 
209 | How Does it Work?
210 | -----------------
211 | 
212 | [`scipy.optimize.minimize`][scipy] expects to receive a function `fun` that
213 | returns a scalar and an array of gradients the same size as the initial
214 | input array `x0`.
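For reference, this is the calling convention being targeted when `jac=True`, shown on a toy Numpy-only quadratic (plain scipy usage, independent of this package):

```
import numpy as np
from scipy.optimize import minimize

def fun(x):
    # return (scalar loss, gradient array with the same shape as x)
    return np.sum(x ** 2), 2 * x

res = minimize(fun, x0=np.ones(3), jac=True, method='CG')
print(res.x)  # approximately [0. 0. 0.]
```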
To accommodate this, `MinimizeWrapper` does the following:
215 | 
216 | 1. Create a wrapper function that will be passed as `fun`
217 | 2. In that function:
218 |     1. Unpack the Numpy array into parameter tensors
219 |     2. Substitute each parameter in place with these tensors
220 |     3. Evaluate `closure`, which will now use these parameter values
221 |     4. Extract the gradients
222 |     5. Pack the gradients back into one 1D Numpy array
223 |     6. Return the loss value and the gradient array
224 | 
225 | Then, all that's left is to call `scipy.optimize.minimize` and unpack the
226 | optimal parameters found back into the model.
227 | 
228 | This procedure involves unpacking and packing arrays, along with moving
229 | back and forth between Numpy and PyTorch, which may incur some overhead. I
230 | haven't done any profiling to find out if it's likely to be a big problem,
231 | but it completes in seconds when optimizing a logistic regression on MNIST
232 | by conjugate gradients.
233 | 
234 | ### Other Implementations
235 | 
236 | There are a few other projects that incorporate `scipy.optimize` and
237 | PyTorch:
238 | 
239 | * [This gist][mygist] I wrote in 2018 and then forgot about creates an
240 |   Objective object to pass into `scipy.optimize` but packs the arrays and
241 |   gradients in approximately the same way.
242 | * [botorch's `gen_candidates_scipy`][botorch] wraps
243 |   `scipy.optimize.minimize` and uses it to optimize acquisition functions as
244 |   part of Bayesian Optimization.
245 | * [autograd-minimize][agmin] wraps the `minimize` function itself, allowing
246 |   PyTorch or TensorFlow objectives to be passed directly to a function with
247 |   the same interface as `scipy.optimize.minimize`.
248 | 
249 | [agmin]: https://github.com/brunorigal/autograd-minimize
250 | [botorch]: https://github.com/pytorch/botorch/blob/main/botorch/generation/gen.py
251 | [mygist]: https://gist.github.com/gngdb/a9f912df362a85b37c730154ef3c294b
252 | 
253 | ### Pure PyTorch Minimization
254 | 
255 | `rfeinman` has implemented some of the algorithms available in `scipy.optimize`
256 | in a repository with [the same name as this repository][rfeinman]. That
257 | implementation is much more efficient and avoids switching between
258 | 32 and 64 bit floats between Numpy and PyTorch.
259 | 
260 | That repository also contains [a wrapper around scipy.optimize.minimize][rfeinmanwrapper].
261 | 
262 | [rfeinman]: https://github.com/rfeinman/pytorch-minimize
263 | [rfeinmanwrapper]: https://github.com/rfeinman/pytorch-minimize/blob/15742bbc17999976e7e3268c9181dadad772698b/torchmin/optim/scipy_minimizer.py#L93-L291
264 | 
265 | How Does This Evaluate the Hessian?
266 | -----------------------------------
267 | 
268 | To evaluate the Hessian in PyTorch,
269 | [`torch.autograd.functional.hessian`][torchhessian] takes two arguments:
270 | 
271 | * `func`: function that returns a scalar
272 | * `inputs`: variables to take the derivative with respect to
273 | 
274 | In most PyTorch code, `inputs` is a list of tensors embedded as parameters
275 | in the Modules that make up the `model`. They can't be passed as `inputs`
276 | because we typically don't have a `func` that will take the parameters as
277 | input, build a network from these parameters and then produce a scalar
278 | output.
279 | 
280 | From a [discussion on the PyTorch forum][forum], the only way to calculate
281 | these derivatives with respect to the parameters is to monkey patch
282 | `inputs` into the model and then calculate the loss.
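As a toy sketch of that idea (a single hypothetical `nn.Linear` with only its weight patched, random data, and not this package's actual implementation):

```
import torch
import torch.nn.functional as F

model = torch.nn.Linear(3, 1)
data, target = torch.randn(8, 3), torch.randn(8, 1)

def f(flat_weight):
    # monkey patch: replace the weight Parameter with a plain tensor built
    # from the input, so the loss becomes a function of `flat_weight`
    del model.weight
    model.weight = flat_weight.view(1, 3)
    return F.mse_loss(model(data), target)

x = torch.zeros(3)
H = torch.autograd.functional.hessian(f, x)
print(H.shape)  # torch.Size([3, 3])
```

The actual wrapper has to handle arbitrary modules, which is what the recursive monkey patch described next is for.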
I wrote a [recursive
283 | monkey patch][monkey] that operates on a [deepcopy][] of the original
284 | `model`. This involves copying everything in the model, so it's not very
285 | efficient.
286 | 
287 | The function passed to `scipy.optimize.minimize` as `hess` does the
288 | following:
289 | 
290 | 1. [`copy.deepcopy`][deepcopy] the entire `model` Module
291 | 2. The input `x` is a Numpy array, so cast it to a float tensor matching
292 |    the parameters' dtype and enable `requires_grad`
293 | 3. Define a function `f` that unpacks this 1D tensor into parameter
294 |    tensors
295 |     * [Recursively navigate][re_attr] the module object
296 |         - Deleting all existing parameters
297 |         - Replacing them with the tensors unpacked from `x`
298 |     * Calculate the loss using the static method stored in the `closure` object
299 | 4. Pass `f` and `x` to `torch.autograd.functional.hessian`, then cast the
300 |    result back into a Numpy array
301 | 
302 | Credits
303 | -------
304 | 
305 | If you use this in your work, please cite this repository using the
306 | following BibTeX entry, along with [Numpy][numpycite], [Scipy][scipycite]
307 | and [PyTorch][pytorchcite].
308 | 
309 | ```
310 | @misc{gray2021minimize,
311 |   author = {Gray, Gavin},
312 |   title = {PyTorch Minimize},
313 |   year = {2021},
314 |   publisher = {GitHub},
315 |   journal = {GitHub repository},
316 |   howpublished = {\url{https://github.com/gngdb/pytorch-minimize}}
317 | }
318 | ```
319 | 
320 | This package was created with [Cookiecutter][] and the
321 | [`audreyr/cookiecutter-pypackage`][audreyr] project template.
322 | 
323 | [pytorchcite]: https://github.com/pytorch/pytorch/blob/master/CITATION
324 | [numpycite]: https://www.scipy.org/citing.html#numpy
325 | [scipycite]: https://www.scipy.org/citing.html#scipy-the-library
326 | [re_attr]: https://stackoverflow.com/a/31174427/6937913
327 | [deepcopy]: https://docs.python.org/3/library/copy.html#copy.deepcopy
328 | [monkey]: https://github.com/gngdb/pytorch-minimize/blob/master/pytorch_minimize/optim.py#L106-L122
329 | [forum]: https://discuss.pytorch.org/t/using-autograd-functional-jacobian-hessian-with-respect-to-nn-module-parameters/103994/3
330 | [dogleg]: https://en.wikipedia.org/wiki/Powell%27s_dog_leg_method
331 | [tnc]: https://en.wikipedia.org/wiki/Truncated_Newton_method
332 | [krylov]: https://epubs.siam.org/doi/abs/10.1137/1.9780898719857.ch5
333 | [trust]: https://en.wikipedia.org/wiki/Trust_region
334 | [torchhessian]: https://pytorch.org/docs/stable/autograd.html#torch.autograd.functional.hessian
335 | [slsqp]: https://en.wikipedia.org/wiki/Sequential_quadratic_programming
336 | [conjugate]: https://en.wikipedia.org/wiki/Conjugate_gradient_method
337 | [lbfgs]: https://en.wikipedia.org/wiki/Limited-memory_BFGS
338 | [bfgs]: https://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm
339 | [batch]: https://towardsdatascience.com/batch-mini-batch-stochastic-gradient-descent-7a62ecba642a
340 | [closure]: https://pytorch.org/docs/stable/optim.html#optimizer-step-closure
341 | [optimizer]: https://pytorch.org/docs/stable/optim.html
342 | [scipy]: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
343 | [Cookiecutter]: https://github.com/audreyr/cookiecutter
344 | [audreyr]: https://github.com/audreyr/cookiecutter-pypackage
345 | 
346 | 
-------------------------------------------------------------------------------- /images/rastrigin_BasinHoppingWrapper.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_BasinHoppingWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_DifferentialEvolutionWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_DifferentialEvolutionWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_DualAnnealingWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_DualAnnealingWrapper.png -------------------------------------------------------------------------------- /images/rastrigin_SHGOWrapper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gngdb/pytorch-minimize/01ce582f90b49b638cf77c88e75dd3868b5f3f95/images/rastrigin_SHGOWrapper.png -------------------------------------------------------------------------------- /mnist/hessian_logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torchvision import datasets, transforms 7 | from pytorch_minimize.optim import MinimizeWrapper 8 | 9 | 10 | class LogReg(nn.Module): 11 | def __init__(self): 12 | super(LogReg, self).__init__() 13 | self.fc = nn.Linear(28*28, 10) 14 | 15 | def forward(self, x): 16 | n = x.size(0) 17 | x = self.fc(x.view(n,-1)) 18 | output = F.log_softmax(x, dim=1) 19 | return output 20 | 21 | 22 | def train(args, model, device, dataset, optimizer): 23 | model.train() 24 | data, target = dataset 25 | data, target = data.to(device), target.to(device) 26 | class Closure(): 27 | def __init__(self, model): 28 | self.model = model 29 | 30 | @staticmethod 31 | def loss(model): 32 | output = model(data) 33 | return F.nll_loss(output, target) 34 | 35 | def __call__(self): 36 | optimizer.zero_grad() 37 | loss = self.loss(self.model) 38 | loss.backward() 39 | self._loss = loss.item() 40 | return loss 41 | closure = Closure(model) 42 | optimizer.step(closure) 43 | print(f"Train Loss: {closure._loss:.2f}") 44 | 45 | def test(model, device, dataset): 46 | model.eval() 47 | test_loss = 0 48 | correct = 0 49 | with torch.no_grad(): 50 | data, target = dataset 51 | data, target = data.to(device), target.to(device) 52 | output = model(data) 53 | test_loss += F.nll_loss(output, target, reduction='mean').item() # sum up batch loss 54 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 55 | correct += pred.eq(target.view_as(pred)).sum().item() 56 | 57 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 58 | test_loss, correct, len(data), 59 | 100. 
* correct / len(data))) 60 | 61 | 62 | def main(): 63 | # Training settings 64 | parser = argparse.ArgumentParser(description='Logistic Regression' 65 | ' Example Optimization with Hessian') 66 | parser.add_argument('--method', type=str, default='Newton-CG', 67 | choices=["Newton-CG", "dogleg", "trust-ncg", 68 | "trust-krylov", "trust-exact", "trust-constr"], 69 | help='Which scipy.optimize.minimize method to use.') 70 | parser.add_argument('--no-cuda', action='store_true', default=False, 71 | help='disables CUDA training') 72 | parser.add_argument('--seed', type=int, default=1, metavar='S', 73 | help='random seed (default: 1)') 74 | parser.add_argument('--save-model', action='store_true', default=False, 75 | help='For Saving the current Model') 76 | args = parser.parse_args() 77 | use_cuda = not args.no_cuda and torch.cuda.is_available() 78 | 79 | torch.manual_seed(args.seed) 80 | 81 | device = torch.device("cuda" if use_cuda else "cpu") 82 | 83 | # train_kwargs = {'batch_size': 50000} # all of MNIST 84 | # test_kwargs = {'batch_size': 10000} # all of MNIST 85 | train_kwargs = {'batch_size': 500} 86 | test_kwargs = {'batch_size': 100} 87 | if use_cuda: 88 | cuda_kwargs = {'num_workers': 1, 89 | 'pin_memory': True, 90 | 'shuffle': True} 91 | train_kwargs.update(cuda_kwargs) 92 | test_kwargs.update(cuda_kwargs) 93 | 94 | transform=transforms.Compose([ 95 | transforms.ToTensor(), 96 | transforms.Normalize((0.1307,), (0.3081,)) 97 | ]) 98 | dataset1 = datasets.MNIST('../data', train=True, download=True, 99 | transform=transform) 100 | dataset2 = datasets.MNIST('../data', train=False, 101 | transform=transform) 102 | train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) 103 | test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) 104 | train_dataset = next(iter(train_loader)) 105 | test_dataset = next(iter(test_loader)) 106 | 107 | model = LogReg().to(device) 108 | minimizer_args = dict(method=args.method, options={'disp':True, 'maxiter':100}) 109 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 110 | 111 | train(args, model, device, train_dataset, optimizer) 112 | test(model, device, test_dataset) 113 | 114 | if args.save_model: 115 | torch.save(model.state_dict(), "mnist_logreg.pt") 116 | 117 | if __name__ == '__main__': 118 | main() 119 | -------------------------------------------------------------------------------- /mnist/logistic_regression.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torchvision import datasets, transforms 7 | from pytorch_minimize.optim import MinimizeWrapper 8 | 9 | 10 | class LogReg(nn.Module): 11 | def __init__(self): 12 | super(LogReg, self).__init__() 13 | self.fc = nn.Linear(28*28, 10) 14 | 15 | def forward(self, x): 16 | n = x.size(0) 17 | x = self.fc(x.view(n,-1)) 18 | output = F.log_softmax(x, dim=1) 19 | return output 20 | 21 | 22 | def train(args, model, device, dataset, optimizer): 23 | model.train() 24 | data, target = dataset 25 | data, target = data.to(device), target.to(device) 26 | class Closure(): 27 | def __call__(self): 28 | optimizer.zero_grad() 29 | output = model(data) 30 | loss = F.nll_loss(output, target) 31 | loss.backward() 32 | self.loss = loss.item() 33 | return loss 34 | closure = Closure() 35 | optimizer.step(closure) 36 | print(f"Train Loss: {closure.loss:.2f}") 37 | 38 | def test(model, device, dataset): 39 
| model.eval() 40 | test_loss = 0 41 | correct = 0 42 | with torch.no_grad(): 43 | data, target = dataset 44 | data, target = data.to(device), target.to(device) 45 | output = model(data) 46 | test_loss += F.nll_loss(output, target, reduction='mean').item() # sum up batch loss 47 | pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability 48 | correct += pred.eq(target.view_as(pred)).sum().item() 49 | 50 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( 51 | test_loss, correct, len(data), 52 | 100. * correct / len(data))) 53 | 54 | 55 | def main(): 56 | # Training settings 57 | parser = argparse.ArgumentParser(description='Logistic Regression' 58 | ' Example Optimization without Hessian') 59 | parser.add_argument('--method', type=str, default='CG', 60 | choices=["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"], 61 | help='Which scipy.optimize.minimize method to use.') 62 | parser.add_argument('--no-cuda', action='store_true', default=False, 63 | help='disables CUDA training') 64 | parser.add_argument('--seed', type=int, default=1, metavar='S', 65 | help='random seed (default: 1)') 66 | parser.add_argument('--save-model', action='store_true', default=False, 67 | help='For Saving the current Model') 68 | args = parser.parse_args() 69 | use_cuda = not args.no_cuda and torch.cuda.is_available() 70 | 71 | torch.manual_seed(args.seed) 72 | 73 | device = torch.device("cuda" if use_cuda else "cpu") 74 | 75 | # train_kwargs = {'batch_size': 50000} # all of MNIST 76 | # test_kwargs = {'batch_size': 10000} # all of MNIST 77 | train_kwargs = {'batch_size': 500} 78 | test_kwargs = {'batch_size': 100} 79 | if use_cuda: 80 | cuda_kwargs = {'num_workers': 1, 81 | 'pin_memory': True, 82 | 'shuffle': True} 83 | train_kwargs.update(cuda_kwargs) 84 | test_kwargs.update(cuda_kwargs) 85 | 86 | transform=transforms.Compose([ 87 | transforms.ToTensor(), 88 | transforms.Normalize((0.1307,), (0.3081,)) 89 | ]) 90 | dataset1 = datasets.MNIST('../data', train=True, download=True, 91 | transform=transform) 92 | dataset2 = datasets.MNIST('../data', train=False, 93 | transform=transform) 94 | train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs) 95 | test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs) 96 | train_dataset = next(iter(train_loader)) 97 | test_dataset = next(iter(test_loader)) 98 | 99 | model = LogReg().to(device) 100 | minimizer_args = dict(method=args.method, options={'disp':True, 'maxiter':100}) 101 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 102 | 103 | train(args, model, device, train_dataset, optimizer) 104 | test(model, device, test_dataset) 105 | 106 | if args.save_model: 107 | torch.save(model.state_dict(), "mnist_logreg.pt") 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /pytorch_minimize/__init__.py: -------------------------------------------------------------------------------- 1 | """Top-level package for PyTorch Minimize.""" 2 | 3 | __author__ = """Gavin Gray""" 4 | __email__ = 'gngdb.labs@gmail.com' 5 | __version__ = '0.1.0' 6 | -------------------------------------------------------------------------------- /pytorch_minimize/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from scipy.optimize import ( 4 | minimize, 5 | basinhopping, 6 | brute, 7 | differential_evolution, 8 | shgo, 9 | dual_annealing 10 | ) 11 | import 
functools 12 | from copy import deepcopy 13 | 14 | 15 | # thanks to https://stackoverflow.com/a/31174427/6937913 16 | # recursively set attributes 17 | def rsetattr(obj, attr, val): 18 | pre, _, post = attr.rpartition('.') 19 | return setattr(rgetattr(obj, pre) if pre else obj, post, val) 20 | 21 | def rgetattr(obj, attr, *args): 22 | def _getattr(obj, attr): 23 | return getattr(obj, attr, *args) 24 | return functools.reduce(_getattr, [obj] + attr.split('.')) 25 | 26 | def rdelattr(obj, attr): 27 | pre, _, post = attr.rpartition('.') 28 | return delattr(rgetattr(obj, pre) if pre else obj, post) 29 | 30 | # generic float casting 31 | def floatX(x, np_to, torch_to): 32 | if isinstance(x, np.ndarray): 33 | return x.astype(np_to) 34 | elif isinstance(x, torch.Tensor): 35 | return x.to(torch_to) 36 | elif isinstance(x, float): 37 | return np_to(x) 38 | else: 39 | raise ValueError('Only numpy arrays and torch tensors can be cast to' 40 | f'float, not {x} of type {type(x)}') 41 | 42 | float32 = functools.partial(floatX, np_to=np.float32, torch_to=torch.float32) 43 | float64 = functools.partial(floatX, np_to=np.float64, torch_to=torch.float64) 44 | 45 | 46 | class MinimizeWrapper(torch.optim.Optimizer): 47 | def __init__(self, params, minimizer_args): 48 | assert type(minimizer_args) is dict 49 | if 'jac' not in minimizer_args: 50 | minimizer_args['jac'] = True 51 | assert minimizer_args['jac'] in [True, False], \ 52 | "separate jac function not supported" 53 | params = self.set_floatX(params) 54 | self.jac_methods = ["CG", "BFGS", "L-BFGS-B", "TNC", "SLSQP"] 55 | self.hess_methods = ["Newton-CG", "dogleg", "trust-ncg", 56 | "trust-krylov", "trust-exact", "trust-constr"] 57 | self.gradfree_methods = ["Nelder-Mead", "Powell", "COBYLA"] 58 | method = minimizer_args['method'] 59 | if method in self.jac_methods: 60 | self.use_hess = False 61 | elif method in self.hess_methods: 62 | self.use_hess = True 63 | elif method in self.gradfree_methods: 64 | self.use_hess = False 65 | assert minimizer_args['jac'] == False, \ 66 | "set minimizer_args['jac']=False to use gradient free algorithms" 67 | else: 68 | raise ValueError(f"Method {method} not supported or does not exist") 69 | self.minimizer_args = minimizer_args 70 | if 'options' not in self.minimizer_args: 71 | self.minimizer_args.update({'options':{}}) 72 | if 'maxiter' not in self.minimizer_args['options']: 73 | self.minimizer_args['options'].update({'maxiter':2}) 74 | super(MinimizeWrapper, self).__init__(params, self.minimizer_args) 75 | assert len(self.param_groups) == 1, "only supports one group" 76 | 77 | def set_floatX(self, params): 78 | params = [p for p in params] 79 | if all(p.dtype == torch.float32 for p in params): 80 | self.floatX = float32 81 | elif all(p.dtype == torch.float64 for p in params): 82 | self.floatX = float64 83 | else: 84 | raise ValueError('Only float or double parameters permitted') 85 | return params 86 | 87 | def ravel_pack(self, tensors): 88 | # pack tensors into a numpy array 89 | def numpyify(tensor): 90 | if tensor.device != torch.device('cpu'): 91 | tensor = tensor.cpu() 92 | return tensor.detach().numpy() 93 | x = np.concatenate([numpyify(tensor).ravel() for tensor in tensors], 0) 94 | x = self.floatX(x) 95 | return x 96 | 97 | def np_unravel_unpack(self, x): 98 | x = torch.from_numpy(self.floatX(x)) 99 | return self.unravel_unpack(x) 100 | 101 | def unravel_unpack(self, x): 102 | # unpack parameters from a numpy array 103 | _group = next(iter(self.param_groups)) 104 | _params = _group['params'] # use params as 
shape reference 105 | i = 0 106 | params = [] 107 | for _p in _params: 108 | j = _p.numel() 109 | p = x[i:i+j].view(_p.size()) 110 | p = p.to(_p.device) 111 | params.append(p) 112 | i += j 113 | return params 114 | 115 | def minimize(self, func, x0, **minimizer_args): 116 | return minimize(func, x0, **minimizer_args) 117 | 118 | @torch.no_grad() 119 | def step(self, closure): 120 | group = next(iter(self.param_groups)) 121 | params = group['params'] 122 | 123 | def torch_wrapper(x, return_grad=False, *args): 124 | # monkey patch set parameter values 125 | _params = self.np_unravel_unpack(x) 126 | for p, _p in zip(params, _params): 127 | p.data = _p 128 | with torch.enable_grad(): 129 | loss = closure() 130 | loss = self.floatX(loss.item()) 131 | if return_grad: 132 | grads = self.ravel_pack([p.grad for p in params]) 133 | return loss, grads 134 | else: 135 | return loss 136 | if self.minimizer_args['jac']: 137 | torch_wrapper = functools.partial(torch_wrapper, return_grad=True) 138 | 139 | if hasattr(closure, 'model') and self.use_hess: 140 | def hess(x): 141 | model = deepcopy(closure.model) 142 | with torch.enable_grad(): 143 | x = self.floatX(torch.tensor(x)).requires_grad_() 144 | def f(x): 145 | _params = self.unravel_unpack(x) 146 | # monkey patch substitute variables 147 | named_params = list(model.named_parameters()) 148 | for _p, (n, _) in zip(_params, named_params): 149 | rdelattr(model, n) 150 | rsetattr(model, n, _p) 151 | return closure.loss(model) 152 | def numpyify(x): 153 | if x.device != torch.device('cpu'): 154 | x = x.cpu() 155 | #return x.numpy().astype(np.float64) 156 | return self.floatX(x.numpy()) 157 | return numpyify(torch.autograd.functional.hessian(f, x)) 158 | else: 159 | hess = None 160 | 161 | # run the minimizer 162 | x0 = self.ravel_pack(params) 163 | self.res = self.minimize(torch_wrapper, x0, hess=hess, **self.minimizer_args) 164 | 165 | # set the final parameters 166 | _params = self.np_unravel_unpack(self.res.x) 167 | for p, _p in zip(params, _params): 168 | p.data = _p 169 | 170 | 171 | class BasinHoppingWrapper(MinimizeWrapper): 172 | def __init__(self, params, minimizer_args, basinhopping_kwargs): 173 | self.basinhopping_kwargs = basinhopping_kwargs 174 | super().__init__(params, minimizer_args) 175 | 176 | def minimize(self, func, x0, **minimizer_args): 177 | return basinhopping(func, x0, minimizer_kwargs=minimizer_args, 178 | **self.basinhopping_kwargs) 179 | 180 | 181 | class DifferentialEvolutionWrapper(MinimizeWrapper): 182 | def __init__(self, params, de_kwargs): 183 | self.minimizer_args = {'jac': False} 184 | self.de_kwargs = de_kwargs 185 | params = self.set_floatX(params) 186 | super(MinimizeWrapper, self).__init__(params, self.minimizer_args) 187 | 188 | def minimize(self, func, x0, hess, **kwargs): 189 | return differential_evolution(func, **self.de_kwargs) 190 | 191 | 192 | class SHGOWrapper(MinimizeWrapper): 193 | def __init__(self, params, minimizer_args, shgo_kwargs): 194 | minimizer_args.update({'jac': False}) 195 | self.shgo_kwargs = shgo_kwargs 196 | super().__init__(params, minimizer_args) 197 | 198 | def minimize(self, func, x0, **minimizer_args): 199 | def jac_fun(x, *args): 200 | return func(x, True)[1] 201 | def obj_fun(x, *args): 202 | return func(x, False) 203 | minimizer_args['jac'] = jac_fun 204 | return shgo(obj_fun, minimizer_kwargs=minimizer_args, 205 | args=[False], 206 | **self.shgo_kwargs) 207 | 208 | 209 | class DualAnnealingWrapper(MinimizeWrapper): 210 | def __init__(self, params, minimizer_args, da_kwargs): 211 | 
minimizer_args.update({'jac': False}) 212 | self.da_kwargs = da_kwargs 213 | super().__init__(params, minimizer_args) 214 | 215 | def minimize(self, func, x0, **minimizer_args): 216 | jac_fun = lambda x: func(x, True)[1] 217 | minimizer_args['jac'] = jac_fun 218 | return dual_annealing(func, local_search_options=minimizer_args, 219 | args=[False], 220 | **self.da_kwargs) 221 | 222 | -------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | pip==19.2.3 2 | bump2version==0.5.11 3 | wheel==0.33.6 4 | watchdog==0.9.0 5 | flake8==3.7.8 6 | tox==3.14.0 7 | coverage==4.5.4 8 | Sphinx==1.8.5 9 | twine==1.14.0 10 | 11 | pytest==4.6.5 12 | pytest-runner==5.1 13 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | [bumpversion:file:pytorch_minimize/__init__.py] 11 | search = __version__ = '{current_version}' 12 | replace = __version__ = '{new_version}' 13 | 14 | [bdist_wheel] 15 | universal = 1 16 | 17 | [flake8] 18 | exclude = docs 19 | 20 | [aliases] 21 | # Define setup.py command aliases here 22 | test = pytest 23 | 24 | [tool:pytest] 25 | collect_ignore = ['setup.py'] 26 | 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """The setup script.""" 4 | 5 | from setuptools import setup, find_packages 6 | 7 | with open('README.md') as readme_file: 8 | readme = readme_file.read() 9 | 10 | requirements = ['scipy'] 11 | 12 | setup_requirements = ['pytest-runner', ] 13 | 14 | test_requirements = ['pytest>=3', ] 15 | 16 | setup( 17 | author="Gavin Gray", 18 | author_email='gngdb.labs@gmail.com', 19 | python_requires='>=3.5', 20 | classifiers=[ 21 | 'Development Status :: 2 - Pre-Alpha', 22 | 'Intended Audience :: Developers', 23 | 'License :: OSI Approved :: MIT License', 24 | 'Natural Language :: English', 25 | 'Programming Language :: Python :: 3', 26 | 'Programming Language :: Python :: 3.5', 27 | 'Programming Language :: Python :: 3.6', 28 | 'Programming Language :: Python :: 3.7', 29 | 'Programming Language :: Python :: 3.8', 30 | ], 31 | description="Use scipy.optimize.minimize as a PyTorch Optimizer.", 32 | install_requires=requirements, 33 | license="MIT license", 34 | long_description=readme + '\n', 35 | include_package_data=True, 36 | keywords='pytorch_minimize', 37 | name='pytorch_minimize', 38 | packages=find_packages(include=['pytorch_minimize', 'pytorch_minimize.*']), 39 | setup_requires=setup_requirements, 40 | test_suite='tests', 41 | tests_require=test_requirements, 42 | url='https://github.com/gngdb/pytorch_minimize', 43 | version='0.2.0', 44 | zip_safe=False, 45 | ) 46 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Unit test package for pytorch_minimize.""" 2 | -------------------------------------------------------------------------------- /tests/test_basinhopping.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from pytorch_minimize.optim import BasinHoppingWrapper 7 | import numpy as np 8 | from sklearn.datasets import make_classification 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.metrics import log_loss 12 | 13 | 14 | n_samples = 120 15 | n_features = 20 16 | n_classes = 10 17 | 18 | 19 | class LogReg(nn.Module): 20 | def __init__(self): 21 | super(LogReg, self).__init__() 22 | self.fc = nn.Linear(n_features, n_classes) 23 | 24 | def forward(self, x): 25 | n = x.size(0) 26 | x = self.fc(x.view(n,-1)) 27 | output = F.log_softmax(x, dim=1) 28 | return output 29 | 30 | def main(method, disp=True, floatX='float32'): 31 | # only run tests on CPU 32 | device = torch.device('cpu') 33 | 34 | # seed everything 35 | torch.manual_seed(0) 36 | np.random.seed(0) 37 | 38 | # generate classification dataset 39 | X, y = make_classification(n_samples=n_samples, 40 | n_informative=10, 41 | n_features=n_features, 42 | n_classes=n_classes) 43 | # split into training and test 44 | X_train, X_test, y_train, y_test = train_test_split(X, y, 45 | test_size=(2./12.), random_state=0) 46 | def torchify(X, y): 47 | return torch.from_numpy(X).float(), torch.from_numpy(y).long() 48 | train_dataset = torchify(X_train, y_train) 49 | test_dataset = torchify(X_test, y_test) 50 | 51 | # test sklearn 52 | # clf = LogisticRegression(penalty='none').fit(X_train, y_train) 53 | # print(clf.score(X_train, y_train)) 54 | # print(log_loss(y_train, clf.predict_proba(X_train))) 55 | 56 | # instance model 57 | model = LogReg().to(device) 58 | 59 | # instance optimizer 60 | minimizer_args = dict(method=method, options={'disp':True, 'maxiter':10000}) 61 | basinhopping_kwargs = {'niter':4} 62 | if floatX == 'float64': 63 | model = model.double() 64 | optimizer = BasinHoppingWrapper(model.parameters(), minimizer_args, basinhopping_kwargs) 65 | 66 | # train 67 | model.train() 68 | data, target = train_dataset 69 | data, target = data.to(device), target.to(device) 70 | if floatX == 'float64': 71 | data = data.double() 72 | class Closure(): 73 | def __init__(self, model): 74 | self.model = model 75 | 76 | @staticmethod 77 | def loss(model): 78 | output = model(data) 79 | return F.nll_loss(output, target) 80 | 81 | def __call__(self): 82 | optimizer.zero_grad() 83 | loss = self.loss(self.model) 84 | loss.backward() 85 | self._loss = loss.item() 86 | return loss 87 | closure = Closure(model) 88 | optimizer.step(closure) 89 | 90 | # check if train loss is zero (overfitting) 91 | assert abs(closure._loss) < 1e-1, f"Train loss not near zero with {method}: {closure._loss}" 92 | return optimizer.res, closure._loss 93 | 94 | def test_jac_methods(): 95 | # test methods that require only the jacobian and not the hessian 96 | methods = ["CG", "BFGS", "L-BFGS-B", "SLSQP", "TNC"] 97 | failing_combinations = [("L-BFGS-B", "float32"), ("TNC", "float32")] 98 | for method in methods: 99 | for floatX in ["float32", "float64"]: 100 | if (method, floatX) not in failing_combinations: 101 | _ = main(method, disp=False, floatX=floatX) 102 | 103 | def test_hess_methods(): 104 | methods = ["Newton-CG", "trust-ncg", "trust-krylov", "trust-exact", "trust-constr"] 105 | failing_methods = ["dogleg"] 106 | for method in methods: 107 | for floatX in ['float32', 
'float64']: 108 | _ = main(method, disp=False, floatX=floatX) 109 | 110 | if __name__ == "__main__": 111 | res, loss = main("L-BFGS-B", floatX='float64') 112 | #res, loss = main("TNC", floatX='float32') 113 | # print(res) 114 | print(f"Train Loss: {loss:.2f}") 115 | 116 | -------------------------------------------------------------------------------- /tests/test_differential_evolution.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import DifferentialEvolutionWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_differential_evolution(double=True, disp=False): 8 | def ackley(x): 9 | arg1 = -0.2 * np.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)) 10 | arg2 = 0.5 * (np.cos(2. * np.pi * x[0]) + np.cos(2. * np.pi * x[1])) 11 | return -20. * np.exp(arg1) - np.exp(arg2) + 20. + np.e 12 | 13 | class Ackley(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.x = nn.Parameter(torch.zeros(2)) 17 | 18 | def forward(self): 19 | x = self.x 20 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)) 21 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 22 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. + math.e 23 | 24 | bounds = [(-5, 5), (-5, 5)] 25 | de_kwargs = dict(bounds=bounds, disp=disp) 26 | #result = differential_evolution(ackley, bounds, disp=disp) 27 | ackley = Ackley() 28 | if double: 29 | ackley = ackley.double() 30 | optimizer = DifferentialEvolutionWrapper(ackley.parameters(), de_kwargs) 31 | 32 | def closure(): 33 | with torch.no_grad(): 34 | return ackley() 35 | 36 | optimizer.step(closure) 37 | 38 | print(optimizer.res.x, optimizer.res.fun) 39 | 40 | if __name__ == '__main__': 41 | test_differential_evolution(disp=True) 42 | -------------------------------------------------------------------------------- /tests/test_dual_annealing.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import DualAnnealingWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_da(double=True, disp=False): 8 | class Ackley(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | self.x = nn.Parameter(torch.ones(2)) 12 | 13 | def forward(self): 14 | x = self.x 15 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)+1e-3) 16 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 17 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. 
+ math.e 18 | 19 | bounds = [(-5, 5), (-5, 5)] 20 | da_kwargs = dict(bounds=bounds) 21 | minimizer_args = dict(method='SLSQP', options={'disp':disp, 'maxiter':10000}) 22 | ackley = Ackley() 23 | if double: 24 | ackley = ackley.double() 25 | optimizer = DualAnnealingWrapper(ackley.parameters(), minimizer_args, da_kwargs) 26 | 27 | def closure(): 28 | optimizer.zero_grad() 29 | loss = ackley() 30 | loss.backward() 31 | return loss 32 | 33 | optimizer.step(closure) 34 | 35 | print(optimizer.res.x, optimizer.res.fun) 36 | 37 | if __name__ == '__main__': 38 | test_da(disp=True) 39 | -------------------------------------------------------------------------------- /tests/test_pytorch_minimize.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from pytorch_minimize.optim import MinimizeWrapper 7 | import numpy as np 8 | from sklearn.datasets import make_classification 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.metrics import log_loss 12 | 13 | 14 | n_samples = 120 15 | n_features = 20 16 | n_classes = 10 17 | 18 | 19 | class LogReg(nn.Module): 20 | def __init__(self): 21 | super(LogReg, self).__init__() 22 | self.fc = nn.Linear(n_features, n_classes) 23 | 24 | def forward(self, x): 25 | n = x.size(0) 26 | x = self.fc(x.view(n,-1)) 27 | output = F.log_softmax(x, dim=1) 28 | return output 29 | 30 | def main(method, disp=True, floatX='float32', cuda=False): 31 | # only run tests on CPU 32 | if cuda: 33 | device = torch.device('cuda') 34 | else: 35 | device = torch.device('cpu') 36 | 37 | # seed everything 38 | torch.manual_seed(0) 39 | np.random.seed(0) 40 | 41 | # generate classification dataset 42 | X, y = make_classification(n_samples=n_samples, 43 | n_informative=10, 44 | n_features=n_features, 45 | n_classes=n_classes) 46 | # split into training and test 47 | X_train, X_test, y_train, y_test = train_test_split(X, y, 48 | test_size=(2./12.), random_state=0) 49 | def torchify(X, y): 50 | return torch.from_numpy(X).float(), torch.from_numpy(y).long() 51 | train_dataset = torchify(X_train, y_train) 52 | test_dataset = torchify(X_test, y_test) 53 | 54 | # test sklearn 55 | # clf = LogisticRegression(penalty='none').fit(X_train, y_train) 56 | # print(clf.score(X_train, y_train)) 57 | # print(log_loss(y_train, clf.predict_proba(X_train))) 58 | 59 | # instance model 60 | model = LogReg().to(device) 61 | 62 | # instance optimizer 63 | minimizer_args = dict(method=method, options={'disp':True, 'maxiter':10000}) 64 | if floatX == 'float64': 65 | model = model.double() 66 | optimizer = MinimizeWrapper(model.parameters(), minimizer_args) 67 | 68 | # train 69 | model.train() 70 | data, target = train_dataset 71 | data, target = data.to(device), target.to(device) 72 | if floatX == 'float64': 73 | data = data.double() 74 | class Closure(): 75 | def __init__(self, model): 76 | self.model = model 77 | 78 | @staticmethod 79 | def loss(model): 80 | output = model(data) 81 | return F.nll_loss(output, target) 82 | 83 | def __call__(self): 84 | optimizer.zero_grad() 85 | loss = self.loss(self.model) 86 | loss.backward() 87 | self._loss = loss.item() 88 | return loss 89 | closure = Closure(model) 90 | optimizer.step(closure) 91 | 92 | # check if train loss is zero (overfitting) 93 | assert abs(closure._loss) < 1e-1, f"Train loss not near zero with 
{method}: {closure._loss}" 94 | return optimizer.res, closure._loss 95 | 96 | def test_jac_methods(): 97 | # test methods that require only the jacobian and not the hessian 98 | methods = ["CG", "BFGS", "L-BFGS-B", "SLSQP", "TNC"] 99 | failing_combinations = [("L-BFGS-B", "float32"), ("TNC", "float32")] 100 | for method in methods: 101 | for floatX in ["float32", "float64"]: 102 | if (method, floatX) not in failing_combinations: 103 | _ = main(method, disp=False, floatX=floatX) 104 | 105 | def test_hess_methods(): 106 | methods = ["Newton-CG", "trust-ncg", "trust-krylov", "trust-exact", "trust-constr"] 107 | failing_methods = ["dogleg"] 108 | for method in methods: 109 | for floatX in ['float32', 'float64']: 110 | _ = main(method, disp=False, floatX=floatX) 111 | 112 | def test_gpu(): 113 | # if there's a GPU, run this test (so this won't run on travis) 114 | if torch.cuda.is_available(): 115 | for method in ["CG", "Newtom-CG"]: 116 | main(method, disp=False, floatX='float32', cuda=True) 117 | 118 | if __name__ == "__main__": 119 | res, loss = main("Newton-CG", floatX='float64', cuda=True) 120 | #res, loss = main("TNC", floatX='float32') 121 | # print(res) 122 | print(f"Train Loss: {loss:.2f}") 123 | 124 | -------------------------------------------------------------------------------- /tests/test_shgo.py: -------------------------------------------------------------------------------- 1 | from pytorch_minimize.optim import SHGOWrapper 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | 7 | def test_shgo(double=True, disp=False): 8 | class Ackley(nn.Module): 9 | def __init__(self): 10 | super().__init__() 11 | self.x = nn.Parameter(torch.ones(2)) 12 | 13 | def forward(self): 14 | x = self.x 15 | arg1 = -0.2 * torch.sqrt(0.5 * (x[0] ** 2 + x[1] ** 2)+1e-3) 16 | arg2 = 0.5 * (torch.cos(2. * math.pi * x[0]) + torch.cos(2. * math.pi * x[1])) 17 | return -20. * torch.exp(arg1) - torch.exp(arg2) + 20. 
+ math.e 18 | 19 | bounds = [(-5, 5), (-5, 5)] 20 | shgo_kwargs = dict(bounds=bounds, options={'disp':disp}) 21 | minimizer_args = dict(method='SLSQP', options={'disp':disp, 'maxiter':10000}) 22 | ackley = Ackley() 23 | if double: 24 | ackley = ackley.double() 25 | optimizer = SHGOWrapper(ackley.parameters(), minimizer_args, shgo_kwargs) 26 | 27 | def closure(): 28 | optimizer.zero_grad() 29 | loss = ackley() 30 | loss.backward() 31 | return loss 32 | 33 | optimizer.step(closure) 34 | 35 | print(optimizer.res.x, optimizer.res.fun) 36 | 37 | if __name__ == '__main__': 38 | test_shgo(disp=True) 39 | -------------------------------------------------------------------------------- /tests/test_shgo_example.py: -------------------------------------------------------------------------------- 1 | from scipy.optimize import rosen, shgo, brute 2 | 3 | if __name__ == '__main__': 4 | bounds = [(0,2), (0, 2), (0, 2), (0, 2), (0, 2)] 5 | 6 | minimizer_args = dict(method='SLSQP', options={'disp':True, 'maxiter':10000}) 7 | shgo_kwargs = dict(bounds=bounds, options={'disp':True}) 8 | 9 | result = shgo(rosen, minimizer_kwargs=minimizer_args, **shgo_kwargs) 10 | ranges=[slice(a,b,0.25) for a,b in bounds] 11 | brute_kwargs = {'ranges': ranges} 12 | #result = brute(rosen, **brute_kwargs) 13 | 14 | # print(result) 15 | print(result.x, result.fun) 16 | -------------------------------------------------------------------------------- /tests/test_unpack_unravel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from pytorch_minimize.optim import MinimizeWrapper 5 | 6 | def test_index_bug(): 7 | torch.manual_seed(0) 8 | params = {'a': torch.randn(10), 'b': torch.randn(9), 'c': torch.randn(8)} 9 | params = list(params.values()) 10 | minimizer_args = dict(method='CG', options={'disp':True, 'maxiter':100}) 11 | optimizer = MinimizeWrapper(params, minimizer_args) 12 | 13 | _params = optimizer.np_unravel_unpack(optimizer.ravel_pack(params)) 14 | for p, _p in zip(params, _params): 15 | assert torch.abs(p-_p).max() < 1e-5 16 | 17 | if __name__ == '__main__': 18 | test_index_bug() 19 | --------------------------------------------------------------------------------