0.75 and on_boundary:
79 | self.radius *= 2.
80 | # self.delta *= max(2,self.delta_hat)
81 | if rho > self.eta:
82 | accept_step = True
83 | else:
84 | accept_step = False
85 |
86 | return accept_step
87 |
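# A minimal standalone sketch of the acceptance/resizing logic in the
# fragment above, assuming the standard trust-region reduction ratio
# rho = actual_reduction/predicted_reduction; the shrink branch and the
# default eta are illustrative assumptions, not taken from this file.
def tr_accept_and_resize(rho, on_boundary, radius, eta=0.05):
    if rho < 0.25:
        radius *= 0.25                 # poor model agreement: shrink the region
    elif rho > 0.75 and on_boundary:
        radius *= 2.                   # good agreement at the boundary: expand
    accept_step = rho > eta            # accept whenever the ratio beats eta
    return accept_step, radius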
88 |
89 |
90 |
--------------------------------------------------------------------------------
/applications/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ___ ___ ___ ___ ___ ___
6 | /__/\ / /\ / /\ / /\ ___ / /\ /__/\
7 | \ \:\ / /:/_ / /:/_ / /:/_ / /\ / /::\ \ \:\
8 | \__\:\ / /:/ /\ / /:/ /\ / /:/ /\ / /:/ / /:/\:\ \ \:\
9 | ___ / /::\ / /:/ /:/_ / /:/ /::\ / /:/ /::\ /__/::\ / /:/~/::\ _____\__\:\
10 | /__/\ /:/\:\/__/:/ /:/ /\/__/:/ /:/\:\/__/:/ /:/\:\\__\/\:\__ /__/:/ /:/\:\/__/::::::::\
11 | \ \:\/:/__\/\ \:\/:/ /:/\ \:\/:/~/:/\ \:\/:/~/:/ \ \:\/\\ \:\/:/__\/\ \:\~~\~~\/
12 | \ \::/ \ \::/ /:/ \ \::/ /:/ \ \::/ /:/ \__\::/ \ \::/ \ \:\ ~~~
13 | \ \:\ \ \:\/:/ \__\/ /:/ \__\/ /:/ /__/:/ \ \:\ \ \:\
14 | \ \:\ \ \::/ /__/:/ /__/:/ \__\/ \ \:\ \ \:\
15 | \__\/ \__\/ \__\/ \__\/ \__\/ \__\/
16 |
17 |
18 | ___ ___ ___ ___
19 | / /\ / /\ / /\ /__/\
20 | / /:/_ / /::\ / /::\ \ \:\
21 | ___ ___ / /:/ /\ / /:/\:\ / /:/\:\ \ \:\
22 | /__/\ / /\ / /:/ /:/_ / /:/~/::\ / /:/~/:/ _____\__\:\
23 | \ \:\ / /://__/:/ /:/ /\/__/:/ /:/\:\/__/:/ /:/___/__/::::::::\
24 | \ \:\ /:/ \ \:\/:/ /:/\ \:\/:/__\/\ \:\/:::::/\ \:\~~\~~\/
25 | \ \:\/:/ \ \::/ /:/ \ \::/ \ \::/~~~~ \ \:\ ~~~
26 | \ \::/ \ \:\/:/ \ \:\ \ \:\ \ \:\
27 | \__\/ \ \::/ \ \:\ \ \:\ \ \:\
28 | \__\/ \__\/ \__\/ \__\/
29 |
30 |
31 |
32 | # Transfer Learning
33 |
34 | * Examples of CIFAR10 and CIFAR100 classification from a pre-trained ImageNet ResNet50 model in `transfer_learning/`
35 |
36 | * The pre-trained model serves as a well-conditioned initial guess for transfer learning. In this setting Newton methods perform well due to their excellent local convergence properties. Low Rank Saddle Free Newton (LRSFN) is able to zero in on highly generalizable local minimizers, bypassing indefinite regions (a schematic sketch of the LRSFN update follows the reference below). Below are validation accuracies for the best choices of fixed step length for Adam, SGD and LRSFN with a fixed rank of 40.
37 |
38 |
39 |
40 |
41 |
42 | * For more information see the following manuscript
43 |
44 | - \[2\] O'Leary-Roseberry, T., Alger, N., Ghattas, O.,
45 | [**Low Rank Saddle Free Newton: A Scalable Method for Stochastic Nonconvex Optimization**](https://arxiv.org/abs/2002.02881).
46 | arXiv:2002.02881.
47 | ([Download](https://arxiv.org/pdf/2002.02881.pdf)) BibTeX:
48 | @article{OLearyRoseberryAlgerGhattas2020,
49 | title={Low Rank Saddle Free Newton: Algorithm and Analysis},
50 | author={O'Leary-Roseberry, Thomas and Alger, Nick and Ghattas, Omar},
51 | journal={arXiv preprint arXiv:2002.02881},
52 | year={2020}
53 | }
55 |
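For orientation, here is a minimal numpy sketch of the LRSFN update described above. It assumes a rank-r approximate eigendecomposition H ≈ U diag(lam) U^T of the stochastic Hessian and a Levenberg-Marquardt style damping `gamma`; the function name and the damping scheme are illustrative assumptions, not the package API.

```python
import numpy as np

def lrsfn_step(g, U, lam, gamma=1e-3):
    # Saddle-free modification: take |lambda| to flip negative curvature,
    # then apply (U |Lambda| U^T + gamma*I)^{-1} to -g via the Woodbury identity.
    abs_lam = np.abs(lam)
    D = abs_lam / (abs_lam + gamma)
    return -(g - U @ (D * (U.T @ g))) / gamma
```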
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/hessianlearn/test/test_HessianlearnModel.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 | from __future__ import absolute_import, division, print_function
18 |
19 | import unittest
20 | import numpy as np
21 | import os
22 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
23 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
24 | os.environ["KMP_WARNINGS"] = "FALSE"
25 |
26 | import tensorflow as tf
27 | if int(tf.__version__[0]) > 1:
28 | import tensorflow.compat.v1 as tf
29 | tf.disable_v2_behavior()
30 |
31 |
32 | import sys
33 | sys.path.append('../../')
34 | from hessianlearn import (HessianlearnModel, HessianlearnModelSettings,
35 | ClassificationProblem,Data, L2Regularization)
36 |
37 | tf.set_random_seed(0)
38 |
39 | class TestHessianlearnModel(unittest.TestCase):
40 |
41 | def test_all_optimizers(self):
42 | # Instantiate data
43 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
44 | # Normalize the data
45 | x_train = x_train.astype('float32') / 255.
46 | x_test = x_test.astype('float32') / 255.
47 | def one_hot_vectors(labels_temp):
48 | labels = np.zeros((labels_temp.shape[0],10))
49 | for i,label in enumerate(labels_temp):
50 | labels[i,label] = 1
51 | return labels
52 | y_train = one_hot_vectors(y_train)
53 | y_test = one_hot_vectors(y_test)
54 | # Instantiate neural network
55 | classifier = tf.keras.Sequential([
56 | tf.keras.layers.Flatten(input_shape=(28, 28)),
57 | tf.keras.layers.Dense(128, activation='relu'),
58 | tf.keras.layers.Dense(10)
59 | ])
60 | # Instantiate the problem, regularization.
61 | problem = ClassificationProblem(classifier,loss_type = 'cross_entropy',dtype=tf.float32)
62 | regularization = L2Regularization(problem,gamma =0.)
63 | # Instantiate the data object
64 | train_dict = {problem.x:x_train, problem.y_true:y_train}
65 | validation_dict = {problem.x:x_test, problem.y_true:y_test}
66 | data = Data(train_dict,32,validation_data = validation_dict,hessian_batch_size = 8)
67 | # Instantiate the model object
68 | HLModelSettings = HessianlearnModelSettings()
69 | HLModelSettings['max_sweeps'] = 1.
70 | HLModel = HessianlearnModel(problem,regularization,data,settings = HLModelSettings)
71 |
72 | for optimizer in ['lrsfn','adam','gd','sgd','incg']:
73 | HLModel.settings['optimizer'] = optimizer
74 | if optimizer == 'incg':
75 | HLModel.settings['alpha'] = 1e-4
76 | HLModel.fit()
77 | first_loss = HLModel.logger['train_loss'][0]
78 | last_iteration = max(HLModel.logger['train_loss'].keys())
79 | last_loss = HLModel.logger['train_loss'][last_iteration]
80 | print('first loss = ',first_loss)
81 | print('last_loss = ',last_loss)
82 | assert last_loss < first_loss
83 |
84 |
85 | if __name__ == '__main__':
86 | unittest.main()
--------------------------------------------------------------------------------
/hessianlearn/utilities/finiteDifferenceCheck.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import, division, print_function
19 | import numpy as np
20 | from numpy.linalg import norm
21 | # import tensorflow as tf
22 | # if int(tf.__version__[0]) > 1:
23 | # import tensorflow.compat.v1 as tf
24 | # tf.disable_v2_behavior()
25 |
26 |
27 | def finite_difference_check(sess,problem, feed_dict, w = None, dw=None,verbose = False):
28 | """
29 | This method implements finite difference checks for a given hessianlearn.problem.Problem
30 | -sess: tf.Session()
31 | -problem: hessianlearn.problem.Problem
32 | -feed_dict: data used for computation of cost, grad and hess
33 | -w: the point the finite difference check is evaluated at
34 | -dw: the direction for the finite difference check
35 | -verbose: Boolean for printing
36 | """
37 |
38 | if w is None:
39 | w = sess.run(problem.w)
40 | # w_zeros = []
41 | # for w_i in w:
42 | # w_zeros.append(np.zeros_like(w))
43 | if dw is None:
44 | dw = []
45 | for w_i in w:
46 | # print('Shape',w_i.shape)
47 | dw.append(np.ones_like(w_i))
48 | # dw = [np.ones_like(w_i) for w_i in w]
49 |
50 | eps = np.power(2., np.linspace(-32, 0, 33))
51 |
52 | initial_loss = sess.run(problem.loss,feed_dict)
53 |
54 |
55 | initial_g = sess.run(problem.gradient,feed_dict)
56 |
57 | feed_dict[problem.dw] = dw
58 | initial_gTdw = np.sum(sess.run(problem._gTdw,feed_dict))
59 |
60 | initial_Hdw = sess.run(problem.Hdw,feed_dict)
61 |
62 | error_g = np.zeros_like(eps)
63 | error_H = np.zeros_like(eps)
64 |
65 | # We will need to modify w during this process so we copy
66 | # the initial values of w so we can replace them later
67 | print('Copying initial w since it will be modified during this check')
68 | w_array = sess.run(problem.w)
69 | w_changed = True
70 |
71 | if verbose:
72 | print('Initial loss:',initial_loss)
73 | # print('Initial gradient:',initial_g)
74 | print('Initial gTdw',initial_gTdw)
75 | print('{0:10} {1:10} {2:10} {3:10}'.format('epsilon','loss','error_g','error_H'))
76 |
77 |
78 | for i in np.arange(eps.shape[0]):
79 |
80 |
81 | eps_i = eps[i]
82 | # Momentarily assign w
83 | # w_update = [eps_i*dw_i for dw_i in dw]
84 | # # w_plus = w + eps_i*dw
85 | # problem._update_w(w_update)
86 | new_w = []
87 | for w_i,dw_i in zip(w,dw):
88 | new_w.append(w_i + eps_i*dw_i)
89 | sess.run(problem._assign_to_w(new_w))
90 | #Evaluate new loss and calculate gradient error
91 | loss_plus = sess.run(problem.loss,feed_dict)
92 | error_g_i = np.abs( (loss_plus - initial_loss)/eps_i - initial_gTdw)
93 | error_g[i] = error_g_i
94 | # Evaluate new gradient and calculate Hessian error
95 | g_plus = sess.run(problem.gradient,feed_dict)
96 | error_H_i_ = []
97 | for g_plus_i,initial_g_i,initial_Hdw_i in zip(g_plus,initial_g,initial_Hdw):
98 | error_H_i_.append((g_plus_i - initial_g_i)/eps_i-initial_Hdw_i)
99 | error_H_i = np.sqrt(np.sum([np.linalg.norm(e)**2 for e in error_H_i_]))
100 | error_H[i] = error_H_i
101 |
102 | if verbose:
103 | print('{0:1.4e} {1:1.4e} {2:1.4e} {3:1.4e}'.format(eps_i,loss_plus,error_g_i,error_H_i))
104 |
105 | if w_changed:
106 | sess.run(problem._assign_to_w(w_array))
107 | print('Successfully re-assigned w')
108 |
109 | out = {}
110 | out['epsilon'] = eps
111 | out['error_g'] = error_g
112 | out['error_H'] = error_H
113 |
114 | return out
115 |
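# A self-contained numpy illustration of the identity this check relies on:
# (f(w + eps*dw) - f(w))/eps -> (g, dw) as eps -> 0. The error first shrinks
# like O(eps) and then grows again once floating point roundoff dominates,
# which is the signature to look for in the table printed above.
if __name__ == '__main__':
    f = lambda w: 0.5*np.dot(w, w)**2
    grad = lambda w: 2.0*np.dot(w, w)*w
    w = np.random.randn(5)
    dw = np.ones(5)
    gTdw = np.dot(grad(w), dw)
    for eps_i in np.power(2., np.linspace(-32, 0, 33)):
        error_g = np.abs((f(w + eps_i*dw) - f(w))/eps_i - gTdw)
        print('{0:1.4e} {1:1.4e}'.format(eps_i, error_g))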
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/gradientDescent.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 |
24 | from ..utilities.parameterList import ParameterList
25 | from ..algorithms import Optimizer
26 | from ..algorithms.globalization import ArmijoLineSearch, TrustRegion
27 | from ..problem import L2Regularization
28 |
29 |
30 |
31 | def ParametersGradientDescent(parameters = {}):
32 | parameters['alpha'] = [1e-3, "Initial steplength, or learning rate"]
33 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
34 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
35 | parameters['max_NN_evals_per_batch'] = [10000, "Scale constant for maximum neural network evaluations per datum"]
36 | parameters['max_NN_evals'] = [None, "Maximum number of neural network evaluations"]
37 | parameters['max_backtracking_iter'] = [10, 'max backtracking iterations for line search']
38 |
39 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
40 | # Reasons for convergence failure
41 | parameters['reasons'] = [[], 'list of reasons for termination']
42 |
43 | return ParameterList(parameters)
44 |
45 |
46 | class GradientDescent(Optimizer):
47 | """
48 | This class implements the gradient descent (and stochastic variant) optimizer
49 | """
50 | def __init__(self,problem,regularization,sess = None,feed_dict = None,parameters = ParametersGradientDescent()):
51 | """
52 | The constructor for this class takes:
53 | -problem: hessianlearn.problem.Problem
54 | -regularization: hessianlearn.problem.Regularization
55 | -sess: tf.Session()
56 | -parameters: hyperparameters dictionary
57 | """
58 | if regularization is None:
59 | _regularization = L2Regularization(problem,gamma = 0.0)
60 | else:
61 | _regularization = regularization
62 | super(GradientDescent,self).__init__(problem,_regularization,sess,parameters)
63 |
64 | self.grad = self.problem.gradient + self.regularization.gradient
65 | self._sweeps = np.zeros(2)
66 |
67 | self.trust_region_initialized = False
68 | if self.parameters['globalization'] == 'trust_region':
69 | self.alpha = 0.0
70 | else:
71 | self.alpha = parameters['alpha']
72 |
73 |
74 |
75 |
76 | def minimize(self,feed_dict = None):
77 | r"""
78 | Implements the gradient update:
79 | w-=alpha*g
80 | Takes the parameter:
81 | -feed_dict: data to be used to evaluate stochastic gradient and cost
82 | """
83 | assert self.sess is not None
84 | assert feed_dict is not None
85 |
86 | g = self.sess.run(self.grad,feed_dict = feed_dict)
87 |
88 |
89 | if self.parameters['globalization'] == 'line_search':
90 | w_dir = -g
91 | w_dir_inner_g = np.inner(w_dir,g)
92 | initial_cost = self.sess.run(self.problem.loss, feed_dict)
93 | cost_at_candidate = lambda p : self._loss_at_candidate(p,feed_dict)
94 | self.alpha, line_search, line_search_iter = ArmijoLineSearch(w_dir,w_dir_inner_g,\
95 | cost_at_candidate, initial_cost)
96 | p = self.alpha*w_dir
97 | self._sweeps += [1+0.5*line_search_iter,0]
98 |
99 | else: # fixed steplength; trust region globalization is not implemented here
100 | self.alpha = self.parameters['alpha']
101 | p = -self.parameters['alpha']*g
102 | self._sweeps += [1,0]
103 |
104 | self.p = p
105 |
106 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:p})
107 |
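# A minimal backtracking sketch of the Armijo condition used above. This is
# an assumed form: the actual ArmijoLineSearch lives in
# algorithms/globalization and may differ in constants and return values.
# Accept alpha once f(w + alpha*p) <= f(w) + c*alpha*(g, p).
def _armijo_backtrack_sketch(cost_at_candidate, initial_cost, w_dir,
                             w_dir_inner_g, alpha=1.0, c=1e-4,
                             shrink=0.5, max_backtracking_iter=10):
    for it in range(max_backtracking_iter):
        if cost_at_candidate(alpha*w_dir) <= initial_cost + c*alpha*w_dir_inner_g:
            return alpha, True, it    # steplength, success flag, iterations used
        alpha *= shrink
    return alpha, False, max_backtracking_iter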
--------------------------------------------------------------------------------
/hessianlearn/algorithms/adam.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 |
24 | from ..utilities.parameterList import ParameterList
25 | from ..algorithms import Optimizer
26 | from ..problem import L2Regularization
27 |
28 |
29 |
30 | def ParametersAdam(parameters = {}):
31 | parameters['alpha'] = [1e-3, "Initial steplength, or learning rate"]
32 | parameters['beta_1'] = [0.9, "Exponential decay rate for first moment"]
33 | parameters['beta_2'] = [0.999, "Exponential decay rate for second moment"]
34 | parameters['epsilon'] = [1e-7, "epsilon for denominator involving square root"]
35 |
36 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
37 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
38 | parameters['max_NN_evals_per_batch'] = [10000, "Scale constant for maximum neural network evaluations per datum"]
39 | parameters['max_NN_evals'] = [None, "Maximum number of neural network evaluations"]
40 |
41 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
42 | # Reasons for convergence failure
43 | parameters['reasons'] = [[], 'list of reasons for termination']
44 |
45 |
46 | return ParameterList(parameters)
47 |
48 |
49 | class Adam(Optimizer):
50 | """
51 | This class implements the Adam optimizer
52 | """
53 | def __init__(self,problem,regularization = None,sess = None,feed_dict= None,parameters = ParametersAdam()):
54 | """
55 | The constructor for this class takes:
56 | -problem: hessianlearn.problem.Problem
57 | -regularization: hessianlearn.problem.Regularization
58 | -sess: tf.Session()
59 | -parameters: hyperparameters dictionary
60 | """
61 | if regularization is None:
62 | _regularization = L2Regularization(problem,gamma = 0.0)
63 | else:
64 | _regularization = regularization
65 | super(Adam,self).__init__(problem,_regularization,sess,parameters)
66 |
67 | self.grad = self.problem.gradient + self.regularization.gradient
68 |
69 | self.m = np.zeros(self.problem.dimension)
70 | self.v = np.zeros(self.problem.dimension)
71 | self.p = np.zeros(self.problem.dimension)
72 |
73 | self._iter = 0
74 | self._sweeps = np.zeros(2)
75 |
76 | self.alpha = self.parameters['alpha']
77 |
78 | def minimize(self,feed_dict = None):
79 | r"""
80 | This method implements one step of the Adam algorithm:
81 | -feed_dict: data dictionary used to evaluate gradient
82 | """
83 | assert self.sess is not None
84 | assert feed_dict is not None
85 | self._iter += 1
86 |
87 | # m_hat and v_hat below are already bias corrected, so the raw learning
88 | # rate is used here; rescaling alpha as well would double-count the correction
89 | alpha = self.parameters['alpha']
89 | gradient = self.sess.run(self.grad,feed_dict = feed_dict)
90 |
91 | self.m = self.parameters['beta_1']*self.m + (1-self.parameters['beta_1'])*gradient
92 | m_hat = self.m / (1.0 - self.parameters['beta_1']**self._iter)
93 |
94 | g_sq_vec = np.square(gradient)
95 | self.v = self.parameters['beta_2']*self.v + (1-self.parameters['beta_2'])*g_sq_vec
96 | v_hat = self.v / (1.0 - self.parameters['beta_2']**self._iter)
97 | v_root = np.sqrt(v_hat)
98 |
99 |
100 | update = -alpha*m_hat/(v_root +self.parameters['epsilon'])
101 | self.p = update
102 | self._sweeps += [1,0]
103 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
104 |
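# A quick standalone check of the bias correction used above: for a constant
# gradient g the corrected first moment m_hat recovers g exactly at every
# step, which is the point of dividing by (1 - beta_1**t).
if __name__ == '__main__':
    beta_1, g_const = 0.9, 2.5
    m = 0.0
    for t in range(1, 6):
        m = beta_1*m + (1 - beta_1)*g_const
        m_hat = m / (1.0 - beta_1**t)
        assert abs(m_hat - g_const) < 1e-12
    print('bias-corrected first moment recovers a constant gradient exactly')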
105 |
106 |
--------------------------------------------------------------------------------
/hessianlearn/data/lfw.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import os
23 | import sys
24 |
25 |
26 | import numpy as np
27 | from scipy import signal
28 | import random
29 | from ..data.data import *
30 |
31 | import math
32 | import time
33 |
34 | # from statsmodels import robust
35 |
36 | def dir_check(dir):
37 | try:
38 | os.stat(dir)
39 | except:
40 | os.mkdir(dir)
41 |
42 | def reporthook(count, block_size, total_size):
43 | global start_time
44 | if count == 0:
45 | start_time = time.time()
46 | return
47 | duration = time.time() - start_time
48 | progress_size = int(count * block_size)
49 | speed = int(progress_size / (1024 * duration))
50 | percent = int(count * block_size * 100 / total_size)
51 | sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
52 | (percent, progress_size / (1024 * 1024), speed, duration))
53 | sys.stdout.flush()
54 |
55 |
56 |
57 | def load_lfw():
58 | try:
59 | # read from file
60 | images = np.load('lfw_all_images.npy')
61 | labels = np.load('lfw_all_labels.npy')
62 | print('Loaded successfully locally')
63 | return [images, labels]
64 |
65 | except:
66 | # write to file
67 | print(80*'#')
68 | print('Did not load locally.')
69 | print(80*'#')
70 | try:
71 | os.stat("lfw.tgz")
72 | except:
73 | print('Downloading from source, and saving to disk.')
74 | print(80*'#')
75 | import urllib.request
76 | urllib.request.urlretrieve("http://vis-www.cs.umass.edu/lfw/lfw.tgz", "lfw.tgz",reporthook)
77 | folder_name = 'lfw/'
78 | try:
79 | os.stat(folder_name)
80 | except:
81 | try:
82 | import subprocess
83 | subprocess.run(['tar','zxvf',"lfw.tgz"])
84 | except:
85 | pass
86 | import shutil
87 | folder_names = os.listdir(folder_name)
88 | n_folders = len(folder_names)
89 | print(n_folders,'folders found')
90 | if not os.path.isdir('lfw_all_images'):
91 | os.mkdir('lfw_all_images')
92 | print('Making directory lfw_all_images/')
93 | for folder in os.listdir('lfw/'):
94 | for file in os.listdir('lfw/'+folder):
95 | if not os.path.isfile('lfw_all_images/'+file):
96 | shutil.move('lfw/'+folder+'/'+file,'lfw_all_images/')
97 | print('Moving ',file,'to lfw_all_images')
98 |
99 | file_names = os.listdir('lfw_all_images')
100 | n_files = len(file_names)
101 |
102 | images = np.empty(shape = (n_files,250,250,3))
103 |
104 | from keras.preprocessing import image
105 | for file,counter in zip(file_names,range(n_files)):
106 | img = image.load_img('lfw_all_images/'+file)
107 | images[counter,:,:,:] = image.img_to_array(img)
108 | labels = np.array(file_names)
109 | assert(labels.shape[0]==images.shape[0])
110 | print(labels.shape)
111 | images = np.array(images)
112 | np.save('lfw_all_images.npy',images)
113 | np.save('lfw_all_labels.npy',labels)
114 | print('Saved locally')
115 | return [images,labels]
116 |
117 |
118 | # def view_random_pair(self):
119 | # try:
120 | # labelkey = ['Airplane','Automobile','Bird','Cat','Deer','Dog','Frog','Horse','Ship','Truck']
121 | # i = np.random.choice(range(60000))
122 | # index = self.all_data[1][i]
123 | # label = labelkey[index]
124 | # import matplotlib.pyplot as plt
125 | # fig, ax = plt.subplots(figsize = (3,3))
126 | # ax.set_title(str(label))
127 | # data = self.all_data[0][i,:,:,:].astype(np.uint8)
128 | # ax.imshow(data)
129 | # plt.show()
130 | # except:
131 | # pass
132 |
133 |
134 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/optimizer.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | from abc import ABC, abstractmethod
23 | import numpy as np
24 |
25 | from ..utilities.parameterList import ParameterList
26 | from ..problem import Hessian
27 |
28 | def ParametersOptimizer(dictionary = {}):
29 | parameters = dictionary
30 | parameters['alpha'] = [1.0, "Initial steplength, or learning rate"]
31 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
32 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
33 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
34 |
35 |
36 | return ParameterList(parameters)
37 |
38 |
39 | class Optimizer(ABC):
40 | """
41 | This class describes the optimizer used during training
42 |
43 | All children must implement the method minimize, which implements
44 | one step of the optimizers weight update scheme
45 | """
46 | def __init__(self,problem = None,regularization = None, sess = None,parameters = ParametersOptimizer(),comm = None):
47 | """
48 | The constructor for this class takes:
49 | -problem: hessianlearn.problem.Problem class
50 | -regularization: hessianlearn.problem.Regularization class
51 | -sess: the tf.Session() used to evaluate the computational graph
52 | -parameters: the dictionary of hyperparameters for the optimizer.
53 | """
54 | self._problem = problem
55 | self._regularization = regularization
56 | self._sess = sess
57 | self._parameters = parameters
58 | self._sweeps = 0
59 | self._comm = comm
60 | self._iter = 0
61 | self.H = Hessian(problem=problem,sess=sess)
62 |
63 | @property
64 | def problem(self):
65 | return self._problem
66 |
67 | @property
68 | def sess(self):
69 | return self._sess
70 |
71 | @property
72 | def parameters(self):
73 | return self._parameters
74 |
75 | @property
76 | def sweeps(self):
77 | return self._sweeps
78 |
79 | @property
80 | def comm(self):
81 | return self._comm
82 |
83 | @property
84 | def iter(self):
85 | return self._iter
86 |
87 | @property
88 | def regularization(self):
89 | return self._regularization
90 |
91 | @property
92 | def set_sess(self):
93 | return self._set_sess
94 |
95 |
96 | def _set_sess(self,sess):
97 | r"""
98 | Sets the tf.Session()
99 | """
100 | self._sess = sess
101 | if 'H' in dir(self):
102 | self.H._sess = sess
103 |
104 | def minimize(self):
105 | r"""
106 | Implements update rule for the algorithm.
107 | """
108 | raise NotImplementedError("Child class should implement method minimize")
109 |
110 | def initialize_trust_region(self):
111 | r"""
112 | Initializes trust region parameters
113 | """
114 | raise NotImplementedError("Child class should implement method initialize_trust_region")
115 |
116 |
117 |
118 | def _loss_at_candidate(self,p,feed_dict):
119 | """
120 | This method implements a function to assist with Armijo line search
121 | -p: candidate update to be evaluated in Armijo line search producedure
122 | -feed_dict: data dictionary used to evaluate cost at candidate
123 | """
124 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:p})
125 | # self.sess.run(self.problem._update_w(p))
126 | misfit = self.sess.run((self.problem.loss),feed_dict)
127 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:-p})
128 | # self.sess.run(self.problem._update_w(-p))
129 | return misfit
130 |
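# A hypothetical minimal subclass sketch (not part of the package) showing
# the contract children must satisfy: implement minimize(), which pushes one
# additive weight update through problem._update_ops.
class _FixedStepGradientSketch(Optimizer):
    def minimize(self, feed_dict=None):
        g = self.sess.run(self.problem.gradient, feed_dict=feed_dict)
        update = -self.parameters['alpha']*g
        self.sess.run(self.problem._update_ops,
                      feed_dict={self.problem._update_placeholder: update})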
131 |
132 |
133 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/gmresSolver.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | import math
22 | import numpy as np
23 | import tensorflow as tf
24 | if int(tf.__version__[0]) > 1:
25 | import tensorflow.compat.v1 as tf
26 | tf.disable_v2_behavior()
27 |
28 | from ..utilities.parameterList import ParameterList
29 | from ..algorithms import Optimizer
30 | from .. problem import IdentityPreconditioner
31 | from ..problem import L2Regularization
32 | from abc import ABC, abstractmethod
33 |
34 | class Identity(object):
35 | def __init__(self):
36 |
37 | pass
38 |
39 | def __call__(self, x):
40 | return x
41 |
42 |
43 |
44 | def ParametersGMRESSolver(dictionary = {}):
45 | parameters = dictionary
46 | parameters["rel_tolerance"] = [1e-9, "the relative tolerance for the stopping criterion"]
47 | parameters["abs_tolerance"] = [1e-12, "the absolute tolerance for the stopping criterion"]
48 | parameters["max_iter"] = [20, "the maximum number of iterations"]
49 | parameters["zero_initial_guess"] = [True, "if True we start with a 0\
50 | initial guess; if False we use the x as initial guess."]
51 | parameters["print_level"] = [-1, "verbosity level: -1 --> no output on \
52 | screen; 0 --> only final residual at convergence or reason for not not convergence"]
53 |
54 | parameters['coarse_tol'] = [0.5,'coarse tolerance used in calculation \
55 | of relative tolerances for E-W conditions']
56 | return ParameterList(parameters)
57 |
58 |
59 | class GMRESSolver(ABC):
60 | """
61 | This class implements a GMRES solver
62 | """
63 | reason = ["Maximum Number of Iterations Reached",
64 | "Relative/Absolute residual less than tol",
65 | "Reached a negative direction",
66 | "Reached trust region boundary"
67 | ]
68 | def __init__(self,problem,regularization,sess = None,preconditioner = None,\
69 | x = None,parameters = ParametersGMRESSolver()):
70 | self.sess = sess
71 | self.problem = problem
72 | self.regularization = regularization
73 | if x is None:
74 | # self.x = tf.Variable(self.problem.gradient.initialized_value())
75 | self.x = self.problem.gradient
76 | else:
77 | self.x = x
78 | self.parameters = parameters
79 |
80 |
81 | self.Aop = self.problem.Hdw + self.regularization.Hdw
82 |
83 | # # Define preconditioner
84 | # if preconditioner is None:
85 | # self.Minv = IdentityPreconditioner(problem,self.problem.dtype)
86 | # else:
87 | # self.Minv = preconditioner
88 |
89 |
90 |
91 |
92 |
93 |
94 | def solve(self,b,feed_dict = None,x_0 = None):
95 | r"""
96 | Solve Ax=b by the GMRES method,
97 | here delegated to scipy.sparse.linalg.gmres
98 | """
99 | assert self.sess is not None
100 | assert feed_dict is not None
101 |
102 | self.iter = 0
103 | self.converged = False
104 | self.reason_id = 0
105 | x = np.zeros_like(b)
106 |
107 | feed_dict[self.problem.dw] = x
108 | Ax_0 = self.sess.run(self.Aop,feed_dict = feed_dict)
109 | # Calculate initial residual r = Ax_0 -b
110 | r = b - Ax_0
111 | # Calculate tolerance for Eisenstat Walker conditions
112 | rr_0 = np.dot(r,r)
113 | rtol2 = rr_0 * self.parameters["rel_tolerance"] * self.parameters["rel_tolerance"]
114 | atol2 = self.parameters["abs_tolerance"] * self.parameters["abs_tolerance"]
115 | tol = max(rtol2, atol2)
116 | import scipy
117 | from scipy.sparse.linalg import LinearOperator
118 |
119 | def Ap(p):
120 | feed_dict[self.problem.dw] = p
121 | return self.sess.run(self.Aop,feed_dict = feed_dict)
122 |
123 | n = self.problem.dimension
124 |
125 | A = LinearOperator((n,n), matvec=Ap)
126 |
127 | # self.iter += self.parameters["max_iter"]
128 |
129 | def update_iters(rk):
130 | self.iter +=1
131 |
132 | return scipy.sparse.linalg.gmres(A, b, tol=tol, maxiter=self.parameters["max_iter"],callback = update_iters)
133 |
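# A standalone sketch of the LinearOperator + gmres pattern used above, with
# a plain diagonal matrix standing in for the Hessian action (the matrix and
# names here are illustrative only). gmres returns (x, info), where info == 0
# means the requested tolerance was met.
if __name__ == '__main__':
    from scipy.sparse.linalg import LinearOperator, gmres
    n = 100
    A_mat = np.diag(np.linspace(1., 10., n))
    A_op = LinearOperator((n, n), matvec=lambda p: A_mat.dot(p))
    b = np.ones(n)
    x, info = gmres(A_op, b, maxiter=20)
    print('converged' if info == 0 else 'not converged',
          '| residual:', np.linalg.norm(b - A_mat.dot(x)))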
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
--------------------------------------------------------------------------------
/applications/mnist/mnist_autoencoder.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | ################################################################################
19 | # Uses some code from https://blog.keras.io/building-autoencoders-in-keras.html
20 | ################################################################################
21 |
22 | import numpy as np
23 | import os
24 | import tensorflow as tf
25 | import time
26 | # if int(tf.__version__[0]) > 1:
27 | # import tensorflow.compat.v1 as tf
28 | # tf.disable_v2_behavior()
29 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
30 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
31 | os.environ["KMP_WARNINGS"] = "FALSE"
32 | import sys
33 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
34 | from hessianlearn import *
35 |
36 | tf.set_random_seed(0)
37 |
38 | settings = {}
39 | # Set run specifications
40 | # Data specs
41 | settings['batch_size'] = 100
42 | settings['hess_batch_size'] = 10
43 |
44 |
45 | ################################################################################
46 | # Instantiate data
47 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
48 |
49 |
50 | # Normalize the data
51 | x_train = x_train.astype('float32') / 255.
52 | x_test = x_test.astype('float32') / 255.
53 | # Reshape the data
54 | x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
55 | x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
56 |
57 | # Instantiate the data object
58 | data = Data([x_train,y_train],settings['batch_size'],test_data = [x_test,y_test],hessian_batch_size = settings['hess_batch_size'])
59 |
60 | # settings['input_shape'] = data._input_shape
61 | # settings['output_shape'] = data._output_shape
62 |
63 |
64 | ################################################################################
65 | # Create the neural network in keras
66 |
67 | encoding_dim = 32
68 | input_img = tf.keras.layers.Input(shape=(784,))
69 | encoded = tf.keras.layers.Dense(encoding_dim, activation='softplus')(input_img)
70 | decoded = tf.keras.layers.Dense(784, activation='sigmoid')(encoded)
71 | autoencoder = tf.keras.models.Model(input_img, decoded)
72 |
73 |
74 | ################################################################################
75 | # Instantiate the problem, regularization.
76 |
77 | problem = AutoencoderProblem(autoencoder,dtype=tf.float32)
78 |
79 | settings['tikhonov_gamma'] = 0.0
80 |
81 | regularization = L2Regularization(problem,gamma = settings['tikhonov_gamma'])
82 |
83 |
84 | ################################################################################
85 | # Instantiate the model object
86 | HLModelSettings = HessianlearnModelSettings()
87 |
88 | HLModelSettings['optimizer'] = 'lrsfn'
89 | HLModelSettings['alpha'] = 1e-2
90 | HLModelSettings['globalization'] = 'line_search'
91 | HLModelSettings['hessian_low_rank'] = 20
92 | HLModelSettings['max_backtrack'] = 16
93 | HLModelSettings['max_sweeps'] = 50
94 |
95 | HLModelSettings['problem_name'] = 'mnist_ae'
96 | HLModelSettings['record_spectrum'] = False
97 | HLModelSettings['rq_data_size'] = 100
98 |
99 |
100 | HLModel = HessianlearnModel(problem,regularization,data,settings = HLModelSettings)
101 |
102 | HLModel.fit()
103 |
104 | ################################################################################
105 | # Postprocessing with the trained autoencoder
106 |
107 | encoder = tf.keras.models.Model(input_img, encoded)
108 |
109 | encoded_input = tf.keras.layers.Input(shape=(encoding_dim,))
110 |
111 | decoder_layer = autoencoder.layers[-1]
112 |
113 | decoder = tf.keras.models.Model(encoded_input, decoder_layer(encoded_input))
114 |
115 | encoded_imgs = encoder.predict(x_test)
116 | decoded_imgs = decoder.predict(encoded_imgs)
117 |
118 | try:
119 | import matplotlib.pyplot as plt
120 |
121 | n = 10 # how many digits we will display
122 | plt.figure(figsize=(20, 4))
123 | for i in range(n):
124 | # display original
125 | ax = plt.subplot(2, n, i + 1)
126 | plt.imshow(x_test[i].reshape(28, 28))
127 | plt.gray()
128 | ax.get_xaxis().set_visible(False)
129 | ax.get_yaxis().set_visible(False)
130 |
131 | # display reconstruction
132 | ax = plt.subplot(2, n, i + 1 + n)
133 | plt.imshow(decoded_imgs[i].reshape(28, 28))
134 | plt.gray()
135 | ax.get_xaxis().set_visible(False)
136 | ax.get_yaxis().set_visible(False)
137 | plt.show()
138 | except:
139 | pass
140 |
141 |
--------------------------------------------------------------------------------
/hessianlearn/test/test_varianceBasedNystrom.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Authors: Nick Alger, Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 | from __future__ import absolute_import, division, print_function
18 |
19 | import unittest
20 | import numpy as np
21 | import sys
22 |
23 | sys.path.append('../../')
24 | from hessianlearn import (variance_based_nystrom)
25 | sys.path.append('../algorithms')
26 | from varianceBasedNystrom import *
27 |
28 | def make_random_symmetric_matrix(n,p):
29 | U, _ = np.linalg.qr(np.random.randn(n,n))
30 | ss = np.random.randn(n)**p
31 | A = np.dot(U, np.dot(np.diag(ss), U.T))
32 | return A
33 |
34 |
35 | def compute_Theta_slow(Q, apply_AA):
36 | r = Q.shape[1]
37 | m = len(apply_AA)
38 | Theta_true = np.zeros((r, r, m))
39 | for i in range(r):
40 | for j in range(r):
41 | for k in range(m):
42 | Theta_true[i,j,k] = np.dot(Q[:,i], apply_AA[k](Q[:,j]))
43 | return Theta_true
44 |
45 | def compute_rayleigh_statistics_slow(U, apply_AA):
46 | m = len(apply_AA)
47 | r = U.shape[1]
48 | C = np.zeros((r, m))
49 | for k in range(m):
50 | for i in range(r):
51 | C[i,k] = np.dot(U[:,i], apply_AA[k](U[:,i]))
52 |
53 | all_mu = np.mean(C, axis=1)
54 | all_std = np.std(C, axis=1)
55 | return all_mu, all_std
56 |
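# A standalone sketch of the randomized eigendecomposition idea the helpers
# above verify (in the style of Halko et al.): project A onto an approximate
# range basis Q and diagonalize the small matrix Q^T A Q.
def randomized_eigh_sketch(A, r):
    Omega = np.random.randn(A.shape[0], r)
    Q, _ = np.linalg.qr(np.dot(A, Omega))     # approximate range of A
    T = np.dot(Q.T, np.dot(A, Q))             # small r x r projection
    dd, V = np.linalg.eigh(T)
    return dd, np.dot(Q, V)                   # approximate eigenpairs of A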
57 |
58 | class TestVarianceBasedNystrom(unittest.TestCase):
59 |
60 | def setUp(self):
61 | self.n = 500
62 | m = 50
63 | p = 7
64 | self.batch_r = 10
65 | randomness_factor = 0.1
66 |
67 | A0 = make_random_symmetric_matrix(self.n,p)
68 | AA = [A0 + randomness_factor * make_random_symmetric_matrix(self.n,p) for _ in range(m)]
69 |
70 | self.apply_AA = [lambda x, Ak=Ak: np.dot(Ak,x) for Ak in AA]
71 |
72 | self.A = np.sum(AA, axis=0)/m
73 |
74 |
75 |
76 |
77 |
78 | def test_all(self):
79 | Y = get_random_range_vectors(self.apply_AA, self.n, self.batch_r)
80 | Q,_ = np.linalg.qr(Y)
81 | Theta = compute_Theta(Q, self.apply_AA)
82 | Theta_true = compute_Theta_slow(Q, self.apply_AA)
83 | err_Theta = np.linalg.norm(Theta - Theta_true)/np.linalg.norm(Theta_true)
84 | print('err_Theta=', err_Theta)
85 | assert err_Theta < 1e-10
86 |
87 | dd, U, V = finish_computing_eigenvalue_decomposition(Q, Theta)
88 |
89 | A_approx = np.dot(U, np.dot(np.diag(dd), U.T))
90 | err_A_1 = np.linalg.norm(self.A - A_approx)/np.linalg.norm(self.A)
91 | print('err_A_1=', err_A_1)
92 | assert err_A_1 < 1.0
93 |
94 | # Errors in computing statistics
95 | all_mu, all_std = compute_rayleigh_statistics(Theta, V)
96 |
97 | all_mu_true, all_std_true = compute_rayleigh_statistics_slow(U,self.apply_AA)
98 |
99 | err_mu = np.linalg.norm(all_mu - all_mu_true)/np.linalg.norm(all_mu_true)
100 | err_std = np.linalg.norm(all_std - all_std_true)/np.linalg.norm(all_std_true)
101 |
102 | print('err_mu=', err_mu)
103 | print('err_std=', err_std)
104 | assert err_mu < 1e-10
105 | assert err_std < 1e-10
106 |
107 | # Redo computations with better range approximation
108 | Y2 = get_random_range_vectors(self.apply_AA, self.n, self.batch_r)
109 | Y2_perp = Y2 - np.dot(Q,np.dot(Q.T, Y2))
110 | Q2,_ = np.linalg.qr(Y2_perp)
111 | Q_new = np.hstack([Q, Q2])
112 | err_Q_orth = np.linalg.norm(np.dot(Q_new.T, Q_new) - np.eye(Q_new.shape[1]))
113 | print('err_Q_orth=', err_Q_orth)
114 | assert err_Q_orth < 1e-10
115 |
116 | Theta_new = update_Theta(Q, Q2, Theta, self.apply_AA)
117 |
118 | Theta_true_new = compute_Theta_slow(Q_new, self.apply_AA)
119 |
120 | err_Theta_new = np.linalg.norm(Theta_new - Theta_true_new)/np.linalg.norm(Theta_true_new)
121 | print('err_Theta_new=', err_Theta_new)
122 |
123 | assert err_Theta_new < 1e-10
124 |
125 | dd_new, U_new, V_new = finish_computing_eigenvalue_decomposition(Q_new, Theta_new)
126 | A_approx_new = np.dot(U_new, np.dot(np.diag(dd_new), U_new.T))
127 | err_A_new = np.linalg.norm(self.A - A_approx_new)/np.linalg.norm(self.A)
128 | print('err_A_new=', err_A_new)
129 |
130 | # The approximation error should decrease monotonically as we increase the range
131 | assert err_A_new < err_A_1
132 |
133 | # Run the complete method from scratch
134 |
135 | [dd_good, U_good, all_std_good], [dd_all,U_all,all_std] = variance_based_nystrom(self.apply_AA, self.n)
136 |
137 | A_good_approx = np.dot(U_good, np.dot(np.diag(dd_good), U_good.T))
138 | err_A_good = np.linalg.norm(A_good_approx - self.A)/np.linalg.norm(self.A)
139 | print('err_A_good=', err_A_good)
140 | assert err_A_good < 0.1
141 |
142 |
143 |
144 | if __name__ == '__main__':
145 | unittest.main()
--------------------------------------------------------------------------------
/hessianlearn/algorithms/inexactNewtonMINRES.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 |
24 | from ..utilities.parameterList import ParameterList
25 | from ..algorithms import Optimizer, MINRESSolver, ParametersMINRESSolver
26 | from ..algorithms.globalization import ArmijoLineSearch, TrustRegion
27 | from ..problem import L2Regularization
28 |
29 |
30 |
31 |
32 | def ParametersInexactNewtonMINRES(parameters = {}):
33 | parameters['alpha'] = [1e-1, "Initial steplength, or learning rate"]
34 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
35 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
36 | parameters['max_NN_evals_per_batch'] = [20, "Scale constant for maximum neural network evaluations per datum"]
37 | parameters['max_NN_evals'] = [None, "Maximum number of neural network evaluations"]
38 |
39 | parameters['minres_parameters'] = [ ParametersMINRESSolver(),'MINRES solver parameters']
40 | # Krylov solver parameters
41 | parameters['cg_coarse_tol'] = [0.5,'CG coarse solve tolerance']
42 | parameters['cg_max_iter'] = [1000,'CG maximum iterations']
43 | parameters['eta_mode'] = [0, 'eta mode for E-W conditions:0,1,2']
44 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
45 | parameters['max_backtracking_iter'] = [10, 'max backtracking iterations for line search']
46 |
47 |
48 | # Reasons for convergence failure
49 | parameters['reasons'] = [[], 'list of reasons for termination']
50 |
51 |
52 | return ParameterList(parameters)
53 |
54 |
55 | class InexactNewtonMINRES(Optimizer):
56 | """
57 | This class implements the Inexact Newton MINRES optimizer
58 | """
59 |
60 | def __init__(self,problem,regularization = None,sess = None,parameters = ParametersInexactNewtonMINRES(),preconditioner = None):
61 | """
62 | The constructor for this class takes:
63 | -problem: hessianlearn.problem.Problem
64 | -regularization: hessianlearn.problem.Regularization
65 | -sess: tf.Session()
66 | -parameters: hyperparameters dictionary
67 | -preconditioner: hessianlearn.problem.Preconditioner
68 | """
69 | if regularization is None:
70 | _regularization = L2Regularization(problem,gamma = 0.0)
71 | else:
72 | _regularization = regularization
73 | super(InexactNewtonMINRES,self).__init__(problem,_regularization,sess,parameters)
74 |
75 | self._sweeps = np.zeros(2)
76 | self.grad = self.problem.gradient + self.regularization.gradient
77 | self.minres_solver = MINRESSolver(self.problem,self.regularization,\
78 | self.sess,parameters= self.parameters['minres_parameters'])
79 | self.alpha = 0.0
80 |
81 |
82 | def minimize(self,feed_dict = None,hessian_feed_dict = None):
83 | r"""
84 | Updates using inexact Newton MINRES
85 | """
86 | assert self.sess is not None
87 | assert feed_dict is not None
88 | if hessian_feed_dict is None:
89 | hessian_feed_dict = feed_dict
90 |
91 | self.gradient = self.sess.run(self.grad,feed_dict = feed_dict)
92 |
93 | if self.parameters['globalization'] == 'line_search':
94 | w_dir,_ = self.minres_solver.solve(-self.gradient,hessian_feed_dict)
95 | w_dir_inner_g = np.inner(w_dir,self.gradient)
96 | initial_cost = self.sess.run(self.problem.loss,feed_dict = feed_dict)
97 | cost_at_candidate = lambda p : self._loss_at_candidate(p,feed_dict = feed_dict)
98 | self.alpha, line_search, line_search_iter = ArmijoLineSearch(w_dir,w_dir_inner_g,\
99 | cost_at_candidate, initial_cost,\
100 | max_backtracking_iter = self.parameters['max_backtracking_iter'])
101 | update = self.alpha*w_dir
102 | self._sweeps += [1+0.5*line_search_iter,2*self.minres_solver.iter]
103 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
104 | else: # fixed steplength; trust region globalization is not implemented here
105 | self.alpha = self.parameters['alpha']
106 | p,converged = self.minres_solver.solve(-self.gradient,hessian_feed_dict)
107 | # print(converged)
108 | # if converged:
109 | # print('Converged!')
110 | # else:
111 | # print('NOT CONVERGED!!!!!')
112 | self._sweeps += [1, 4*self.minres_solver.iter]
113 | self.p = p
114 | update = self.alpha*p
115 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
116 |
117 |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/inexactNewtonGMRES.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 |
24 | from ..utilities.parameterList import ParameterList
25 | from ..algorithms import Optimizer, GMRESSolver, ParametersGMRESSolver
26 | from ..algorithms.globalization import ArmijoLineSearch, TrustRegion
27 | from ..problem import L2Regularization
28 |
29 |
30 |
31 |
32 | def ParametersInexactNewtonGMRES(parameters = {}):
33 | parameters['alpha'] = [1e-1, "Initial steplength, or learning rate"]
34 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
35 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
36 | parameters['max_NN_evals_per_batch'] = [20, "Scale constant for maximum neural network evaluations per datum"]
37 | parameters['max_NN_evals'] = [None, "Maximum number of neural network evaluations"]
38 |
39 | parameters['gmres_parameters'] = [ ParametersGMRESSolver(),'GMRES solver parameters']
40 | # Krylov solver parameters
41 | parameters['cg_coarse_tol'] = [0.5,'CG coarse solve tolerance']
42 | parameters['cg_max_iter'] = [1000,'CG maximum iterations']
43 | parameters['eta_mode'] = [0, 'eta mode for E-W conditions:0,1,2']
44 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
45 | parameters['max_backtracking_iter'] = [10, 'max backtracking iterations for line search']
46 |
47 |
48 | # Reasons for convergence failure
49 | parameters['reasons'] = [[], 'list of reasons for termination']
50 |
51 |
52 | return ParameterList(parameters)
53 |
54 |
55 | class InexactNewtonGMRES(Optimizer):
56 | """
57 | This class implements the inexact Newton GMRES optimizer
58 | """
59 | def __init__(self,problem,regularization = None,sess = None,feed_dict = None,parameters = ParametersInexactNewtonGMRES(),preconditioner = None):
60 | """
61 | The constructor for this class takes:
62 | -problem: hessianlearn.problem.Problem
63 | -regularization: hessianlearn.problem.Regularization
64 | -sess: tf.Session()
65 | -parameters: hyperparameters dictionary
66 | -preconditioner: hessianlearn.problem.Preconditioner
67 | """
68 | if regularization is None:
69 | _regularization = L2Regularization(problem,gamma = 0.0)
70 | else:
71 | _regularization = regularization
72 | super(InexactNewtonGMRES,self).__init__(problem,_regularization,sess,parameters)
73 |
74 | self._sweeps = np.zeros(2)
75 | self.grad = self.problem.gradient + self.regularization.gradient
76 | self.gmres_solver = GMRESSolver(self.problem,self.regularization,\
77 | self.sess,parameters= self.parameters['gmres_parameters'])
78 | self.alpha = 0.0
79 |
80 |
81 | def minimize(self,feed_dict = None,hessian_feed_dict = None):
82 | r"""
83 | Updates using inexact Newton GMRES
84 | """
85 | assert self.sess is not None
86 | assert feed_dict is not None
87 | if hessian_feed_dict is None:
88 | hessian_feed_dict = feed_dict
89 |
90 | self.gradient = self.sess.run(self.grad,feed_dict = feed_dict)
91 |
92 | if self.parameters['globalization'] == 'line_search':
93 | w_dir,on_boundary = self.gmres_solver.solve(-self.gradient,hessian_feed_dict)
94 | w_dir_inner_g = np.inner(w_dir,self.gradient)
95 | initial_cost = self.sess.run(self.problem.loss,feed_dict = feed_dict)
96 | cost_at_candidate = lambda p : self._loss_at_candidate(p,feed_dict = feed_dict)
97 | self.alpha, line_search, line_search_iter = ArmijoLineSearch(w_dir,w_dir_inner_g,\
98 | cost_at_candidate, initial_cost,\
99 | max_backtracking_iter = self.parameters['max_backtracking_iter'])
100 | update = self.alpha*w_dir
101 | self._sweeps += [1+0.5*line_search_iter,2*self.gmres_solver.iter]
102 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
103 | else: # fixed steplength; trust region globalization is not implemented here
104 | self.alpha = self.parameters['alpha']
105 | p,converged = self.gmres_solver.solve(-self.gradient,hessian_feed_dict)
106 | # print(converged)
107 | # if converged:
108 | # print('Converged!')
109 | # else:
110 | # print('NOT CONVERGED!!!!!')
111 | self._sweeps += [1, 2*self.gmres_solver.iter]
112 | self.p = p
113 | update = self.alpha*p
114 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/minresSolver.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | import math
22 | import numpy as np
23 | import tensorflow as tf
24 | if int(tf.__version__[0]) > 1:
25 | import tensorflow.compat.v1 as tf
26 | tf.disable_v2_behavior()
27 |
28 | from ..utilities.parameterList import ParameterList
29 | from ..algorithms import Optimizer
30 | from .. problem import IdentityPreconditioner
31 | from ..problem import L2Regularization
32 | from abc import ABC, abstractmethod
33 |
34 | class Identity(object):
35 | def __init__(self):
36 |
37 | pass
38 |
39 | def __call__(self, x):
40 | return x
41 |
42 |
43 |
44 | def ParametersMINRESSolver(dictionary = {}):
45 | parameters = dictionary
46 | parameters["rel_tolerance"] = [1e-9, "the relative tolerance for the stopping criterion"]
47 | parameters["abs_tolerance"] = [1e-12, "the absolute tolerance for the stopping criterion"]
48 | parameters["max_iter"] = [20, "the maximum number of iterations"]
49 | parameters["zero_initial_guess"] = [True, "if True we start with a 0\
50 | initial guess; if False we use the x as initial guess."]
51 | parameters["print_level"] = [-1, "verbosity level: -1 --> no output on \
52 | screen; 0 --> only final residual at convergence or reason for not not convergence"]
53 |
54 | parameters['coarse_tol'] = [0.5,'coarse tolerance used in calculation \
55 | of relative tolerances for E-W conditions']
56 | return ParameterList(parameters)
57 |
58 |
59 | class MINRESSolver(ABC):
60 | """
61 | This class implements a basic MINRES Solver
62 | """
63 |
64 | reason = ["Maximum Number of Iterations Reached",
65 | "Relative/Absolute residual less than tol",
66 | "Reached a negative direction",
67 | "Reached trust region boundary"
68 | ]
69 | def __init__(self,problem,regularization,sess = None,preconditioner = None,\
70 | x = None,parameters = ParametersMINRESSolver()):
71 | """
72 | The constructor for this class takes:
73 | -problem: hessianlearn.problem.Problem
74 | -regularization: hessianlearn.problem.Regularization
75 | -sess: tf.Session()
76 | -preconditioner: hessianlearn.problem.Preconditioner
77 | """
78 | self.sess = sess
79 | self.problem = problem
80 | self.regularization = regularization
81 | if x is None:
82 | # self.x = tf.Variable(self.problem.gradient.initialized_value())
83 | self.x = self.problem.gradient
84 | else:
85 | self.x = x
86 | self.parameters = parameters
87 |
88 |
89 | self.Aop = self.problem.Hdw + self.regularization.Hdw
90 |
91 | # # Define preconditioner
92 | # if preconditioner is None:
93 | # self.Minv = IdentityPreconditioner(problem,self.problem.dtype)
94 | # else:
95 | # self.Minv = preconditioner
96 |
97 |
98 |
99 |
100 |
101 | def solve(self,b,feed_dict = None,x_0 = None):
102 | r"""
103 | Solve Ax=b by the minimal residual iteration (a MINRES-type method)
104 | as defined in Iterative Methods for Sparse Linear Systems, 2nd ed., by Yousef Saad, p. 140
105 | """
106 | assert self.sess is not None
107 | assert feed_dict is not None
108 |
109 | self.iter = 0
110 | self.converged = False
111 | self.reason_id = 0
112 | x = np.zeros_like(b)
113 |
114 | feed_dict[self.problem.dw] = x
115 | Ax_0 = self.sess.run(self.Aop,feed_dict = feed_dict)
116 | # Calculate initial residual r = Ax_0 -b
117 | r = b - Ax_0
118 | # Calculate tolerance for Eisenstat Walker conditions
119 | rr = np.dot(r,r)
120 | rtol2 = rr * self.parameters["rel_tolerance"] * self.parameters["rel_tolerance"]
121 | atol2 = self.parameters["abs_tolerance"] * self.parameters["abs_tolerance"]
122 | tol = max(rtol2, atol2)
123 | # Wrap the session-backed Hessian apply as a SciPy LinearOperator
124 | from scipy.sparse.linalg import LinearOperator
125 |
126 | def Ap(p):
127 | feed_dict[self.problem.dw] = p
128 | return self.sess.run(self.Aop,feed_dict = feed_dict)
129 |
130 | n = self.problem.dimension
131 |
132 | A = LinearOperator((n,n), matvec=Ap)
133 |
134 | # x is still the zero initial guess from above
135 | p = A(r)
136 |
137 | converged = False
138 | while not converged:
139 | self.iter += 1
140 | alpha = np.dot(p,r)/np.dot(p,p) # minimal residual step length (p,r)/(p,p), with p = A r
141 | x_old = x.copy() # copy, so the in-place update of x below does not also change x_old
142 | x += alpha*r
143 | r -= alpha*p
144 |
145 | p = A(r)
146 | # This is the extra query of the network to see if the direction
147 | # is about to rescale gradient components in indefinite directions
148 | # towards saddle points in which case one can break before
149 | # updating
150 | pAp = np.dot(p,A(p))
151 | if pAp < 0:
152 | return x_old, converged
153 |
154 | converged = (np.dot(r,r) < tol)
155 | if self.iter >= self.parameters["max_iter"]: # bail out once the iteration budget is spent
156 | return x, converged
157 |
158 | return x, converged
159 |
160 |
161 |
162 |
163 |
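164 | # A minimal self-contained sketch of the minimal residual iteration that
165 | # MINRESSolver.solve implements (Saad, Iterative Methods for Sparse Linear
166 | # Systems, 2nd ed., Algorithm 5.3), written for a dense numpy matrix so the
167 | # update formulas can be checked without a tf.Session. Illustration only;
168 | # the solver above does not call this function.
169 | def _minimal_residual_iteration_sketch(A, b, tol = 1e-12, max_iter = 200):
170 | x = np.zeros_like(b)
171 | r = b - A@x
172 | p = A@r
173 | for _ in range(max_iter):
174 | alpha = np.dot(p,r)/np.dot(p,p) # step length (p,r)/(p,p), with p = A r
175 | x += alpha*r
176 | r -= alpha*p
177 | p = A@r
178 | if np.dot(r,r) < tol:
179 | break
180 | return x
181 | # e.g. _minimal_residual_iteration_sketch(np.diag(np.linspace(1.,10.,50)), np.ones(50))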
--------------------------------------------------------------------------------
/hessianlearn/problem/hessian.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 |
19 |
20 | from __future__ import absolute_import, division, print_function
21 | import numpy as np
22 | # import tensorflow as tf
23 | # if int(tf.__version__[0]) > 1:
24 | # import tensorflow.compat.v1 as tf
25 | # tf.disable_v2_behavior()
26 | from abc import ABC, abstractmethod
27 |
28 |
29 | class Hessian(ABC):
30 | """
31 | This class implements methods for the neural network training Hessian.
32 |
33 | Must have a problem and a sess in order to be evaluated
34 | """
35 | def __init__(self,problem=None,sess=None):
36 | """
37 | Create a Hessian given:
38 |
39 | - problem: the description of the neural network training problem
40 | (hessianlearn.problem.Problem)
41 | - sess: the tf.Session() needed for evaluation at run time
42 | """
43 | self._problem = problem
44 | self._sess = sess
45 |
46 | @property
47 | def problem(self):
48 | return self._problem
49 | @property
50 | def sess(self):
51 | return self._sess
52 |
53 | @property
54 | def dimension(self):
55 | return self.problem.dimension
56 |
57 |
58 | @property
59 | def T(self):
60 | # The Hessian is symmetric, so the transpose is the operator itself
61 | return self
62 |
63 |
64 |
65 | def __mul__(self,x):
66 | return self(x)
67 |
68 | def __call__(self,x,feed_dict,verbose = False):
69 | """
70 | This method implements Hessian action, must have a problem and sess
71 | set before this method can be evaluated.
72 | -x: numpy array to be multiplied one at a time
73 | -feed_dict: data used in finite sum Hessian evaluation
74 | -verbose: for printing
75 | """
76 | assert self.problem is not None
77 | assert self.sess is not None
78 |
79 | if len(x.shape) == 1:
80 | feed_dict[self.problem.dw] = x
81 | return self.sess.run(self.problem.Hdw,feed_dict)
82 | elif len(x.shape) == 2:
83 | n_vectors = x.shape[-1]
84 | if self.problem._HdW is None:
85 | if verbose:
86 | print('Total vectors = ',n_vectors)
87 | print('Initializing Hessian blocking')
88 | self.problem._initialize_hessian_blocking(n_vectors)
89 | # When the block sizes agree
90 | if n_vectors == self.problem._hessian_block_size:
91 | feed_dict[self.problem._dW] = x
92 | HdW = self.sess.run(self.problem.HdW,feed_dict)
93 | return HdW
94 | # When the requested block size is smaller
95 | elif n_vectors < self.problem._hessian_block_size:
96 | # The blocked evaluation is roughly 5x faster, so when fewer than
97 | # 1/5 of a block is requested it is faster to either reinitialize the
98 | # blocking or to loop over single Hessian-vector products problem.Hdw
99 | if n_vectors < 0.2*self.problem._hessian_block_size:
100 | # Could reinitialize the blocking or just for loop
101 | # For looping for now
102 | HdW = np.zeros_like(x)
103 | for i in range(n_vectors):
104 | feed_dict[self.problem.dw] = x[:,i]
105 | HdW[:,i] = self.sess.run(self.problem.Hdw,feed_dict)
106 | return HdW
107 | else:
108 | dW = np.zeros((self.problem.dimension,self.problem._hessian_block_size))
109 | dW[:,:n_vectors] = x
110 | feed_dict[self.problem._dW] = dW
111 | HdW = self.sess.run(self.problem.HdW,feed_dict)
112 | return HdW[:,:n_vectors]
113 | # When the requested block size is larger
114 | elif n_vectors > self.problem._hessian_block_size:
115 | HdW = np.zeros_like(x)
116 | block_size = self.problem._hessian_block_size
117 | blocks, remainder = np.divmod(HdW.shape[-1],block_size)
118 | for i in range(blocks):
119 | feed_dict[self.problem._dW] = x[:,i*block_size:(i+1)*block_size]
120 | HdW[:,i*block_size:(i+1)*block_size] = self.sess.run(self.problem.HdW,feed_dict)
121 | # The last vectors are done as a for loop or a zeroed out array
122 | if remainder == 0:
123 | pass
124 | elif remainder > 0 and remainder < 0.2*self.problem._hessian_block_size:
125 | for i in range(n_vectors):
126 | feed_dict[self.problem.dw] = x[:,blocks*block_size+i]
127 | HdW[:,blocks*block_size+i] = self.sess.run(self.problem.Hdw,feed_dict)
128 | else:
129 | dW = np.zeros((self.problem.dimension,self.problem._hessian_block_size))
130 | dW[:,:remainder] = x[:,-remainder:]
131 | feed_dict[self.problem._dW] = dW
132 | HdW[:,-remainder:] = self.sess.run(self.problem.HdW,feed_dict)[:,:remainder] # blocked op on the zero-padded block; keep the filled columns
133 | return HdW
134 | else:
135 | # Many different Hessian mat-vecs interpreted as a tensor?
136 | # Higher order arrays (e.g., batches of matrices) are not supported
137 | raise NotImplementedError('Hessian action is only implemented for 1D and 2D arrays')
138 |
139 | def quadratics(self,x,feed_dict,verbose = False):
140 | """
141 | This method implements Hessian quadratics xTHx.
142 | Must have self._problem and self._sess set before this method can be evaluated.
143 | -x: numpy array to be multiplied one at a time
144 | -feed_dict: data used in finite sum Hessian evaluation
145 | -verbose: for printing
146 | """
147 | assert self.problem is not None
148 | assert self.sess is not None
149 | if len(x.shape) == 1:
150 | feed_dict[self.problem.dw] = x
151 | return self.sess.run(self.problem.H_quadratic,feed_dict)
152 | elif len(x.shape) == 2:
153 | number_of_quadratics = x.shape[1]
154 | H_quads = np.zeros(number_of_quadratics)
155 | if verbose:
156 | try:
157 | from tqdm import tqdm
158 | for i in tqdm(range(number_of_quadratics)):
159 | feed_dict[self.problem.dw] = x[:,i]
160 | H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
161 | except ImportError:
162 | print('No progress bar :(')
163 | for i in range(number_of_quadratics):
164 | feed_dict[self.problem.dw] = x[:,i]
165 | H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
166 | else:
167 | for i in range(number_of_quadratics):
168 | feed_dict[self.problem.dw] = x[:,i]
169 | H_quads[i] = self.sess.run(self.problem.H_quadratic,feed_dict)
170 | return H_quads
171 | else:
172 | raise NotImplementedError('Hessian quadratics are only implemented for 1D and 2D arrays')
173 |
174 |
175 | class HessianWrapper:
176 |
177 | def __init__(self,hessian,data_dictionary):
178 |
179 | self._hessian = hessian
180 | self._data_dictionary = data_dictionary
181 |
182 |
183 | def __call__(self,x):
184 | return self._hessian(x,self._data_dictionary)
185 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/rangeFinders.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import, division, print_function
19 | import time
20 | import sys
21 | import numpy as np
22 |
23 |
24 | from scipy.linalg import cholesky, eigh, solve_triangular, qr, rq
25 |
26 | import time
27 |
28 |
29 | def block_range_finder(A_op,n,epsilon,block_size,verbose = False,seed = 0):
30 | """
31 | Randomized algorithm for block range finding
32 |
33 | Parameters:
34 | -----------
35 | A_op : {Callable} n x n symmetric matrix
36 | Hermitian matrix operator whose range needs to be estimated
37 | y = A_op(dw) is the action of A in the direction dw
38 | n : size of matrix A
39 | epsilon : relative reduction in error used as the stopping tolerance
40 | block_size : number of random vectors added per iteration
41 |
42 | Returns:
43 | --------
44 | Q : range for Aop
45 | """
46 | # Taken from http://people.maths.ox.ac.uk/martinsson/Pubs/2015_randQB.pdf
47 |
48 | my_state = np.random.RandomState(seed=seed)
49 | w = my_state.randn(n,1)
50 | Action = A_op(w)
51 | initial_error = np.linalg.norm(Action)
52 | big_Q = None
53 | converged = False
54 | iteration = 0
55 | while not converged:
56 | # Sample Gaussian random matrix
57 | Omega = my_state.randn(n,block_size)
58 | # Perform QR on action
59 | Q,_ = np.linalg.qr(A_op(Omega))
60 | # Update basis
61 | if big_Q is None:
62 | big_Q = Q
63 | else:
64 | Q -= big_Q@(big_Q.T@Q)
65 | big_Q = np.concatenate((big_Q,Q),axis = 1)
66 | # This QR gets slow after many iterations, only last columns
67 | # need to be orthonormalized
68 | big_Q,_ = np.linalg.qr(big_Q)
69 | # Error estimation
70 | Approximate_Error = Action - big_Q@(big_Q.T@Action)
71 | error = np.linalg.norm(Approximate_Error)
72 | converged = error < epsilon*initial_error
73 | iteration+=1
74 | if verbose:
75 | print('At iteration', iteration, ' error/initial_error is ',error/initial_error,' tolerance is ',epsilon,' converged = ',converged)
76 | if iteration > n//block_size:
77 | break
78 | # I believe that the extra action of A_op in forming B for the QB factorization
79 | # is cheaper to do once after the fact, and is not needed for the matrix
80 | # free randomized error estimator. For this reason I just return Q, and
81 | # do not form B.
82 | return big_Q
83 |
84 |
85 |
86 |
87 | def noise_aware_adaptive_range_finder(Hessian,hessian_feed_dict,rq_estimator_dict_list,\
88 | block_size = None,noise_tolerance = 1.0,epsilon = 1e-1, max_vectors = 20, verbose = False,seed = 0):
89 | """
90 | Randomized algorithm for noise aware block range finding (N.A.A.R.F.)
91 |
92 | Parameters:
93 | -----------
94 | Hessian : hessianlearn.problem.Hessian operator
95 | hessian_feed_dict : data dictionary used for the Hessian-vector products
96 | rq_estimator_dict_list : list of data dictionaries used to sample Rayleigh quotients
97 | block_size : number of random vectors per block (default: 1% of the dimension)
98 | noise_tolerance : signal-to-noise ratio below which directions are deemed too noisy
99 | epsilon : operator error tolerance used as the stopping criterion
100 | verbose : for printing
101 | seed : random seed
102 |
103 | Returns:
104 | --------
105 | Q : range for dominant eigenmodes of Hessian
106 | """
107 |
108 | ###################################################################################
109 | assert type(rq_estimator_dict_list) is list
110 | n = Hessian.dimension
111 | if block_size is None:
112 | block_size = int(0.01*n)
113 | my_state = np.random.RandomState(seed=seed)
114 | w = my_state.randn(n,1)
115 |
116 | H = lambda x: Hessian(x,hessian_feed_dict,verbose = verbose)
117 | Action = H(w)
118 | big_Q = None
119 | converged = False
120 | iteration = 0
121 | rq_noise = 0.
122 |
123 | while not converged:
124 | # Sample Gaussian random matrix
125 | Omega = my_state.randn(n,block_size)
126 | # Perform QR on action
127 | Q,_ = np.linalg.qr(H(Omega))
128 | # Update basis
129 | if big_Q is None:
130 | big_Q = Q
131 | else:
132 | Q -= big_Q@(big_Q.T@Q)
133 | big_Q = np.concatenate((big_Q,Q),axis = 1)
134 | # This QR gets slow after many iterations, only last columns
135 | # need to be orthonormalized
136 | big_Q,_ = np.linalg.qr(big_Q)
137 | # Error estimation is both for operator error
138 | # as well as spectral noise
139 | # Operator error estimation
140 | Approximate_Error = Action - big_Q@(big_Q.T@Action)
141 | operator_error = np.linalg.norm(Approximate_Error)
142 | # Noise error estimation
143 | rq_direction = big_Q[:,-block_size:]
144 | try:
145 | RQ_samples = np.zeros((len(rq_estimator_dict_list),rq_direction.shape[1]))
146 | except:
147 | RQ_samples = np.zeros(len(rq_estimator_dict_list))
148 | if verbose:
149 | try:
150 | from tqdm import tqdm
151 | for samp_i,sample_dictionary in enumerate(tqdm(rq_estimator_dict_list)):
152 | RQ_samples[samp_i] = Hessian.quadratics(rq_direction,sample_dictionary)
153 | except ImportError:
154 | print('Issue with tqdm')
155 | for samp_i,sample_dictionary in enumerate(rq_estimator_dict_list):
156 | RQ_samples[samp_i] = Hessian.quadratics(rq_direction,sample_dictionary)
157 | else:
158 | for samp_i,sample_dictionary in enumerate(rq_estimator_dict_list):
159 | RQ_samples[samp_i] = Hessian.quadratics(rq_direction,sample_dictionary)
160 |
161 | rq_snr = np.abs(np.mean(RQ_samples,axis=0))/np.std(RQ_samples,axis = 0)
162 | too_noisy = (rq_snr < noise_tolerance).any()
163 | converged = (operator_error < epsilon) or too_noisy
164 | # print(80*'#')
165 | # print('rq_snr = ',rq_snr)
166 | # print('rq_snr < noise_tolerance = ',rq_snr < noise_tolerance)
167 | # print('too noisy? = ',too_noisy)
168 | # print('(operator_error < epsilon) = ',(operator_error < epsilon))
169 | # print(80*'#')
170 |
171 | iteration+=1
172 | if verbose:
173 | print('At iteration', iteration, 'operator error is ',operator_error,' convergence = ',(operator_error < epsilon))
174 | if big_Q.shape[-1] >= max_vectors:
175 | break
176 |
177 | if iteration > n//block_size:
178 | break
179 | # I believe that the extra action of A_op in forming B for the QB factorization
180 | # is cheaper to do once after the fact, and is not needed for the matrix
181 | # free randomized error estimator. For this reason I just return Q, and
182 | # do not form B.
183 | return big_Q
184 |
185 |
186 |
187 |
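188 | # A minimal self-test sketch for block_range_finder (not part of the original
189 | # module), run on a small dense matrix with rapidly decaying spectrum; sizes
190 | # and tolerance are illustrative. The recovered basis should capture the
191 | # dominant range of A to the requested relative tolerance.
192 | if __name__ == '__main__':
193 | n = 200
194 | A = np.diag(0.9**np.arange(n))
195 | A_op = lambda X: A@X
196 | Q = block_range_finder(A_op, n, epsilon = 1e-2, block_size = 10, verbose = True)
197 | rel_err = np.linalg.norm(A - Q@(Q.T@A))/np.linalg.norm(A)
198 | print('columns used:', Q.shape[1], ', relative range error:', rel_err)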
--------------------------------------------------------------------------------
/hessianlearn/algorithms/inexactNewtonCG.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 |
24 | from ..utilities.parameterList import ParameterList
25 | from ..algorithms import Optimizer, CGSolver, ParametersCGSolver
26 | from ..algorithms.globalization import ArmijoLineSearch, TrustRegion
27 | from ..problem import L2Regularization
28 |
29 |
30 |
31 |
32 | def ParametersInexactNewtonCG(parameters = {}):
33 | parameters['alpha'] = [1e0, "Initial steplength, or learning rate"]
34 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
35 | parameters['abs_tolerance'] = [1e-4,"Absolute convergence when sqrt(g,g) <= abs_tolerance"]
36 | parameters['max_NN_evals_per_batch'] = [10000, "Scale constant for maximum neural network evaluations per datum"]
37 | parameters['max_NN_evals'] = [None, "Maximum number of neural network evaluations"]
38 |
39 |
40 | parameters['cg_parameters'] = [ ParametersCGSolver(),'CG Parameters']
41 | # CG solver parameters
42 | parameters['cg_coarse_tol'] = [0.5,'CG coarse solve tolerance']
43 | parameters['cg_max_iter'] = [10,'CG maximum iterations']
44 | parameters['eta_mode'] = [0, 'eta mode for E-W conditions:0,1,2']
45 | parameters['globalization'] = [None, 'Choose from trust_region, line_search or none']
46 | parameters['max_backtracking_iter'] = [10, 'max backtracking iterations for line search']
47 |
48 | # Reasons for convergence failure
49 | parameters['reasons'] = [[], 'list of reasons for termination']
50 |
51 |
52 | return ParameterList(parameters)
53 |
54 |
55 |
56 |
57 |
58 |
59 | class InexactNewtonCG(Optimizer):
60 | """
61 | This class implements the inexact Newton CG optimizer
62 | """
63 | def __init__(self,problem,regularization = None,sess = None,feed_dict = None,\
64 | parameters = ParametersInexactNewtonCG(),preconditioner = None):
65 | """
66 | The constructor for this class takes:
67 | -problem: hessianlearn.problem.Problem
68 | -regularization: hessianlearn.problem.Regularization
69 | -sess: tf.Session()
70 | -parameters: hyperparameters dictionary
71 | -preconditioner: hessianlearn.problem.Preconditioner
72 | """
73 | if regularization is None:
74 | _regularization = L2Regularization(problem,gamma = 0.0)
75 | else:
76 | _regularization = regularization
77 | super(InexactNewtonCG,self).__init__(problem,_regularization,sess,parameters)
78 |
79 |
80 | self.grad = self.problem.gradient + self.regularization.gradient
81 | self.cg_solver = CGSolver(self.problem,self.regularization,self.sess,parameters= self.parameters['cg_parameters'])
82 | self._sweeps = np.zeros(2)
83 | self.trust_region_initialized = False
84 | if self.parameters['globalization'] == 'trust_region':
85 | self.initialize_trust_region()
86 | self.alpha = 0.0
87 |
88 |
89 |
90 | def initialize_trust_region(self):
91 | """
92 | Initializes trust region
93 | """
94 | if not self.parameters['globalization'] == 'trust_region':
95 | self.parameters['globalization'] = 'trust_region'
96 | self.trust_region = TrustRegion()
97 | self.cg_solver.initialize_trust_region(coarse_tol = self.parameters['cg_coarse_tol'])
98 | self.cg_solver.set_trust_region_radius(self.trust_region.radius)
99 | self.trust_region_initialized = True
100 |
101 | def minimize(self,feed_dict = None,hessian_feed_dict = None):
102 | r"""
103 | Solves using inexact Newton CG algorithm
104 | -feed_dict: the data dictionary used for evaluating stochastic gradients and cost
105 | -hessian_feed_dict: smaller data dictionary used for stochastic Hessian
106 | """
107 | assert self.sess is not None
108 | assert feed_dict is not None
109 | if hessian_feed_dict is None:
110 | hessian_feed_dict = feed_dict
111 |
112 | gradient = self.sess.run(self.grad,feed_dict = feed_dict)
113 |
114 |
115 |
116 | if self.parameters['globalization'] is None:
117 | self.alpha = self.parameters['alpha']
118 | p,on_boundary = self.cg_solver.solve(-gradient,hessian_feed_dict)
119 | self._sweeps += [1,2*self.cg_solver.iter]
120 | self.p = p
121 | update = self.alpha*p
122 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
123 |
124 | elif self.parameters['globalization'] == 'line_search':
125 | w_dir,on_boundary = self.cg_solver.solve(-gradient,hessian_feed_dict)
126 | w_dir_inner_g = np.inner(w_dir,gradient)
127 | initial_cost = self.sess.run(self.problem.loss,feed_dict = feed_dict)
128 | cost_at_candidate = lambda p : self._loss_at_candidate(p,feed_dict = feed_dict)
129 | self.alpha, line_search, line_search_iter = ArmijoLineSearch(w_dir,w_dir_inner_g,\
130 | cost_at_candidate, initial_cost,\
131 | max_backtracking_iter = self.parameters['max_backtracking_iter'])
132 | update = self.alpha*w_dir
133 | self._sweeps += [1+0.5*line_search_iter,2*self.cg_solver.iter]
134 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
135 |
136 | elif self.parameters['globalization'] == 'trust_region':
137 | if not self.trust_region_initialized:
138 | self.initialize_trust_region()
139 | # Set trust region radius
140 | self.cg_solver.set_trust_region_radius(self.trust_region.radius)
141 | # Solve for candidate step
142 | p, on_boundary = self.cg_solver.solve(-gradient,hessian_feed_dict)
143 | pg = np.dot(p,gradient)
144 | self._sweeps += [1,2*self.cg_solver.iter]
145 | self.p = p
146 | # Calculate predicted reduction
147 | feed_dict[self.cg_solver.problem.dw] = p
148 | Hp = self.sess.run(self.cg_solver.Aop,feed_dict)
149 | pHp = np.dot(p,Hp)
150 | predicted_reduction = -pg-0.5*pHp
151 | # Calculate actual reduction
152 | misfit,reg = self.sess.run((self.problem.loss,self.regularization.cost),\
153 | feed_dict = feed_dict)
154 | cost = misfit + reg
155 | w_copy = self.sess.run(self.problem.w)
156 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:p})
157 |
158 | misfit,reg = self.sess.run((self.problem.loss,self.regularization.cost),\
159 | feed_dict = feed_dict)
160 | cost_new = misfit + reg
161 | actual_reduction = cost - cost_new
162 |
163 | # Decide whether or not to accept the step
164 | accept_step = self.trust_region.evaluate_step(actual_reduction = actual_reduction,\
165 | predicted_reduction = predicted_reduction,on_boundary = on_boundary)
166 | # If the step is rejected, restore the weights saved before the update
167 | if not accept_step:
168 | self.sess.run(self.problem._assignment_ops,\
169 | feed_dict = {self.problem._assignment_placeholder:w_copy})
170 |
171 |
172 |
173 |
174 |
175 |
176 |
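177 | # A minimal sketch of the reduction bookkeeping done in the trust region
178 | # branch of minimize() above, on a quadratic f(w) = 0.5 w^T A w - b^T w where
179 | # the predicted and actual reductions coincide up to rounding, so the ratio
180 | # passed to TrustRegion.evaluate_step is ~1 and the step would be accepted.
181 | # Illustration only; the helper name and values here are not library API.
182 | def _trust_region_reduction_sketch():
183 | A = np.diag([1.0, 4.0])
184 | b = np.array([1.0, 1.0])
185 | w = np.zeros(2)
186 | g = A@w - b # gradient of the quadratic at w
187 | p = np.linalg.solve(A, -g) # exact Newton step (CG would approximate this)
188 | predicted_reduction = -np.dot(p,g) - 0.5*np.dot(p, A@p)
189 | f = lambda v: 0.5*np.dot(v, A@v) - np.dot(b, v)
190 | actual_reduction = f(w) - f(w + p)
191 | return actual_reduction/predicted_reduction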
--------------------------------------------------------------------------------
/hessianlearn/algorithms/randomizedEigensolver.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import, division, print_function
19 | import time
20 | import sys
21 | import numpy as np
22 |
23 |
24 | from scipy.linalg import cholesky, eigh, solve_triangular, qr, rq
25 |
26 | import time
27 |
28 |
29 | def low_rank_hessian(optimizer,feed_dict,k,p=None,verbose = False):
30 | H = lambda x: optimizer.H(x,feed_dict)
31 | n = optimizer.problem.dimension
32 | return randomized_eigensolver(H, n, k,p = p,verbose = verbose)
33 |
34 |
35 | def randomized_eigensolver(Aop, n, k, p = None,seed = 0,verbose = False):
36 | """
37 | Randomized algorithm for Hermitian eigenvalue problems
38 | Returns k largest eigenvalues computed using the randomized algorithm
39 |
40 |
41 | Parameters:
42 | -----------
43 | Aop : {Callable} n x n
44 | Hermitian matrix operator whose eigenvalues need to be estimated
45 | y = Aop(dw) is the action of A in the direction dw
46 |
47 | n : int,
48 | number of row/columns of the operator A
49 |
50 | k : int,
51 | number of eigenvalues/vectors to be estimated
52 | p : int, optional
53 | oversampling parameter which can improve accuracy of resulting solution
54 | Default: int(0.01*k), clipped so that k+p <= n
55 |
56 | Returns:
57 | --------
58 |
59 | d : ndarray, (k,)
60 | eigenvalues arranged in descending order
61 | U : ndarray, (n,k)
62 | eigenvectors arranged according to eigenvalues
63 |
64 | References:
65 | -----------
66 | .. [1] Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp. "Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions." SIAM review 53.2 (2011): 217-288.
67 | Examples:
68 | ---------
69 | >>> import numpy as np
70 | >>> n = 100
71 | >>> A = np.diag(0.95**np.arange(n))
72 | >>> Aop = lambda dw: np.dot(A,dw)
73 | >>> k = 10
74 | >>> p = 5
75 | >>> lmbda, U = randomized_eigensolver(Aop, n, k, p)
76 | """
77 | if n == k:
78 | p = 0
79 | elif p is None:
80 | p = int(0.01*k)
81 | if k+p > n:
82 | p = n - k
83 | random_state = np.random.RandomState(seed=seed)
84 | Omega = random_state.randn(n,k+p)
85 | n = Omega.shape[0]
86 |
87 | assert(n >= k )
88 |
89 | m = Omega.shape[1]
90 | Y = Aop(Omega)
91 |
92 | # print('condition number for Y = ',np.linalg.cond(Y))
93 | Q,_ = qr(Y, mode = 'economic')
94 | T = np.zeros((m,m),dtype = 'd')
95 | if verbose:
96 | print('Forming small square matrix')
97 | AQ = Aop(Q)
98 | T = Q.T@AQ
99 |
100 | # Eigenvalue problem for T
101 | if verbose:
102 | print('Computing eigenvalue decomposition')
103 | d, V = eigh(T)
104 | d_abs = np.abs(d) #sort by absolute value (we want the k largest eigenvalues regardless of sign)
105 | sort_perm = d_abs.argsort()
106 |
107 | sort_perm = sort_perm[::-1]
108 |
109 | d = d[sort_perm[0:k]]
110 | V = V[:, sort_perm[0:k]]
111 |
112 | #Compute eigenvectors
113 | U = np.dot(Q, V)
114 |
115 | return d[:k], U[:,:k]
116 |
117 |
118 | def eigensolver_from_range(Aop, Q,verbose = False):
119 | """
120 | Randomized algorithm for Hermitian eigenvalue problems
121 | Returns k largest eigenvalues computed using the randomized algorithm
122 |
123 |
124 | Parameters:
125 | -----------
126 | Aop : {Callable} n x n
127 | Hermitian matrix operator whose eigenvalues need to be estimated
128 | y = Aop(dw) is the action of A in the direction dw
129 | Q : Array n x r
130 |
131 |
132 | Returns:
133 | --------
134 |
135 | d : ndarray, (k,)
136 | eigenvalues arranged in descending order
137 | U : ndarray, (n,k)
138 | eigenvectors arranged according to eigenvalues
139 | """
140 | m = Q.shape[1]
141 | T = np.zeros((m,m),dtype = 'd')
142 | if verbose:
143 | print('Forming small square matrix')
144 | AQ = Aop(Q)
145 | T = Q.T@AQ
146 | # Eigenvalue problem for T
147 | if verbose:
148 | print('Computing eigenvalue decomposition')
149 | d, V = eigh(T)
150 | d_abs = np.abs(d) #sort by absolute value (we want the k largest eigenvalues regardless of sign)
151 | sort_perm = d_abs.argsort()
152 |
153 | sort_perm = sort_perm[::-1]
154 |
155 | d = d[sort_perm[0:m]]
156 | V = V[:, sort_perm[0:m]]
157 |
158 | #Compute eigenvectors
159 | U = np.dot(Q, V)
160 |
161 | return d[:m], U[:,:m]
162 |
163 | def randomized_double_pass_eigensolver(Aop, Y, k):
164 | """
165 | Randomized algorithm for Hermitian eigenvalue problems
166 | Returns k largest eigenvalues computed using the randomized algorithm
167 |
168 | Parameters:
169 | -----------
170 | Aop : {Callable} n x n
171 | Hermitian matrix operator whose eigenvalues need to be estimated
172 | y = Aop(dw) is the action of A in the direction dw
173 | Y = Aop(Omega) : precomputed action of Aop on Omega, a m x n Array of (presumably) sampled Gaussian or l-percent sparse random vectors (row)
174 | k : int,
175 | number of eigenvalues/vectors to be estimated, 0 < k < m
176 | Returns:
177 | --------
178 |
179 | lmbda : ndarray, (k,)
180 | eigenvalues arranged in descending order
181 | Ut : ndarray, (k, n)
182 | eigenvectors arranged according to eigenvalues, rows are eigenvectors
183 |
184 | References:
185 | -----------
186 | .. [1] Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp. "Finding structure with randomness: Probabilistic algorithms for constructing approximate matrix decompositions." SIAM review 53.2 (2011): 217-288.
187 | .. [2] Algorithm 2 of Arvind paper
188 | Examples:
189 | ---------
190 | >>> import numpy as np
191 | >>> n = 100
192 | >>> A = np.diag(0.95**np.arange(n))
193 | >>> Aop = lambda dw: np.dot(A,dw)
194 | >>> k = 10
195 | >>> p = 5
196 | >>> Omega = np.random.randn(n, k+p)
197 | >>> lmbda, Ut = randomized_double_pass_eigensolver(Aop, Aop(Omega).T, k)
198 | """
199 | raise NotImplementedError("Need to reimplement this function")
200 | m, n = Y.shape
201 | assert(n >= m >= k) #m = k + p ( p is the oversampling for Omega, to ensure we get a good random projection basis)
202 | Q, _ = qr(Y.T, mode='economic')
203 | T = (Aop(Q.T) @ Q).T #m foward problems , m x m small matrix
204 | # T = .5*T + .5*T.T
205 |
206 | #Eigen subproblem
207 | lmbda, V = eigh(T, turbo=True, overwrite_a=True, check_finite=False)
208 | inds = np.abs(lmbda).argsort()[::-1]
209 | lmbda = lmbda[inds[0:k]]
210 | V = V[:, inds[0:k]] #S in the original paper m x m
211 |
212 | #Compute eigenvectors
213 | Ut = (Q @ V).T
214 | return lmbda, Ut
215 |
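216 | # A minimal self-test sketch (not part of the original module) running the
217 | # docstring example for randomized_eigensolver and checking the result against
218 | # the known spectrum of the diagonal test matrix; sizes are illustrative.
219 | if __name__ == '__main__':
220 | n, k, p = 100, 10, 5
221 | A = np.diag(0.95**np.arange(n))
222 | Aop = lambda dw: np.dot(A, dw)
223 | d, U = randomized_eigensolver(Aop, n, k, p = p)
224 | print('largest recovered eigenvalue:', d[0], '(exact: 1.0)')
225 | print('orthonormality error:', np.linalg.norm(U.T@U - np.eye(k)))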
--------------------------------------------------------------------------------
/applications/transfer_learning/imagenet_cifar10_classification_evaluate_test.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 |
19 | import numpy as np
20 | import os
21 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
22 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
23 | os.environ["KMP_WARNINGS"] = "FALSE"
24 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
25 | import pickle
26 | import tensorflow as tf
27 | import time, datetime
28 | # if int(tf.__version__[0]) > 1:
29 | # import tensorflow.compat.v1 as tf
30 | # tf.disable_v2_behavior()
31 |
32 |
33 | # Memory issue with GPUs
34 | gpu_devices = tf.config.experimental.list_physical_devices('GPU')
35 | for device in gpu_devices:
36 | tf.config.experimental.set_memory_growth(device, True)
37 | # Load hessianlearn library
38 | import sys
39 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
40 | from hessianlearn import *
41 |
42 | # Parse run specifications
43 | from argparse import ArgumentParser
44 |
45 | parser = ArgumentParser(add_help=True)
46 | parser.add_argument("-optimizer", dest='optimizer',required=False, default = 'lrsfn', help="optimizer type",type=str)
47 | parser.add_argument('-fixed_step',dest = 'fixed_step',\
48 | required= False,default = 1,help='boolean for fixed step vs globalization',type = int)
49 | parser.add_argument('-alpha',dest = 'alpha',required = False,default = 1e-4,help= 'learning rate alpha',type=float)
50 | parser.add_argument('-hessian_low_rank',dest = 'hessian_low_rank',required= False,default = 40,help='low rank for sfn',type = int)
51 | parser.add_argument('-record_spectrum',dest = 'record_spectrum',\
52 | required= False,default = 0,help='boolean for recording spectrum',type = int)
53 | # parser.add_argument('-weight_burn_in',dest = 'weight_burn_in',\
54 | # required= False,default = 0,help='',type = int)
55 |
56 | # parser.add_argument('-data_seed',dest = 'data_seed',\
57 | # required= False,default = 0,help='',type = int)
58 |
59 | parser.add_argument('-batch_size',dest = 'batch_size',required= False,default = 32,help='batch size',type = int)
60 | parser.add_argument('-hess_batch_size',dest = 'hess_batch_size',required= False,default = 8,help='hess batch size',type = int)
61 | parser.add_argument('-keras_epochs',dest = 'keras_epochs',required= False,default = 50,help='keras_epochs',type = int)
62 | parser.add_argument("-keras_opt", dest='keras_opt',required=False, default = 'adam', help="optimizer type for keras",type=str)
63 | parser.add_argument('-keras_alpha',dest = 'keras_alpha',required= False,default = 1e-3,help='keras learning rate',type = float)
64 | parser.add_argument('-max_sweeps',dest = 'max_sweeps',required= False,default = 1,help='max sweeps',type = float)
65 | parser.add_argument('-weights_file',dest = 'weights_file',required= False,default = 'None',help='weight file pickle',type = str)
66 |
67 | args = parser.parse_args()
68 |
69 | try:
70 | tf.set_random_seed(0)
71 | except AttributeError: # tf.set_random_seed was removed in TF2
72 | tf.random.set_seed(0)
73 |
74 | # GPU Environment Details
75 | gpu_available = tf.test.is_gpu_available()
76 | built_with_cuda = tf.test.is_built_with_cuda()
77 | print(80*'#')
78 | print(('IS GPU AVAILABLE: '+str(gpu_available)).center(80))
79 | print(('IS BUILT WITH CUDA: '+str(built_with_cuda)).center(80))
80 | print(80*'#')
81 |
82 | settings = {}
83 | # Set run specifications
84 | # Data specs
85 | settings['batch_size'] = args.batch_size
86 | settings['hess_batch_size'] = args.hess_batch_size
87 |
88 |
89 | ################################################################################
90 | # Instantiate data
91 | (x_train, y_train), (_x_test, _y_test) = tf.keras.datasets.cifar10.load_data()
92 |
93 | # # Normalize the data
94 | # x_train = x_train.astype('float32') / 255.
95 | # x_test = x_test.astype('float32') / 255.
96 |
97 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
98 | x_test_full = tf.keras.applications.resnet50.preprocess_input(_x_test)
99 | x_val = x_test_full[:2000]
100 | x_test = x_test_full[2000:]
101 |
102 | y_train = tf.keras.utils.to_categorical(y_train)
103 | y_test_full = tf.keras.utils.to_categorical(_y_test)
104 | y_val = y_test_full[:2000]
105 | y_test = y_test_full[2000:]
106 |
107 | ################################################################################
108 | # Create the neural network in keras
109 |
110 | # tf.keras.backend.set_floatx('float64')
111 |
112 | resnet_input_shape = (200,200,3)
113 | input_tensor = tf.keras.Input(shape = resnet_input_shape)
114 |
115 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = 'imagenet',include_top=False,input_tensor=input_tensor)
116 |
117 | for layer in pretrained_resnet50.layers[:143]:
118 | layer.trainable = False
119 |
120 | classifier = tf.keras.models.Sequential()
121 | classifier.add(tf.keras.layers.Input(shape=(32,32,3)))
122 | classifier.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, resnet_input_shape[:2])))
123 | classifier.add(pretrained_resnet50)
124 | classifier.add(tf.keras.layers.Flatten())
125 | classifier.add(tf.keras.layers.BatchNormalization())
126 | classifier.add(tf.keras.layers.Dense(64, activation='relu'))
127 | classifier.add(tf.keras.layers.Dropout(0.5))
128 | classifier.add(tf.keras.layers.BatchNormalization())
129 | classifier.add(tf.keras.layers.Dense(10, activation='softmax'))
130 |
131 |
132 | if args.keras_opt == 'adam':
133 | optimizer = tf.keras.optimizers.Adam(learning_rate = args.keras_alpha,epsilon = 1e-8)
134 | elif args.keras_opt == 'sgd':
135 | optimizer = tf.keras.optimizers.SGD(learning_rate=args.keras_alpha)
136 | else:
137 | raise ValueError('Unsupported keras_opt: ' + args.keras_opt)
138 |
139 | classifier.compile(optimizer=optimizer,
140 | loss=tf.keras.losses.CategoricalCrossentropy(), # the final layer applies softmax, so the outputs are probabilities, not logits
141 | metrics=['accuracy'])
142 |
143 | loss_test_0, acc_test_0 = classifier.evaluate(x_test,y_test,verbose=2)
144 | print('acc_test = ',acc_test_0)
145 | loss_val_0, acc_val_0 = classifier.evaluate(x_val,y_val,verbose=2)
146 | print('acc_val = ',acc_val_0)
147 |
148 |
149 | if args.weights_file != 'None':
150 | try:
151 | with open(args.weights_file, 'rb') as weights_pickle:
152 | best_weights = pickle.load(weights_pickle)['best_weights']
153 | for layer_name,weight in best_weights.items():
154 | classifier.get_layer(layer_name).set_weights(weight)
155 | except:
156 | print('Issue loading best weights')
157 |
158 | loss_test_final, acc_test_final = classifier.evaluate(x_test,y_test,verbose=2)
159 | print('acc_test final = ',acc_test_final)
160 | loss_val_final, acc_val_final = classifier.evaluate(x_val,y_val,verbose=2)
161 | print('acc_val final = ',acc_val_final)
162 |
163 | ################################################################################
164 | # Evaluate again on all the data.
165 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
166 |
167 | # # Normalize the data
168 | # x_train = x_train.astype('float32') / 255.
169 | # x_test = x_test.astype('float32') / 255.
170 |
171 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
172 | x_test = tf.keras.applications.resnet50.preprocess_input(x_test)
173 |
174 | y_train = tf.keras.utils.to_categorical(y_train)
175 | y_test = tf.keras.utils.to_categorical(y_test)
176 |
177 | loss_test_total, acc_test_total = classifier.evaluate(x_test,y_test,verbose=2)
178 | print(80*'#')
179 | print('After hessianlearn training'.center(80))
180 | print('acc_test_total = ',acc_test_total)
181 |
--------------------------------------------------------------------------------
/applications/transfer_learning/imagenet_cifar100_classification_evaluate_test.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 |
19 | import numpy as np
20 | import os
21 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
22 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
23 | os.environ["KMP_WARNINGS"] = "FALSE"
24 | # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
25 | import pickle
26 | import tensorflow as tf
27 | import time, datetime
28 | # if int(tf.__version__[0]) > 1:
29 | # import tensorflow.compat.v1 as tf
30 | # tf.disable_v2_behavior()
31 |
32 |
33 | # Memory issue with GPUs
34 | gpu_devices = tf.config.experimental.list_physical_devices('GPU')
35 | for device in gpu_devices:
36 | tf.config.experimental.set_memory_growth(device, True)
37 | # Load hessianlearn library
38 | import sys
39 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
40 | from hessianlearn import *
41 |
42 | # Parse run specifications
43 | from argparse import ArgumentParser
44 |
45 | parser = ArgumentParser(add_help=True)
46 | parser.add_argument("-optimizer", dest='optimizer',required=False, default = 'lrsfn', help="optimizer type",type=str)
47 | parser.add_argument('-fixed_step',dest = 'fixed_step',\
48 | required= False,default = 1,help='boolean for fixed step vs globalization',type = int)
49 | parser.add_argument('-alpha',dest = 'alpha',required = False,default = 1e-4,help= 'learning rate alpha',type=float)
50 | parser.add_argument('-hessian_low_rank',dest = 'hessian_low_rank',required= False,default = 40,help='low rank for sfn',type = int)
51 | parser.add_argument('-record_spectrum',dest = 'record_spectrum',\
52 | required= False,default = 0,help='boolean for recording spectrum',type = int)
53 | # parser.add_argument('-weight_burn_in',dest = 'weight_burn_in',\
54 | # required= False,default = 0,help='',type = int)
55 |
56 | # parser.add_argument('-data_seed',dest = 'data_seed',\
57 | # required= False,default = 0,help='',type = int)
58 |
59 | parser.add_argument('-batch_size',dest = 'batch_size',required= False,default = 32,help='batch size',type = int)
60 | parser.add_argument('-hess_batch_size',dest = 'hess_batch_size',required= False,default = 8,help='hess batch size',type = int)
61 | parser.add_argument('-keras_epochs',dest = 'keras_epochs',required= False,default = 50,help='keras_epochs',type = int)
62 | parser.add_argument("-keras_opt", dest='keras_opt',required=False, default = 'adam', help="optimizer type for keras",type=str)
63 | parser.add_argument('-keras_alpha',dest = 'keras_alpha',required= False,default = 1e-3,help='keras learning rate',type = float)
64 | parser.add_argument('-max_sweeps',dest = 'max_sweeps',required= False,default = 1,help='max sweeps',type = float)
65 | parser.add_argument('-weights_file',dest = 'weights_file',required= False,default = 'None',help='weight file pickle',type = str)
66 |
67 | args = parser.parse_args()
68 |
69 | try:
70 | tf.set_random_seed(0)
71 | except AttributeError: # tf.set_random_seed was removed in TF2
72 | tf.random.set_seed(0)
73 |
74 | # GPU Environment Details
75 | gpu_available = tf.test.is_gpu_available()
76 | built_with_cuda = tf.test.is_built_with_cuda()
77 | print(80*'#')
78 | print(('IS GPU AVAILABLE: '+str(gpu_available)).center(80))
79 | print(('IS BUILT WITH CUDA: '+str(built_with_cuda)).center(80))
80 | print(80*'#')
81 |
82 | settings = {}
83 | # Set run specifications
84 | # Data specs
85 | settings['batch_size'] = args.batch_size
86 | settings['hess_batch_size'] = args.hess_batch_size
87 |
88 |
89 | ################################################################################
90 | # Instantiate data
91 | (x_train, y_train), (_x_test, _y_test) = tf.keras.datasets.cifar100.load_data()
92 |
93 | # # Normalize the data
94 | # x_train = x_train.astype('float32') / 255.
95 | # x_test = x_test.astype('float32') / 255.
96 |
97 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
98 | x_test_full = tf.keras.applications.resnet50.preprocess_input(_x_test)
99 | x_val = x_test_full[:2000]
100 | x_test = x_test_full[2000:]
101 |
102 | y_train = tf.keras.utils.to_categorical(y_train)
103 | y_test_full = tf.keras.utils.to_categorical(_y_test)
104 | y_val = y_test_full[:2000]
105 | y_test = y_test_full[2000:]
106 |
107 | ################################################################################
108 | # Create the neural network in keras
109 |
110 | # tf.keras.backend.set_floatx('float64')
111 |
112 | resnet_input_shape = (200,200,3)
113 | input_tensor = tf.keras.Input(shape = resnet_input_shape)
114 |
115 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = 'imagenet',include_top=False,input_tensor=input_tensor)
116 |
117 | for layer in pretrained_resnet50.layers[:143]:
118 | layer.trainable = False
119 |
120 |
121 |
122 | classifier = tf.keras.models.Sequential()
123 | classifier.add(tf.keras.layers.Input(shape=(32,32,3)))
124 | classifier.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, resnet_input_shape[:2])))
125 | classifier.add(pretrained_resnet50)
126 | classifier.add(tf.keras.layers.Flatten())
127 | classifier.add(tf.keras.layers.BatchNormalization())
128 | classifier.add(tf.keras.layers.Dense(128, activation='relu'))
129 | classifier.add(tf.keras.layers.Dropout(0.5))
130 | classifier.add(tf.keras.layers.BatchNormalization())
131 | classifier.add(tf.keras.layers.Dense(100, activation='softmax'))
132 |
133 |
134 | if args.keras_opt == 'adam':
135 | optimizer = tf.keras.optimizers.Adam(learning_rate = args.keras_alpha,epsilon = 1e-8)
136 | elif args.keras_opt == 'sgd':
137 | optimizer = tf.keras.optimizers.SGD(learning_rate=args.keras_alpha)
138 | else:
139 | raise ValueError('Unsupported keras_opt: ' + args.keras_opt)
140 |
141 | classifier.compile(optimizer=optimizer,
142 | loss=tf.keras.losses.CategoricalCrossentropy(), # the final layer applies softmax, so the outputs are probabilities, not logits
143 | metrics=['accuracy'])
144 |
145 | loss_test_0, acc_test_0 = classifier.evaluate(x_test,y_test,verbose=2)
146 | print('acc_test = ',acc_test_0)
147 | loss_val_0, acc_val_0 = classifier.evaluate(x_val,y_val,verbose=2)
148 | print('acc_val = ',acc_val_0)
149 |
150 |
151 | if args.weights_file != 'None':
152 | try:
153 | with open(args.weights_file, 'rb') as weights_pickle:
154 | best_weights = pickle.load(weights_pickle)['best_weights']
155 | for layer_name,weight in best_weights.items():
156 | classifier.get_layer(layer_name).set_weights(weight)
157 | except:
158 | print('Issue loading best weights')
159 |
160 | loss_test_final, acc_test_final = classifier.evaluate(x_test,y_test,verbose=2)
161 | print('acc_test final = ',acc_test_final)
162 | loss_val_final, acc_val_final = classifier.evaluate(x_val,y_val,verbose=2)
163 | print('acc_val final = ',acc_val_final)
164 |
165 | ################################################################################
166 | # Evaluate again on all the data.
167 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
168 |
169 | # # Normalize the data
170 | # x_train = x_train.astype('float32') / 255.
171 | # x_test = x_test.astype('float32') / 255.
172 |
173 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
174 | x_test = tf.keras.applications.resnet50.preprocess_input(x_test)
175 |
176 | y_train = tf.keras.utils.to_categorical(y_train)
177 | y_test = tf.keras.utils.to_categorical(y_test)
178 |
179 | loss_test_total, acc_test_total = classifier.evaluate(x_test,y_test,verbose=2)
180 | print(80*'#')
181 | print('After hessianlearn training'.center(80))
182 | print('acc_test_total = ',acc_test_total)
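183 |
184 | # Example invocation (the weights file name is illustrative; it should point
185 | # to a pickle containing a 'best_weights' dictionary saved during training):
186 | # python imagenet_cifar100_classification_evaluate_test.py -weights_file best_weights.pkl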
183 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/varianceBasedNystrom.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Nick Alger
16 |
17 | import numpy as np
18 |
19 | np.random.seed(0)
20 |
21 |
22 | def variance_based_nystrom(apply_AA, num_cols_A, oversampling_parameter=5, block_size=10,
23 | std_tol=0.5, max_bad_vectors=5, max_vectors=100, verbose=True):
24 | """
25 | Computes approximate truncated eigenvalue decomposition
26 | A = U D U^T
27 | of a n x n matrix A which is given by the following sum of matrices:
28 | A = (A1 + A2 + ... + Am)/m.
29 | U is an n x r orthonormal matrix, and D = diag(dd).
30 |
31 |     The eigenvalue decomposition is terminated when the relative standard deviation
32 |     of the eigenvalue estimates exceeds the threshold std_tol for at least
33 |     max_bad_vectors eigenvectors. Only eigenvalues which do not exceed std_tol are returned.
34 |
35 | apply_AA is a list of callables, where matvecs with the matrices Ak are computed via
36 | apply_AA[k](x) = Ak * x.
37 |
38 | num_cols_A is the number of columns of A (A is n x n, num_cols_A = n)
39 |
40 |     oversampling_parameter is the number of extra vectors used within the randomized SVD
41 |
42 | block_size is the number of random vectors per group used in the randomized eigenvalue method.
43 |
44 |     max_vectors is the maximum rank of the truncated eigenvalue decomposition
45 | """
46 | op = oversampling_parameter
47 | n = num_cols_A
48 | m = len(apply_AA)
49 |
50 | Q = np.zeros((n,0))
51 | Theta = np.zeros((0,0,m))
52 | num_bad_vectors = 0
53 | while num_bad_vectors < max_bad_vectors:
54 | Q1 = Q
55 | Theta11 = Theta
56 |
57 | Y = get_random_range_vectors(apply_AA, n, block_size)
58 | Y_perp = Y - np.dot(Q,np.dot(Q.T, Y))
59 | Q2,_ = np.linalg.qr(Y_perp)
60 | Q2 = Q2.reshape((n,-1)) # Reshape to guard against case block_size==1
61 | Q = np.hstack([Q1, Q2])
62 |
63 | Theta = compute_or_update_Theta(Q1, Q2, Theta11, apply_AA)
64 | dd, U, V = finish_computing_eigenvalue_decomposition(Q, Theta)
65 | _, all_std = compute_rayleigh_statistics(Theta, V)
66 |
67 | bad_inds = (all_std[:-op] / np.abs(dd[:-op])) > std_tol
68 | num_bad_vectors = np.sum(bad_inds)
69 |
70 | current_num_vectors = Q.shape[1]
71 | current_rank = current_num_vectors - op - num_bad_vectors
72 | if verbose:
73 | print('current_rank=', current_rank, ', num_bad_vectors=', num_bad_vectors)
74 |
75 | if current_num_vectors > max_vectors:
76 | break
77 |
78 | good_inds = np.logical_not(bad_inds)
79 | dd_good = dd[:-op][good_inds]
80 | U_good = U[:,:-op][:,good_inds]
81 | all_std_good = all_std[:-op][good_inds]
82 | return [dd_good, U_good, all_std_good],[dd[:-op],U[:,:-op],all_std[:-op]]
83 |
84 |
85 | def get_random_range_vectors(apply_AA, num_cols_A, block_size_r,seed = 0):
86 | """
87 | Computes n x r matrix
88 | Y = A * Omega
89 | where A is an n x n matrix of the form
90 | A = (A1 + A2 + ... + Am)/m,
91 | matvecs with the matrices Ak may be computed via the function
92 | apply_AA[k](x) = Ak * x,
93 | and Omega is a random n x r matrix.
94 | """
95 | n = num_cols_A
96 | r = block_size_r
97 | m = len(apply_AA)
98 |
99 | Omega = np.random.randn(n, r)
100 | Y = np.zeros((n, r))
101 | # In Tensorflow:
102 | # z = g^T Omega
103 | # q = unstack(z)
104 | # Y = (1/m) * restack(dq_i / dw)
105 | for j in range(r): # These loops can be trivially parallelized
106 | for k in range(m):
107 | Y[:,j] = Y[:,j] + (1./m)*apply_AA[k](Omega[:,j])
108 | return Y
109 |
110 |
111 | def compute_Theta(orthonormal_range_basis_Q, apply_AA):
112 | """
113 | Computes r x r x m 3-tensor Theta with entries
114 | Theta_ijk = qi^T Ak qj.
115 | Theta has frontal slices
116 | Theta_::k = Q^T Ak Q.
117 | """
118 | Q = orthonormal_range_basis_Q
119 | m = len(apply_AA)
120 | r = Q.shape[1]
121 |
122 | Theta = np.zeros((r, r, m))
123 | for j in range(r): # These loops can be trivially parallelized
124 | for k in range(m):
125 | Theta[:,j,k] = np.dot(Q.T, apply_AA[k](Q[:,j]))
126 | return Theta
127 |
128 |
129 | def finish_computing_eigenvalue_decomposition(orthonormal_range_basis_Q, Theta):
130 | """
131 | Finishes computing eigenvalue decomposition
132 | A = U diag(dd) U^T,
133 | and smaller auxiliary eigenvalue decomposition
134 | Q^T A Q = V diag(dd) V^T
135 | where Q is an orthonormal basis for the range of
136 | A = (A1+A2+...+Am)/m,
137 | and Theta is the matrix with frontal slices
138 | Theta_::k = Q^T Ak Q.
139 | """
140 | Q = orthonormal_range_basis_Q
141 | m = Theta.shape[-1]
142 |
143 | B = (1. / m) * np.sum(Theta, axis=-1)
144 | dd, V = np.linalg.eigh(B)
145 | idx = np.argsort(np.abs(dd))[::-1]
146 | dd = dd[idx]
147 | V = V[:,idx]
148 |
149 | U = np.dot(Q, V)
150 | return dd, U, V
151 |
152 |
153 | def compute_rayleigh_statistics(Theta, small_eigenvectors_V):
154 | """
155 | Computes sample mean and standard deviation of Rayleigh quotients
156 | all_mu[i] = mean(ui^T Ak ui)
157 | all_std[i] = std(ui^T Ak ui)
158 | where Ak is randomly chosen, and ui is the i'th eigenvector of
159 | A = (A1 + A2 + ... + Am)/m.
160 | Theta is the r x r x m 3-tensor with frontal slices
161 | Theta_::k = Q^T Ak Q,
162 | for orthonormal basis Q such that
163 | A =approx= Q * Q^T * A
164 | The columns, vi, of V are the eigenvectors of the matrix Q^T A Q, i.e.,
165 | Q^T A Q = V D V^T
166 | where D is the diagonal matrix of eigenvalues, which we do not need here.
167 | (Note that ui = Q * vi).
168 | """
169 | V = small_eigenvectors_V
170 | r = Theta.shape[0]
171 |
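    # C[l, i] = v_l^T (Q^T A_i Q) v_l: the Rayleigh quotient of eigenvector
    # v_l under the i'th sample matrix, computed for all l and i at once.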
172 | C = np.sum(V.reshape((r,r,-1)) * np.einsum('jki,kl->jli', Theta, V), axis=0)
173 | all_mu = np.mean(C, axis=1)
174 | all_std = np.std(C, axis=1)
175 | return all_mu, all_std
176 |
177 |
178 | def update_Theta(Q1, Q2, Theta11, apply_AA):
179 | """
180 | Computes updated r x r x m 3-tensor Theta with frontal slices
181 | Theta_::k = Q^T Ak Q
182 | based on old Theta1 with frontal slices
183 | Theta11_::k = Q1^T Ak Q1.
184 | Here Q1 and Q2 are orthonormal matrices, and
185 | Q = [Q1, Q2]
186 | is also an orthonormal matrix.
187 | Q1 was the old range approximation for A.
188 | Q2 columns are more vectors to improve the range approximation.
189 | Q is the new range approximation.
190 | """
191 | m = len(apply_AA)
192 | r1 = Q1.shape[1]
193 | r2 = Q2.shape[1]
194 | r = r1 + r2
195 | Theta12 = np.zeros((r1, r2, m))
196 | Theta22 = np.zeros((r2, r2, m))
197 | for i in range(r2): # These loops can be trivially parallelized
198 | for k in range(m):
199 | Ak_qi = apply_AA[k](Q2[:,i])
200 | Theta12[:,i,k] = np.dot(Q1.T, Ak_qi)
201 | Theta22[:,i,k] = np.dot(Q2.T, Ak_qi)
202 |
203 | Theta = np.zeros((r, r, m))
204 | Theta[:r1, :r1, :] = Theta11
205 | Theta[:r1, r1:, :] = Theta12
206 | Theta[r1:, :r1, :] = Theta12.swapaxes(0,1)
207 | Theta[r1:, r1:, :] = Theta22
208 | return Theta
209 |
210 |
211 | def compute_or_update_Theta(Q1, Q2, Theta11, apply_AA):
212 | if Theta11.size == 0:
213 | return compute_Theta(Q2, apply_AA)
214 | else:
215 | return update_Theta(Q1, Q2, Theta11, apply_AA)
216 |
217 |
218 |
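# ------------------------------------------------------------------
# Editor's sketch (not in the original file): minimal usage on m noisy
# random symmetric matrices A_k = C C^T + noise that share a common
# dominant subspace. The sizes and callables below are illustrative only.
if __name__ == '__main__':
    n, m, true_rank = 100, 30, 10
    C = np.random.randn(n, true_rank)
    noisy = [C @ C.T + 0.1 * np.random.randn(n, n) for _ in range(m)]
    AA = [0.5 * (A + A.T) for A in noisy]          # symmetrize each sample
    apply_AA = [lambda x, A=A: A @ x for A in AA]  # matvec callables
    [dd, U, stds], _ = variance_based_nystrom(apply_AA, n, verbose=False)
    print('recovered rank:', len(dd))              # expected: about true_rank
# ------------------------------------------------------------------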
--------------------------------------------------------------------------------
/applications/mnist/mnist_vae.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | ################################################################################
19 | # Uses some code from https://blog.keras.io/building-autoencoders-in-keras.html
20 | ################################################################################
21 |
22 | import numpy as np
23 | import os
24 | import tensorflow as tf
25 | import time
26 | # if int(tf.__version__[0]) > 1:
27 | # import tensorflow.compat.v1 as tf
28 | # tf.disable_v2_behavior()
29 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
30 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
31 | os.environ["KMP_WARNINGS"] = "FALSE"
32 | import sys
33 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
34 | from hessianlearn import *
35 |
36 | try: tf.set_random_seed(0)  # TF1 API
37 | except AttributeError: tf.random.set_seed(0)  # TF2 API
38 | settings = {}
39 | # Set run specifications
40 | # Data specs
41 | settings['batch_size'] = 100
42 | settings['hess_batch_size'] = 10
43 |
44 |
45 | ################################################################################
46 | # Instantiate data
47 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
48 |
49 |
50 | # Normalize the data
51 | x_train = x_train.astype('float32') / 255.
52 | x_test = x_test.astype('float32') / 255.
53 | # Reshape the data
54 | flattened_dimension = np.prod(x_train.shape[1:])
55 | x_train = x_train.reshape((len(x_train), flattened_dimension))
56 | x_test = x_test.reshape((len(x_test), flattened_dimension))
57 |
58 | # Instantiate the data object
59 | data = Data([x_train,y_train],settings['batch_size'],test_data = [x_test,y_test],hessian_batch_size = settings['hess_batch_size'])
60 |
61 | # settings['input_shape'] = data._input_shape
62 | # settings['output_shape'] = data._output_shape
63 |
64 |
65 | ################################################################################
66 | # Build the variational autoencoder neural network model here
67 |
68 | # network parameters
69 | input_shape = (flattened_dimension, )
70 | intermediate_dim = 512
71 | latent_dim = 2
72 |
73 | # VAE model = encoder + decoder
74 | # build encoder model
75 | inputs = tf.keras.layers.Input(shape=input_shape)
76 | x_encoder = tf.keras.layers.Dense(intermediate_dim, activation='softplus')(inputs)
77 | z_mean = tf.keras.layers.Dense(latent_dim, name='z_mean')(x_encoder)
78 | z_log_var = tf.keras.layers.Dense(latent_dim, name='z_log_var')(x_encoder)
79 |
80 | # reparameterization trick
81 | # instead of sampling from Q(z|X), sample epsilon = N(0,I)
82 | # z = z_mean + sqrt(var) * epsilon
83 | def sampling(args):
84 | """Reparameterization trick by sampling from an isotropic unit Gaussian.
85 | # Arguments
86 | args (tensor): mean and log of variance of Q(z|X)
87 | # Returns
88 | z (tensor): sampled latent vector
89 | """
90 | z_mean, z_log_var = args
91 | batch = tf.keras.backend.shape(z_mean)[0]
92 | dim = tf.keras.backend.int_shape(z_mean)[1]
93 | # by default, random_normal has mean = 0 and std = 1.0
94 | epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
95 | return z_mean + tf.keras.backend.exp(0.5 * z_log_var) * epsilon
96 | # use reparameterization trick to push the sampling out as input
97 | # note that "output_shape" isn't necessary with the TensorFlow backend
98 | z = tf.keras.layers.Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
99 |
100 | # instantiate encoder model
101 | encoder = tf.keras.models.Model(inputs, [z_mean, z_log_var, z], name='encoder')
102 |
103 | # build decoder model
104 | latent_inputs = tf.keras.layers.Input(shape=(latent_dim,), name='z_sampling')
105 | x_decoder = tf.keras.layers.Dense(intermediate_dim, activation='softplus')(latent_inputs)
106 | outputs = tf.keras.layers.Dense(flattened_dimension, activation='sigmoid')(x_decoder)
107 |
108 | # instantiate decoder model
109 | decoder = tf.keras.models.Model(latent_inputs, outputs, name='decoder')
110 |
111 | # instantiate VAE model
112 | outputs = decoder(encoder(inputs)[2])
113 | vae = tf.keras.models.Model(inputs, outputs, name='vae_mlp')
114 |
115 |
116 |
117 | ################################################################################
118 | # Instantiate the problem, regularization.
119 |
120 | problem = VariationalAutoencoderProblem(vae,z_mean,z_log_var,dtype=tf.float32)
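# Note (editor's sketch of the standard formulation, not necessarily the
# exact loss assembled by VariationalAutoencoderProblem): the usual VAE
# objective combines a reconstruction term with a KL term which, for a
# Gaussian encoder, has the closed form
#   KL = -0.5 * sum(1 + z_log_var - z_mean**2 - exp(z_log_var))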
121 |
122 | settings['tikhonov_gamma'] = 1e-2
123 | regularization = L2Regularization(problem,gamma = settings['tikhonov_gamma'])
124 |
125 |
126 | ################################################################################
127 | # Instantiate the model object
128 | HLModelSettings = HessianlearnModelSettings()
129 |
130 | HLModelSettings['optimizer'] = 'lrsfn'
131 | HLModelSettings['alpha'] = 5e-4
132 | HLModelSettings['globalization'] = 'line_search'
133 | HLModelSettings['hessian_low_rank'] = 20
134 | HLModelSettings['max_backtrack'] = 16
135 | HLModelSettings['max_sweeps'] = 50
136 |
137 | HLModelSettings['problem_name'] = 'mnist_vae'
138 |
139 |
140 | HLModel = HessianlearnModel(problem,regularization,data,settings = HLModelSettings)
141 |
142 |
143 | # Can pass in an initial guess for the weights w_0 to the method fit, if desired.
144 | HLModel.fit(w_0 = None)
145 |
146 | ################################################################################
147 | # Post processing
148 | import matplotlib.pyplot as plt
149 | def plot_results(models,
150 | data,
151 | batch_size=128,
152 | model_name="vae_mnist"):
153 | """Plots labels and MNIST digits as a function of the 2D latent vector
154 | # Arguments
155 | models (tuple): encoder and decoder models
156 | data (tuple): test data and label
157 | batch_size (int): prediction batch size
158 | model_name (string): which model is using this function
159 | """
160 |
161 | encoder, decoder = models
162 | x_test, y_test = data
163 | os.makedirs(model_name, exist_ok=True)
164 |
165 | filename = os.path.join(model_name, "vae_mean.png")
166 | # display a 2D plot of the digit classes in the latent space
167 | z_mean, _, _ = encoder.predict(x_test,
168 | batch_size=batch_size)
169 | plt.figure(figsize=(12, 10))
170 | plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test)
171 | plt.colorbar()
172 | plt.xlabel("z[0]")
173 | plt.ylabel("z[1]")
174 | plt.savefig(filename)
175 | plt.show()
176 |
177 | filename = os.path.join(model_name, "digits_over_latent.png")
178 | # display a 30x30 2D manifold of digits
179 | n = 30
180 | digit_size = 28
181 | figure = np.zeros((digit_size * n, digit_size * n))
182 | # linearly spaced coordinates corresponding to the 2D plot
183 | # of digit classes in the latent space
184 | grid_x = np.linspace(-4, 4, n)
185 | grid_y = np.linspace(-4, 4, n)[::-1]
186 |
187 | for i, yi in enumerate(grid_y):
188 | for j, xi in enumerate(grid_x):
189 | z_sample = np.array([[xi, yi]])
190 | x_decoded = decoder.predict(z_sample)
191 | digit = x_decoded[0].reshape(digit_size, digit_size)
192 | figure[i * digit_size: (i + 1) * digit_size,
193 | j * digit_size: (j + 1) * digit_size] = digit
194 |
195 | plt.figure(figsize=(10, 10))
196 | start_range = digit_size // 2
197 | end_range = (n - 1) * digit_size + start_range + 1
198 | pixel_range = np.arange(start_range, end_range, digit_size)
199 | sample_range_x = np.round(grid_x, 1)
200 | sample_range_y = np.round(grid_y, 1)
201 | plt.xticks(pixel_range, sample_range_x)
202 | plt.yticks(pixel_range, sample_range_y)
203 | plt.xlabel("z[0]")
204 | plt.ylabel("z[1]")
205 | plt.imshow(figure, cmap='Greys_r')
206 | plt.savefig(filename)
207 | plt.show()
208 |
209 |
210 | models = (encoder, decoder)
211 | data = (x_test, y_test)
212 | plot_results(models,
213 | data,
214 | batch_size=settings['batch_size'],
215 | model_name= HLModelSettings['optimizer']+'_vae_mlp')
216 |
217 |
218 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ___ ___ ___ ___ ___ ___
6 | /__/\ / /\ / /\ / /\ ___ / /\ /__/\
7 | \ \:\ / /:/_ / /:/_ / /:/_ / /\ / /::\ \ \:\
8 | \__\:\ / /:/ /\ / /:/ /\ / /:/ /\ / /:/ / /:/\:\ \ \:\
9 | ___ / /::\ / /:/ /:/_ / /:/ /::\ / /:/ /::\ /__/::\ / /:/~/::\ _____\__\:\
10 | /__/\ /:/\:\/__/:/ /:/ /\/__/:/ /:/\:\/__/:/ /:/\:\\__\/\:\__ /__/:/ /:/\:\/__/::::::::\
11 | \ \:\/:/__\/\ \:\/:/ /:/\ \:\/:/~/:/\ \:\/:/~/:/ \ \:\/\\ \:\/:/__\/\ \:\~~\~~\/
12 | \ \::/ \ \::/ /:/ \ \::/ /:/ \ \::/ /:/ \__\::/ \ \::/ \ \:\ ~~~
13 | \ \:\ \ \:\/:/ \__\/ /:/ \__\/ /:/ /__/:/ \ \:\ \ \:\
14 | \ \:\ \ \::/ /__/:/ /__/:/ \__\/ \ \:\ \ \:\
15 | \__\/ \__\/ \__\/ \__\/ \__\/ \__\/
16 |
17 |
18 | ___ ___ ___ ___
19 | / /\ / /\ / /\ /__/\
20 | / /:/_ / /::\ / /::\ \ \:\
21 | ___ ___ / /:/ /\ / /:/\:\ / /:/\:\ \ \:\
22 | /__/\ / /\ / /:/ /:/_ / /:/~/::\ / /:/~/:/ _____\__\:\
23 | \ \:\ / /://__/:/ /:/ /\/__/:/ /:/\:\/__/:/ /:/___/__/::::::::\
24 | \ \:\ /:/ \ \:\/:/ /:/\ \:\/:/__\/\ \:\/:::::/\ \:\~~\~~\/
25 | \ \:\/:/ \ \::/ /:/ \ \::/ \ \::/~~~~ \ \:\ ~~~
26 | \ \::/ \ \:\/:/ \ \:\ \ \:\ \ \:\
27 | \__\/ \ \::/ \ \:\ \ \:\ \ \:\
28 | \__\/ \__\/ \__\/ \__\/
29 |
30 |
31 |
32 |
33 |
34 | [](https://travis-ci.com/tomoleary/hessianlearn)
35 | [](https://zenodo.org/badge/latestdoi/184635062)
36 | [](./LICENSE.md)
37 | [](https://www.python.org)
38 | 
39 | [](https://github.com/tomoleary/hessianlearn/issues)
40 | [](https://github.com/tomoleary/hessianlearn/commits/master)
41 |
42 | # Hessian-based stochastic optimization in TensorFlow and keras
43 |
44 | This code implements Hessian-based stochastic optimization in TensorFlow and keras by exposing the matrix-free action of the Hessian to users. The code is meant to allow rapid prototyping of Hessian-based algorithms: the matrix-free Hessian action lets users inspect curvature information for stochastic nonconvex (neural network training) optimization problems.
45 |
46 | The Hessian action is exposed via matrix-vector products:
47 | 
48 |     H(w) v
49 | 
50 | 
51 | and matrix-matrix products:
52 | 
53 |     H(w) V
54 | 
55 | 
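For illustration, a matrix-free Hessian-vector product can be formed with nested automatic differentiation. The sketch below is a generic TF2 eager-mode illustration of the idea (hessianlearn itself builds these products on the v1-style graph with `placeholders`); the toy loss `L(w) = sum(w^4)` is only an example.

```python
import tensorflow as tf

w = tf.Variable([1.0, 2.0])      # parameters
v = tf.constant([0.5, -1.0])     # direction vector

with tf.GradientTape() as outer:
    with tf.GradientTape() as inner:
        loss = tf.reduce_sum(w ** 4)   # toy loss L(w)
    g = inner.gradient(loss, w)        # gradient of L at w
    gv = tf.reduce_sum(g * v)          # scalar inner product (g, v)
Hv = outer.gradient(gv, w)             # Hessian action H(w) v
print(Hv.numpy())                      # here H = diag(12 w^2), so Hv = [6., -48.]
```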
56 | ## Compatibility
57 |
58 | The code is compatible with TensorFlow v1 and v2, but certain features of v2, such as eager execution, are disabled. This is because the Hessian matrix products in hessianlearn are implemented using `placeholders`, which are deprecated in v2. For this reason hessianlearn cannot work with data generators or other features that require eager execution. If any compatibility issues are found, please open an [issue](https://github.com/tomoleary/hessianlearn/issues).
59 |
60 | ## Usage
61 | Set the `HESSIANLEARN_PATH` environment variable.
62 |
63 | Train a keras model
64 |
65 | ```python
66 | import os,sys
67 | import tensorflow as tf
68 | sys.path.append( os.environ.get('HESSIANLEARN_PATH'))
69 | from hessianlearn import *
70 |
71 | # Define keras neural network model
72 | neural_network = tf.keras.models.Model(...)
73 | # Define loss function and compile model
74 | neural_network.compile(loss = ...)
75 |
76 | ```
77 |
78 | hessianlearn implements various training [`problem`](https://github.com/tomoleary/hessianlearn/blob/master/hessianlearn/problem/problem.py) constructs (regression, classification, autoencoders, variational autoencoders, generative adversarial networks). Instantiate a `problem`, a `data` object (which takes a dictionary whose keys are the corresponding `placeholders` in `problem`), and a `regularization`
79 |
80 | ```python
81 | # Instantiate the problem (this handles the loss function,
82 | # construction of hessian and gradient etc.)
83 | # KerasModelProblem extracts loss function and metrics from
84 | # a compiled keras model
85 | problem = KerasModelProblem(neural_network)
86 | # Instantiate the data object, this handles the train / validation split
87 | # as well as iterating during training
88 | data = Data({problem.x:x_data,problem.y_true:y_data},train_batch_size,\
89 | validation_data_size = validation_data_size)
90 | # Instantiate the regularization: L2Regularization is Tikhonov,
91 | # gamma = 0 is no regularization
92 | regularization = L2Regularization(problem,gamma = 0)
93 | ```
94 |
95 | Pass these objects into the `HessianlearnModel`, which handles the training:
96 |
97 | ```python
98 | HLModel = HessianlearnModel(problem,regularization,data)
99 | HLModel.fit()
100 | ```
101 |
102 | ### Alternative Usage (More like Keras Interface)
103 | The example above reflects the original optimizer interface in hessianlearn. To better mimic the keras interface, and to allow more end-user rapid prototyping of the optimizer used to fit data, the following interface was added in December 2021:
104 |
105 | ```python
106 | import os,sys
107 | import tensorflow as tf
108 | sys.path.append( os.environ.get('HESSIANLEARN_PATH'))
109 | from hessianlearn import *
110 |
111 | # Define keras neural network model
112 | neural_network = tf.keras.models.Model(...)
113 | # Define loss function and compile model
114 | neural_network.compile(loss = ...)
115 | # Instance keras model wrapper which deals with the
116 | # construction of the `problem` which handles the construction
117 | # of Hessian computational graph and variables
118 | HLModel = KerasModelWrapper(neural_network)
119 | # Then the end user can pass in an optimizer
120 | # (e.g. custom end-user optimizer)
121 | optimizer = LowRankSaddleFreeNewton # The class constructor, not an instance
122 | opt_parameters = LowRankSaddleFreeNewtonParameters()
123 | opt_parameters['hessian_low_rank'] = 40
124 | HLModel.set_optimizer(optimizer,optimizer_parameters = opt_parameters)
125 | # The data object still needs to key on to the specific computational
126 | # graph variables that data will be passed in for.
127 | # Note that data can naturally handle multiple input and output data,
128 | # in which case problem.x, problem.y_true are lists corresponding to
129 | # neural_network.inputs, neural_network.outputs
130 | problem = HLModel.problem
131 | data = Data({problem.x:x_data,problem.y_true:y_data},train_batch_size,\
132 | validation_data_size = validation_data_size)
133 | # And finally one can call fit!
134 | HLModel.fit(data)
135 | ```
136 |
137 | ## Examples
138 |
139 | [Tutorial 0: MNIST Autoencoder](https://github.com/tomoleary/hessianlearn/blob/master/tutorial/Tutorial%200%20MNIST%20Autoencoder.ipynb)
140 |
141 |
142 | ## Applications
143 |
144 | ### Transfer Learning
145 |
146 | * Examples of CIFAR10, CIFAR100 classification from pre-trained Imagenet ResNet50 model in `applications/transfer_learning/`
147 |
148 | * The pre-trained model serves as a well-conditioned initial guess for transfer learning. In this setting Newton methods perform well due to their excellent local convergence properties. Low Rank Saddle Free Newton is able to zero in on highly generalizable local minimizers, bypassing indefinite regions. Below are validation accuracies for the best choices of fixed step-length for Adam, SGD and LRSFN with a fixed rank of 40.
149 |
150 | 
151 |     (figure: validation accuracy comparison of Adam, SGD, and LRSFN)
152 | 
153 |
154 | # References
155 |
156 | These manuscripts motivate and use the hessianlearn library for stochastic nonconvex optimization
157 |
158 | - \[1\] O'Leary-Roseberry, T., Alger, N., Ghattas O.,
159 | [**Inexact Newton Methods for Stochastic Nonconvex Optimization with Applications to Neural Network Training**](https://arxiv.org/abs/1905.06738).
160 | arXiv:1905.06738.
161 | ([Download](https://arxiv.org/pdf/1905.06738.pdf)) BibTeX:
162 | @article{OLearyRoseberryAlgerGhattas2019,
163 | title={Inexact Newton methods for stochastic nonconvex optimization with applications to neural network training},
164 | author={O'Leary-Roseberry, Thomas and Alger, Nick and Ghattas, Omar},
165 | journal={arXiv preprint arXiv:1905.06738},
166 | year={2019}
167 | }
168 | 
169 |
170 | - \[2\] O'Leary-Roseberry, T., Alger, N., Ghattas O.,
171 | [**Low Rank Saddle Free Newton: A Scalable Method for Stochastic Nonconvex Optimization**](https://arxiv.org/abs/2002.02881).
172 | arXiv:2002.02881.
173 | ([Download](https://arxiv.org/pdf/2002.02881.pdf)) BibTeX:
174 | @article{OLearyRoseberryAlgerGhattas2020,
175 | title={Low Rank Saddle Free Newton: Algorithm and Analysis},
176 | author={O'Leary-Roseberry, Thomas and Alger, Nick and Ghattas, Omar},
177 | journal={arXiv preprint arXiv:2002.02881},
178 | year={2020}
179 | }
180 | 
181 |
182 |
183 | - \[3\] O'Leary-Roseberry, T., Villa, U., Chen P., Ghattas O.,
184 | [**Derivative-Informed Projected Neural Networks for High-Dimensional Parametric Maps Governed by PDEs**](https://www.sciencedirect.com/science/article/pii/S0045782521005302).
185 | Computer Methods in Applied Mechanics and Engineering. Volume 388, 1 January 2022, 114199.
186 | ([Download](https://arxiv.org/pdf/2011.15110.pdf)) BibTeX:
187 | @article{OLearyRoseberryVillaChenEtAl2022,
188 | title={Derivative-informed projected neural networks for high-dimensional parametric maps governed by {PDE}s},
189 | author={O’Leary-Roseberry, Thomas and Villa, Umberto and Chen, Peng and Ghattas, Omar},
190 | journal={Computer Methods in Applied Mechanics and Engineering},
191 | volume={388},
192 | pages={114199},
193 | year={2022},
194 | publisher={Elsevier}
195 | }
196 | 
197 |
198 |
199 | - \[4\] O'Leary-Roseberry, T., Du, X., Chaudhuri, A., Martins, J., Willcox, K., Ghattas, O.,
200 | [**Adaptive Projected Residual Networks for Learning Parametric Maps from Sparse Data**](https://arxiv.org/abs/2112.07096).
201 | arXiv:2112.07096.
202 | ([Download](https://arxiv.org/pdf/2112.07096.pdf)) BibTeX:
203 | @article{OLearyRoseberryDuChaudhuriEtAl2021,
204 | title={Adaptive Projected Residual Networks for Learning Parametric Maps from Sparse Data},
205 |         author={O'Leary-Roseberry, Thomas and Du, Xiaosong and Chaudhuri, Anirban and Martins, Joaquim R. R. A. and Willcox, Karen and Ghattas, Omar},
206 | journal={arXiv preprint arXiv:2112.07096},
207 | year={2021}
208 | }
209 | 
210 |
211 |
212 |
213 |
214 |
215 |
--------------------------------------------------------------------------------
/hessianlearn/algorithms/lowRankSaddleFreeNewton.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import numpy as np
23 | from scipy.sparse import diags
24 | import time
25 |
26 | from ..utilities.parameterList import ParameterList
27 | from ..algorithms import Optimizer
28 | from ..algorithms.globalization import ArmijoLineSearch, TrustRegion
29 | from ..algorithms.randomizedEigensolver import randomized_eigensolver, eigensolver_from_range
30 | from ..algorithms.rangeFinders import block_range_finder, noise_aware_adaptive_range_finder
31 | from ..algorithms.varianceBasedNystrom import variance_based_nystrom
32 | from ..problem import L2Regularization, HessianWrapper
33 |
34 |
35 |
36 |
37 | def ParametersLowRankSaddleFreeNewton(parameters = {}):
38 | parameters['alpha'] = [1e-3, "Initial steplength, or learning rate"]
39 | parameters['rel_tolerance'] = [1e-3, "Relative convergence when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
40 | parameters['abs_tolerance'] = [1e-4,"Absolute converge when sqrt(g,g) <= abs_tolerance"]
41 | parameters['default_damping'] = [1e-3, "Levenberg-Marquardt damping when no regularization is used"]
42 |
43 | # Hessian approximation parameters
44 |     parameters['range_finding'] = [None, "Range finding; if None then r = hessian_low_rank. "
45 |                                          "Choose from None, 'arf', 'naarf', 'vn'"]
46 |     parameters['range_rel_error_tolerance'] = [0.1, "Relative error tolerance for the error estimator in adaptive range finding"]
47 |     parameters['range_abs_error_tolerance'] = [100, "Absolute error tolerance for the error estimator in adaptive range finding"]
48 | parameters['range_block_size'] = [20, "Block size used in range finder"]
49 | parameters['rq_samples_for_naarf'] = [100, "Number of partitions for RQ variance evaluation"]
50 | parameters['hessian_low_rank'] = [20, "Fixed rank for randomized eigenvalue decomposition"]
51 | # Variance Nystrom Parameters
52 | parameters['max_bad_vectors_nystrom'] = [5, "Number of maximum bad vectors for variance based Nystrom"]
53 | parameters['max_vectors_nystrom'] = [40, "Number of maximum vectors for variance based Nystrom"]
54 | parameters['nystrom_std_tolerance'] = [0.5, "Noise to eigenvalue ratio used for Nystrom truncation"]
55 |
56 |
57 |     # Globalization parameters
58 | parameters['globalization'] = [None, 'Choose from trust_region, line_search, spectral_step or none']
59 | parameters['max_backtracking_iter'] = [5, 'Max backtracking iterations for armijo line search']
60 | parameters['spectral_step_alpha'] = [1e-2, 'Used in min condition for spectral step']
61 |
62 | parameters['verbose'] = [False, "Printing"]
63 | parameters['record_last_rq_std'] = [False, "Record the last eigenvector RQ variance"]
64 |
65 | return ParameterList(parameters)
66 |
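# A minimal configuration sketch (editor's addition, not in the original
# file): override a few of the hyperparameters above and construct the
# optimizer. Here `problem`, `regularization` and `sess` are assumed to
# already exist, as they do inside HessianlearnModel's training loop.
#
#     parameters = ParametersLowRankSaddleFreeNewton()
#     parameters['hessian_low_rank'] = 40        # fixed rank r
#     parameters['range_finding'] = 'vn'         # variance-based Nystrom
#     parameters['globalization'] = 'line_search'
#     optimizer = LowRankSaddleFreeNewton(problem, regularization=regularization,
#                                         sess=sess, parameters=parameters)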
67 |
68 | class LowRankSaddleFreeNewton(Optimizer):
69 | """
70 | This class implements the Low Rank Saddle Free Newton (LRSFN) algorithm
71 | """
72 | def __init__(self,problem,regularization = None,sess = None,parameters = ParametersLowRankSaddleFreeNewton(),preconditioner = None):
73 | """
74 | The constructor for this class takes:
75 | -problem: hessianlearn.problem.Problem
76 | -regularization: hessianlearn.problem.Regularization
77 | -sess: tf.Session()
78 | -parameters: hyperparameters dictionary
79 | -preconditioner: hessianlearn.problem.Preconditioner
80 | """
81 | if regularization is None:
82 | _regularization = L2Regularization(problem,gamma = 0.0)
83 | else:
84 | _regularization = regularization
85 | super(LowRankSaddleFreeNewton,self).__init__(problem,_regularization,sess,parameters)
86 |
87 | self.grad = self.problem.gradient + self.regularization.gradient
88 |
89 | if self.parameters['globalization'] == 'trust_region':
90 | self.trust_region = TrustRegion()
91 | self._sweeps = np.zeros(2)
92 |
93 | self.alpha = 0.0
94 | self._rank = 0
95 |
96 | self._rq_std = 0.0
97 |
98 | self.eigenvalues = None
99 |
100 | @property
101 | def rank(self):
102 | return self._rank
103 |
104 |     @property
105 |     def rq_std(self):
106 |         return self._rq_std
107 |
108 |
109 |
110 |
111 | def minimize(self,feed_dict = None,hessian_feed_dict = None,rq_estimator_dict = None):
112 | r"""
113 |         Solves the saddle escape problem. Given a misfit (loss) Hessian operator H:
114 |         1. Compute the low rank approximation H = U_r Lambda_r U_r^T
115 |         2. Solve [U_r |Lambda_r| U_r^T + gamma I] p = -g for p via the Woodbury formula:
116 | 
117 |         [U_r |Lambda_r| U_r^T + gamma I]^{-1} = (1/gamma) I - (1/gamma) U_r D U_r^T
118 |         where D = diag(|lambda_i| / (|lambda_i| + gamma))
119 | -feed_dict: data dictionary used for evaluating gradient and cost
120 | -hessian_feed_dict: dictionary used for stochastic Hessian
121 | -rq_estimator_dict: dictionary used for RQ variance calculations
122 |
123 | """
124 | self._iter += 1
125 | assert self.sess is not None
126 | assert feed_dict is not None
127 |
128 | assert self.parameters['range_finding'] in [None,'arf','naarf','vn']
129 |
130 | if hessian_feed_dict is None:
131 | hessian_feed_dict = feed_dict
132 |
133 |
134 | gradient = self.sess.run(self.grad,feed_dict = feed_dict)
135 |
136 | alpha = self.parameters['alpha']
137 |
138 | if self.parameters['range_finding'] == 'arf':
139 | H = lambda x: self.H(x,hessian_feed_dict,verbose = self.parameters['verbose'])
140 | n = self.problem.dimension
141 | # norm_g = np.linalg.norm(gradient)
142 | # tolerance = self.parameters['range_rel_error_tolerance']*norm_g
143 | tolerance = self.parameters['range_rel_error_tolerance']
144 | Q = block_range_finder(H,n,tolerance,self.parameters['range_block_size'])
145 | self._rank = Q.shape[1]
146 | Lmbda,U = eigensolver_from_range(H,Q)
147 |
148 | elif self.parameters['range_finding'] == 'naarf':
149 | norm_g = np.linalg.norm(gradient)
150 | tolerance = self.parameters['range_rel_error_tolerance']*norm_g
151 | if rq_estimator_dict is None:
152 | rq_estimator_dict_list = self.problem._partition_dictionaries(feed_dict,self.parameters['rq_samples_for_naarf'])
153 | elif type(rq_estimator_dict) == list:
154 | rq_estimator_dict_list = rq_estimator_dict
155 | elif type(rq_estimator_dict) == dict:
156 | rq_estimator_dict_list = self.problem._partition_dictionaries(rq_estimator_dict,self.parameters['rq_samples_for_naarf'])
157 | else:
158 |             raise TypeError('rq_estimator_dict must be None, a list or a dict')
159 | Q = noise_aware_adaptive_range_finder(self.H,hessian_feed_dict,rq_estimator_dict_list,block_size = self.parameters['range_block_size'],epsilon = tolerance)
160 | self._rank = Q.shape[1]
161 | H = lambda x: self.H(x,hessian_feed_dict,verbose = self.parameters['verbose'])
162 | Lmbda,U = eigensolver_from_range(H,Q)
163 |
164 | elif self.parameters['range_finding'] == 'vn':
165 | if rq_estimator_dict is None:
166 | rq_estimator_dict_list = self.problem._partition_dictionaries(feed_dict,self.parameters['rq_samples_for_naarf'])
167 | elif type(rq_estimator_dict) == list:
168 | rq_estimator_dict_list = rq_estimator_dict
169 | elif type(rq_estimator_dict) == dict:
170 | rq_estimator_dict_list = self.problem._partition_dictionaries(rq_estimator_dict,self.parameters['rq_samples_for_naarf'])
171 | else:
172 |             raise TypeError('rq_estimator_dict must be None, a list or a dict')
173 | nystrom_t0 = time.time()
174 | apply_H_list = [HessianWrapper(self.H,dictionary) for dictionary in rq_estimator_dict_list]
175 | [Lmbda, U, all_std_good],[Lmbda_all,U_all,all_std] = variance_based_nystrom(apply_H_list, self.H.dimension,\
176 | std_tol = self.parameters['nystrom_std_tolerance'],\
177 | max_vectors = self.parameters['max_vectors_nystrom'],\
178 | max_bad_vectors=self.parameters['max_bad_vectors_nystrom'],\
179 | verbose = self.parameters['verbose'])
180 | self._rank = U_all.shape[1]
181 | if self.parameters['verbose']:
182 | print('Nystrom method took ',time.time() - nystrom_t0, 's')
183 |
184 | else:
185 | H = lambda x: self.H(x,hessian_feed_dict,verbose = self.parameters['verbose'])
186 | n = self.problem.dimension
187 | self._rank = self.parameters['hessian_low_rank']
188 | Lmbda,U = randomized_eigensolver(H, n, self._rank,verbose=False)
189 |
190 | self.eigenvalues = Lmbda
191 | # Log the variance of the last eigenvector
192 |         if self.parameters['record_last_rq_std']:
193 | try:
194 | rq_direction = U[:,-1]
195 | if rq_estimator_dict is None:
196 | rq_estimator_dict_list = self.problem._partition_dictionaries(feed_dict,self.parameters['rq_samples_for_naarf'])
197 | elif type(rq_estimator_dict) == list:
198 | rq_estimator_dict_list = rq_estimator_dict
199 | elif type(rq_estimator_dict) == dict:
200 | rq_estimator_dict_list = self.problem._partition_dictionaries(rq_estimator_dict,self.parameters['rq_samples_for_naarf'])
201 | else:
202 |                     raise TypeError('rq_estimator_dict must be None, a list or a dict')
203 |
204 | try:
205 | RQ_samples = np.zeros((len(rq_estimator_dict_list),rq_direction.shape[1]))
206 | except:
207 | RQ_samples = np.zeros(len(rq_estimator_dict_list))
208 |
209 | for samp_i,sample_dictionary in enumerate(rq_estimator_dict_list):
210 | RQ_samples[samp_i] = self.H.quadratics(rq_direction,sample_dictionary)
211 | self._rq_std = np.std(RQ_samples)
212 | except:
213 | self._rq_std = None
214 | print(80*'#')
215 | print('U is [], taking gradient step, fix this later?'.center(80))
216 |
217 | # Saddle free inversion via Woodbury
218 | if self.regularization.parameters['gamma'] < 1e-4:
219 | gamma_damping = self.parameters['default_damping']
220 | # Using this condition instead of fixed gamma allows one to take larger step sizes
221 | # but does not appear to improve accuracy
222 | # gamma_damping = max(0.9*np.abs(Lmbda[-1]),self.parameters['default_damping'])
223 | else:
224 | gamma_damping = self.regularization.parameters['gamma']
225 | # print('Lmbda[0] = ',Lmbda[0])
226 | # print('Lmbda[-1] = ',Lmbda[-1])
227 | # print('gamma_damping = ',gamma_damping)
228 |
229 | Lmbda_abs = np.abs(Lmbda)
230 | Lmbda_diags = diags(Lmbda_abs)
231 | # Build terms for Woodbury inversion
232 | D_denominator = Lmbda_abs + gamma_damping*np.ones_like(Lmbda_abs)
233 | D = np.divide(Lmbda_abs,D_denominator)
234 | # Invert by applying terms in Woodbury formula:
235 | UTg = np.dot(U.T,gradient)
236 | DUTg = np.multiply(D,UTg)
237 | UDUTg = np.dot(U,DUTg)
238 | minus_p = (gradient - UDUTg)/gamma_damping
239 | self.p = -minus_p
240 |
241 |
242 | # Globalization: compute alpha and update the weights
243 | if self.parameters['globalization'] is None:
244 | self.alpha = self.parameters['alpha']
245 | self._sweeps += [1,2*self._rank]
246 | update = self.alpha*self.p
247 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
248 |
249 |         elif self.parameters['globalization'] == 'spectral_step':
250 |             # Step length limited by the inverse of the largest curvature estimate
251 | self.alpha = min(self.parameters['spectral_step_alpha'],0.1/Lmbda_abs[0])
252 | self._sweeps += [1,2*self._rank]
253 | update = self.alpha*self.p
254 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
255 |
256 | elif self.parameters['globalization'] == 'line_search':
257 | w_dir_inner_g = np.inner(self.p,gradient)
258 | initial_cost = self.sess.run(self.problem.loss,feed_dict = feed_dict)
259 | cost_at_candidate = lambda p : self._loss_at_candidate(p,feed_dict = feed_dict)
260 | self.alpha, line_search, line_search_iter = ArmijoLineSearch(self.p,w_dir_inner_g,\
261 | cost_at_candidate, initial_cost,
262 | max_backtracking_iter = self.parameters['max_backtracking_iter'])
263 | update = self.alpha*self.p
264 | self._sweeps += [1+0.5*line_search_iter,2*self._rank]
265 | self.sess.run(self.problem._update_ops,feed_dict = {self.problem._update_placeholder:update})
266 |
267 |
268 |
269 |
270 |
271 |
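# ------------------------------------------------------------------
# Editor's sketch (standalone, not part of this module): a small NumPy
# check of the Woodbury identity used in `minimize` above. For orthonormal
# U_r and D = diag(|lambda_i| / (|lambda_i| + gamma)):
#   [U_r |Lambda_r| U_r^T + gamma I]^{-1} = (1/gamma) (I - U_r D U_r^T)
#
#     import numpy as np
#     n, r, gamma = 50, 5, 1e-3
#     U, _ = np.linalg.qr(np.random.randn(n, r))
#     lmbda = np.random.randn(r)
#     A = U @ np.diag(np.abs(lmbda)) @ U.T + gamma * np.eye(n)
#     D = np.abs(lmbda) / (np.abs(lmbda) + gamma)
#     A_inv = (np.eye(n) - U @ np.diag(D) @ U.T) / gamma
#     assert np.allclose(A @ A_inv, np.eye(n))   # identity holds
# ------------------------------------------------------------------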
--------------------------------------------------------------------------------
/applications/transfer_learning/imagenet_cifar10_classification.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 |
19 | import numpy as np
20 | import os
21 | import pickle
22 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
23 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
24 | os.environ["KMP_WARNINGS"] = "FALSE"
25 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
26 | 
27 | import tensorflow as tf
28 | import time, datetime
29 | # if int(tf.__version__[0]) > 1:
30 | # import tensorflow.compat.v1 as tf
31 | # tf.disable_v2_behavior()
32 |
33 |
34 | # Memory issue with GPUs
35 | gpu_devices = tf.config.experimental.list_physical_devices('GPU')
36 | for device in gpu_devices:
37 | tf.config.experimental.set_memory_growth(device, True)
38 | # Load hessianlearn library
39 | import sys
40 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
41 | from hessianlearn import *
42 |
43 | # Parse run specifications
44 | from argparse import ArgumentParser
45 |
46 | parser = ArgumentParser(add_help=True)
47 | parser.add_argument("-optimizer", dest='optimizer',required=False, default = 'lrsfn', help="optimizer type",type=str)
48 | parser.add_argument('-fixed_step',dest = 'fixed_step',\
49 | required= False,default = 1,help='boolean for fixed step vs globalization',type = int)
50 | parser.add_argument('-alpha',dest = 'alpha',required = False,default = 1e-5,help= 'learning rate alpha',type=float)
51 | parser.add_argument('-hessian_low_rank',dest = 'hessian_low_rank',required= False,default = 40,help='low rank for sfn',type = int)
52 | parser.add_argument('-record_spectrum',dest = 'record_spectrum',\
53 | required= False,default = 0,help='boolean for recording spectrum',type = int)
54 |
55 | parser.add_argument("-resnet_weights", dest='resnet_weights',required=False, default = 'imagenet', help="initialization for network weights",type=str)
56 |
57 | parser.add_argument('-batch_size',dest = 'batch_size',required= False,default = 32,help='batch size',type = int)
58 | parser.add_argument('-hess_batch_size',dest = 'hess_batch_size',required= False,default = 8,help='hess batch size',type = int)
59 | parser.add_argument('-keras_epochs',dest = 'keras_epochs',required= False,default = 50,help='keras_epochs',type = int)
60 | parser.add_argument("-keras_opt", dest='keras_opt',required=False, default = 'adam', help="optimizer type for keras",type=str)
61 | parser.add_argument('-keras_alpha',dest = 'keras_alpha',required= False,default = 1e-3,help='keras learning rate',type = float)
62 | parser.add_argument('-max_sweeps',dest = 'max_sweeps',required= False,default = 2,help='max sweeps',type = float)
63 |
64 | parser.add_argument("-loss_type", dest='loss_type',required=False, default = 'mixed', help="loss type: either cross_entropy or mixed",type=str)
65 | parser.add_argument('-seed',dest = 'seed',required= False,default = 0,help='seed',type = int)
66 |
67 |
68 | args = parser.parse_args()
69 |
70 | try:
71 | tf.set_random_seed(args.seed)
72 | except AttributeError:
73 | tf.random.set_seed(args.seed)
74 |
75 | # GPU Environment Details
76 | gpu_available = tf.test.is_gpu_available()
77 | built_with_cuda = tf.test.is_built_with_cuda()
78 | print(80*'#')
79 | print(('IS GPU AVAILABLE: '+str(gpu_available)).center(80))
80 | print(('IS BUILT WITH CUDA: '+str(built_with_cuda)).center(80))
81 | print(80*'#')
82 |
83 | settings = {}
84 | # Set run specifications
85 | # Data specs
86 | settings['batch_size'] = args.batch_size
87 | settings['hess_batch_size'] = args.hess_batch_size
88 |
89 |
90 | ################################################################################
91 | # Instantiate data
92 | (x_train, y_train), (_x_test, _y_test) = tf.keras.datasets.cifar10.load_data()
93 |
94 | # # Normalize the data
95 | # x_train = x_train.astype('float32') / 255.
96 | # x_test = x_test.astype('float32') / 255.
97 |
98 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
99 | x_test_full = tf.keras.applications.resnet50.preprocess_input(_x_test)
100 | x_val = x_test_full[:2000]
101 | x_test = x_test_full[2000:]
102 |
103 | y_train = tf.keras.utils.to_categorical(y_train)
104 | y_test_full = tf.keras.utils.to_categorical(_y_test)
105 | y_val = y_test_full[:2000]
106 | y_test = y_test_full[2000:]
107 |
108 | ################################################################################
109 | # Create the neural network in keras
110 |
111 | # tf.keras.backend.set_floatx('float64')
112 |
113 | resnet_input_shape = (200,200,3)
114 | input_tensor = tf.keras.Input(shape = resnet_input_shape)
115 |
116 | if args.resnet_weights == 'None':
117 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = None,include_top=False,input_tensor=input_tensor)
118 | else:
119 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = 'imagenet',include_top=False,input_tensor=input_tensor)
120 |
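# Freeze the earlier ResNet50 layers (roughly everything before the final
# convolutional stage); only the remaining layers and the new head train.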
121 | for layer in pretrained_resnet50.layers[:143]:
122 | layer.trainable = False
123 |
124 | classifier = tf.keras.models.Sequential()
125 | classifier.add(tf.keras.layers.Input(shape=(32,32,3)))
126 | classifier.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, resnet_input_shape[:2])))
127 | classifier.add(pretrained_resnet50)
128 | classifier.add(tf.keras.layers.Flatten())
129 | classifier.add(tf.keras.layers.BatchNormalization())
130 | classifier.add(tf.keras.layers.Dense(64, activation='relu'))
131 | classifier.add(tf.keras.layers.Dropout(0.5))
132 | classifier.add(tf.keras.layers.BatchNormalization())
133 | classifier.add(tf.keras.layers.Dense(10, activation='softmax'))
134 |
135 |
136 | if args.keras_opt == 'adam':
137 | optimizer = tf.keras.optimizers.Adam(learning_rate = args.keras_alpha,epsilon = 1e-8)
138 | elif args.keras_opt == 'sgd':
139 | optimizer = tf.keras.optimizers.SGD(learning_rate=args.keras_alpha)
140 | else:
141 | 	raise ValueError('keras_opt must be adam or sgd')
142 |
143 | if args.loss_type == 'mixed':
144 | def mixed(y_true, y_pred):
145 | squared_difference = tf.square(y_true - y_pred)
146 | return tf.reduce_mean(squared_difference, axis=-1) +tf.keras.losses.CategoricalCrossentropy(from_logits = True)(y_true, y_pred)
147 | loss = mixed
148 | else:
149 | loss = tf.keras.losses.CategoricalCrossentropy(from_logits = True)
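# Note: the classifier's final Dense layer applies softmax, while the
# cross entropy term is constructed with from_logits=True, so the
# network's output probabilities are treated as logits inside the loss.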
150 |
151 |
152 | classifier.compile(optimizer=optimizer,
153 | loss=loss,
154 | metrics=['accuracy'])
155 |
156 |
157 | loss_train_0, acc_train_0 = classifier.evaluate(x_train,y_train,verbose=2)
158 | print('acc_train = ',acc_train_0)
159 | loss_test_0, acc_test_0 = classifier.evaluate(x_test,y_test,verbose=2)
160 | print('acc_test = ',acc_test_0)
161 | loss_val_0, acc_val_0 = classifier.evaluate(x_val,y_val,verbose=2)
162 | print('acc_val = ',acc_val_0)
163 |
164 | aux_keras_data = {'loss_train_0':loss_train_0,'acc_train_0':acc_train_0,\
165 | 'loss_test_0':loss_test_0,'acc_test_0':acc_test_0,\
166 | 'loss_val_0':loss_val_0, 'acc_val_0':acc_val_0}
167 |
168 | no_callback = True
169 | if no_callback:
170 | callbacks = []
171 | else:
172 | callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_acc',restore_best_weights = True)]
173 |
174 | keras_directory = 'keras_logging_cifar10/'
175 | # CSV logging
176 | if not os.path.exists(keras_directory):
177 | os.makedirs(keras_directory)
178 | keras_logger_name = keras_directory+str(datetime.date.today())+args.keras_opt+str(args.keras_alpha)+'_'+str(args.keras_epochs)+'_seed'+str(args.seed)+'.csv'
179 | callbacks.append(tf.keras.callbacks.CSVLogger(keras_logger_name, append=True, separator=';'))
180 |
181 | classifier.fit(x_train[:], y_train[:], epochs=args.keras_epochs,batch_size = 32,\
182 | callbacks = callbacks ,verbose = True,validation_data = (x_val,y_val))
183 |
184 |
185 | # Grab the weights and check the accuracy post process
186 | set_weights = {}
187 |
188 | for layer in classifier.layers:
189 | set_weights[layer.name] = classifier.get_layer(layer.name).get_weights()
190 |
191 | # Post process and save additional information from keras training
192 | loss_test_keras_final, acc_test_keras_final = classifier.evaluate(x_test,y_test,verbose=2)
193 | loss_val_keras_final, acc_val_keras_final = classifier.evaluate(x_val,y_val,verbose=2)
194 | print(80*'#')
195 | print('After keras training'.center(80))
196 | print('acc_test = ',acc_test_keras_final)
197 | print('acc_val = ',acc_val_keras_final)
198 | aux_keras_data['loss_test_final'] = loss_test_keras_final
199 | aux_keras_data['acc_test_final'] = acc_test_keras_final
200 | aux_keras_data['loss_val_final'] = loss_val_keras_final
201 | aux_keras_data['acc_val_final'] = acc_val_keras_final
202 | keras_aux_logger_name = keras_logger_name.split('.csv')[0]+'aux_data.pkl'
203 | with open(keras_aux_logger_name,'wb+') as f:
204 | pickle.dump(aux_keras_data,f,pickle.HIGHEST_PROTOCOL)
205 |
206 |
207 | ################################################################################
208 | # Instantiate the data, problem, regularization.
209 |
210 | t0_problem_construction = time.time()
211 | problem = ClassificationProblem(classifier,loss_type=args.loss_type,dtype=tf.float32)
212 | print('Finished constructing the problem, and it took ',time.time() - t0_problem_construction , 's')
213 |
214 |
215 | # Instantiate the data object
216 | data = Data({problem.x:x_train,problem.y_true:y_train},settings['batch_size'],\
217 | validation_data = {problem.x:x_val,problem.y_true:y_val},hessian_batch_size = settings['hess_batch_size'],seed=args.seed)
218 |
219 | settings['tikhonov_gamma'] = 0.0
220 |
221 | regularization = L2Regularization(problem,gamma = settings['tikhonov_gamma'])
222 |
223 |
224 | ################################################################################
225 | # Instantiate the model object
226 | HLModelSettings = HessianlearnModelSettings()
227 |
228 | HLModelSettings['optimizer'] = args.optimizer
229 | HLModelSettings['alpha'] = args.alpha
230 | HLModelSettings['globalization'] = None
231 | HLModelSettings['hessian_low_rank'] = args.hessian_low_rank
232 | HLModelSettings['max_backtrack'] = 20
233 | HLModelSettings['max_sweeps'] = args.max_sweeps
234 | HLModelSettings['layer_weights'] = set_weights
235 |
236 | HLModelSettings['problem_name'] = 'cifar10_resnet_classification_seed'+str(args.seed)
237 | if args.resnet_weights == 'None':
238 | HLModelSettings['problem_name'] += '_random_guess'
239 | HLModelSettings['record_spectrum'] = bool(args.record_spectrum)
240 | HLModelSettings['rq_data_size'] = 100
241 | HLModelSettings['printing_sweep_frequency'] = None
242 | HLModelSettings['printing_items'] = {'time':'time','sweeps':'sweeps','Loss':'train_loss','acc train':'train_acc',\
243 | '||g||':'||g||','Loss val':'val_loss','acc val':'val_acc',\
244 | 'maxacc val':'max_val_acc','alpha':'alpha'}
245 |
246 |
247 | HLModel = HessianlearnModel(problem,regularization,data,settings = HLModelSettings)
248 |
249 | if args.max_sweeps > 0:
250 | HLModel.fit()
251 |
252 |
253 | loss_test_final, acc_test_final = classifier.evaluate(x_test,y_test,verbose=2)
254 | loss_val_final, acc_val_final = classifier.evaluate(x_val,y_val,verbose=2)
255 |
256 | hl_aux_data = {'loss_test_0':loss_test_0,'acc_test_0':acc_test_0,\
257 | 'loss_val_0':loss_val_0,'acc_val_0':acc_val_0,\
258 | 'loss_test_final':loss_test_final,'acc_test_final':acc_test_final,\
259 | 'loss_val_final':loss_val_final,'acc_val_final':acc_val_final}
260 |
261 | with open(HLModel.settings['problem_name']+'_logging/'+ HLModel.logger_outname +'aux_data.pkl', 'wb+') as f:
262 | pickle.dump(hl_aux_data, f, pickle.HIGHEST_PROTOCOL)
263 |
264 | ################################################################################
265 | # Evaluate again on all the data.
266 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
267 |
268 | # # Normalize the data
269 | # x_train = x_train.astype('float32') / 255.
270 | # x_test = x_test.astype('float32') / 255.
271 |
272 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
273 | x_test = tf.keras.applications.resnet50.preprocess_input(x_test)
274 |
275 | y_train = tf.keras.utils.to_categorical(y_train)
276 | y_test = tf.keras.utils.to_categorical(y_test)
277 |
278 | loss_test_total, acc_test_total = classifier.evaluate(x_test,y_test,verbose=2)
279 | print(80*'#')
280 | print('After hessianlearn training'.center(80))
281 | print('acc_test_total = ',acc_test_total)
282 |
283 |
284 |
--------------------------------------------------------------------------------
/applications/transfer_learning/imagenet_cifar100_classification.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 |
19 | import numpy as np
20 | import os
21 | import pickle
22 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
23 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
24 | os.environ["KMP_WARNINGS"] = "FALSE"
25 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
26 | 
27 | import tensorflow as tf
28 | import time, datetime
29 | # if int(tf.__version__[0]) > 1:
30 | # import tensorflow.compat.v1 as tf
31 | # tf.disable_v2_behavior()
32 |
33 |
34 | # Memory issue with GPUs
35 | gpu_devices = tf.config.experimental.list_physical_devices('GPU')
36 | for device in gpu_devices:
37 | tf.config.experimental.set_memory_growth(device, True)
38 | # Load hessianlearn library
39 | import sys
40 | sys.path.append( os.environ.get('HESSIANLEARN_PATH', "../../"))
41 | from hessianlearn import *
42 |
43 | # Parse run specifications
44 | from argparse import ArgumentParser
45 |
46 | parser = ArgumentParser(add_help=True)
47 | parser.add_argument("-optimizer", dest='optimizer',required=False, default = 'lrsfn', help="optimizer type",type=str)
48 | parser.add_argument('-fixed_step',dest = 'fixed_step',\
49 | required= False,default = 1,help='boolean for fixed step vs globalization',type = int)
50 | parser.add_argument('-alpha',dest = 'alpha',required = False,default = 1e-5,help= 'learning rate alpha',type=float)
51 | parser.add_argument('-hessian_low_rank',dest = 'hessian_low_rank',required= False,default = 40,help='low rank for sfn',type = int)
52 | parser.add_argument('-record_spectrum',dest = 'record_spectrum',\
53 | required= False,default = 0,help='boolean for recording spectrum',type = int)
54 |
55 | parser.add_argument("-resnet_weights", dest='resnet_weights',required=False, default = 'imagenet', help="initialization for network weights",type=str)
56 |
57 | parser.add_argument('-batch_size',dest = 'batch_size',required= False,default = 32,help='batch size',type = int)
58 | parser.add_argument('-hess_batch_size',dest = 'hess_batch_size',required= False,default = 8,help='hess batch size',type = int)
59 | parser.add_argument('-keras_epochs',dest = 'keras_epochs',required= False,default = 50,help='keras_epochs',type = int)
60 | parser.add_argument("-keras_opt", dest='keras_opt',required=False, default = 'adam', help="optimizer type for keras",type=str)
61 | parser.add_argument('-keras_alpha',dest = 'keras_alpha',required= False,default = 1e-3,help='keras learning rate',type = float)
62 | parser.add_argument('-max_sweeps',dest = 'max_sweeps',required= False,default = 2,help='max sweeps',type = float)
63 |
64 | parser.add_argument("-loss_type", dest='loss_type',required=False, default = 'mixed', help="loss type: either cross_entropy or mixed",type=str)
65 | parser.add_argument('-seed',dest = 'seed',required= False,default = 0,help='seed',type = int)
66 |
67 |
68 | args = parser.parse_args()
69 |
70 | try:
71 | tf.set_random_seed(args.seed)
72 | except AttributeError:
73 | tf.random.set_seed(args.seed)
74 |
75 | # GPU Environment Details
76 | gpu_available = tf.test.is_gpu_available()
77 | built_with_cuda = tf.test.is_built_with_cuda()
78 | print(80*'#')
79 | print(('IS GPU AVAILABLE: '+str(gpu_available)).center(80))
80 | print(('IS BUILT WITH CUDA: '+str(built_with_cuda)).center(80))
81 | print(80*'#')
82 |
83 | settings = {}
84 | # Set run specifications
85 | # Data specs
86 | settings['batch_size'] = args.batch_size
87 | settings['hess_batch_size'] = args.hess_batch_size
88 |
89 |
90 | ################################################################################
91 | # Instantiate data
92 | (x_train, y_train), (_x_test, _y_test) = tf.keras.datasets.cifar100.load_data()
93 |
94 | # # Normalize the data
95 | # x_train = x_train.astype('float32') / 255.
96 | # x_test = x_test.astype('float32') / 255.
97 |
98 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
99 | x_test_full = tf.keras.applications.resnet50.preprocess_input(_x_test)
100 | x_val = x_test_full[:2000]
101 | x_test = x_test_full[2000:]
102 |
103 | y_train = tf.keras.utils.to_categorical(y_train)
104 | y_test_full = tf.keras.utils.to_categorical(_y_test)
105 | y_val = y_test_full[:2000]
106 | y_test = y_test_full[2000:]
107 |
108 | ################################################################################
109 | # Create the neural network in keras
110 |
111 | # tf.keras.backend.set_floatx('float64')
112 |
113 | resnet_input_shape = (200,200,3)
114 | input_tensor = tf.keras.Input(shape = resnet_input_shape)
115 |
116 | if args.resnet_weights == 'None':
117 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = None,include_top=False,input_tensor=input_tensor)
118 | else:
119 | pretrained_resnet50 = tf.keras.applications.resnet50.ResNet50(weights = 'imagenet',include_top=False,input_tensor=input_tensor)
120 |
121 | for layer in pretrained_resnet50.layers[:143]:
122 | layer.trainable = False
123 |
124 | classifier = tf.keras.models.Sequential()
125 | classifier.add(tf.keras.layers.Input(shape=(32,32,3)))
126 | classifier.add(tf.keras.layers.Lambda(lambda image: tf.image.resize(image, resnet_input_shape[:2])))
127 | classifier.add(pretrained_resnet50)
128 | classifier.add(tf.keras.layers.Flatten())
129 | classifier.add(tf.keras.layers.BatchNormalization())
130 | classifier.add(tf.keras.layers.Dense(128, activation='relu'))
131 | classifier.add(tf.keras.layers.Dropout(0.5))
132 | classifier.add(tf.keras.layers.BatchNormalization())
133 | classifier.add(tf.keras.layers.Dense(100, activation='softmax'))
134 |
135 |
136 | if args.keras_opt == 'adam':
137 | optimizer = tf.keras.optimizers.Adam(learning_rate = args.keras_alpha,epsilon = 1e-8)
138 | elif args.keras_opt == 'sgd':
139 | optimizer = tf.keras.optimizers.SGD(learning_rate=args.keras_alpha)
140 | else:
141 | 	raise ValueError('Unsupported keras optimizer: '+args.keras_opt)
142 |
143 | if args.loss_type == 'mixed':
144 | 	def mixed(y_true, y_pred): # mean squared error plus categorical cross entropy
145 | 		squared_difference = tf.square(y_true - y_pred)
146 | 		return tf.reduce_mean(squared_difference, axis=-1) + tf.keras.losses.CategoricalCrossentropy(from_logits = False)(y_true, y_pred)
147 | 	loss = mixed
148 | else:
149 | 	loss = tf.keras.losses.CategoricalCrossentropy(from_logits = False) # the final layer applies softmax, so predictions are probabilities rather than logits
150 |
151 |
152 | classifier.compile(optimizer=optimizer,
153 | loss=loss,
154 | metrics=['accuracy'])
155 |
156 |
157 | loss_train_0, acc_train_0 = classifier.evaluate(x_train,y_train,verbose=2)
158 | print('acc_train = ',acc_train_0)
159 | loss_test_0, acc_test_0 = classifier.evaluate(x_test,y_test,verbose=2)
160 | print('acc_test = ',acc_test_0)
161 | loss_val_0, acc_val_0 = classifier.evaluate(x_val,y_val,verbose=2)
162 | print('acc_val = ',acc_val_0)
163 |
164 | aux_keras_data = {'loss_train_0':loss_train_0,'acc_train_0':acc_train_0,\
165 | 'loss_test_0':loss_test_0,'acc_test_0':acc_test_0,\
166 | 'loss_val_0':loss_val_0, 'acc_val_0':acc_val_0}
167 |
168 | no_callback = True
169 | if no_callback:
170 | callbacks = []
171 | else:
172 | callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_acc',restore_best_weights = True)]
173 |
174 | keras_directory = 'keras_logging_cifar100/'
175 | # CSV logging
176 | if not os.path.exists(keras_directory):
177 | os.makedirs(keras_directory)
178 | keras_logger_name = keras_directory+str(datetime.date.today())+args.keras_opt+str(args.keras_alpha)+'_'+str(args.keras_epochs)+'_seed'+str(args.seed)+'.csv'
179 | callbacks.append(tf.keras.callbacks.CSVLogger(keras_logger_name, append=True, separator=';'))
180 |
181 | classifier.fit(x_train[:], y_train[:], epochs=args.keras_epochs,batch_size = 32,\
182 | callbacks = callbacks ,verbose = True,validation_data = (x_val,y_val))
183 |
184 |
185 | # Grab the weights and check the accuracy post process
186 | set_weights = {}
187 |
188 | for layer in classifier.layers:
189 | set_weights[layer.name] = classifier.get_layer(layer.name).get_weights()
190 |
191 | # Post process and save additional information from keras training
192 | loss_test_keras_final, acc_test_keras_final = classifier.evaluate(x_test,y_test,verbose=2)
193 | loss_val_keras_final, acc_val_keras_final = classifier.evaluate(x_val,y_val,verbose=2)
194 | print(80*'#')
195 | print('After keras training'.center(80))
196 | print('acc_test = ',acc_test_keras_final)
197 | print('acc_val = ',acc_val_keras_final)
198 | aux_keras_data['loss_test_final'] = loss_test_keras_final
199 | aux_keras_data['acc_test_final'] = acc_test_keras_final
200 | aux_keras_data['loss_val_final'] = loss_val_keras_final
201 | aux_keras_data['acc_val_final'] = acc_val_keras_final
202 | keras_aux_logger_name = keras_logger_name.split('.csv')[0]+'aux_data.pkl'
203 | with open(keras_aux_logger_name,'wb+') as f:
204 | pickle.dump(aux_keras_data,f,pickle.HIGHEST_PROTOCOL)
205 |
206 |
207 | ################################################################################
208 | # Instantiate the data, problem, regularization.
209 |
210 | t0_problem_construction = time.time()
211 | problem = ClassificationProblem(classifier,loss_type=args.loss_type,dtype=tf.float32)
212 | print('Finished constructing the problem, and it took ',time.time() - t0_problem_construction , 's')
213 |
214 |
215 | # Instantiate the data object
216 | data = Data({problem.x:x_train,problem.y_true:y_train},settings['batch_size'],\
217 | validation_data = {problem.x:x_val,problem.y_true:y_val},hessian_batch_size = settings['hess_batch_size'],seed=args.seed)
218 |
219 | settings['tikhonov_gamma'] = 0.0
220 |
221 | regularization = L2Regularization(problem,gamma = settings['tikhonov_gamma'])
222 |
223 |
224 | ################################################################################
225 | # Instantiate the model object
226 | HLModelSettings = HessianlearnModelSettings()
227 |
228 | HLModelSettings['optimizer'] = args.optimizer
229 | HLModelSettings['alpha'] = args.alpha
230 | HLModelSettings['globalization'] = None
231 | HLModelSettings['hessian_low_rank'] = args.hessian_low_rank
232 | HLModelSettings['max_backtrack'] = 20
233 | HLModelSettings['max_sweeps'] = args.max_sweeps
234 | HLModelSettings['layer_weights'] = set_weights
235 |
236 | HLModelSettings['problem_name'] = 'cifar100_resnet_classification_seed'+str(args.seed)
237 | if args.resnet_weights == 'None':
238 | HLModelSettings['problem_name'] += '_random_guess'
239 | HLModelSettings['record_spectrum'] = bool(args.record_spectrum)
240 | HLModelSettings['rq_data_size'] = 100
241 | HLModelSettings['printing_sweep_frequency'] = None
242 | HLModelSettings['printing_items'] = {'time':'time','sweeps':'sweeps','Loss':'train_loss','acc train':'train_acc',\
243 | '||g||':'||g||','Loss val':'val_loss','acc val':'val_acc',\
244 | 'maxacc val':'max_val_acc','alpha':'alpha'}
245 |
246 |
247 | HLModel = HessianlearnModel(problem,regularization,data,settings = HLModelSettings)
248 |
249 | if args.max_sweeps > 0:
250 | HLModel.fit()
251 |
252 |
253 | loss_test_final, acc_test_final = classifier.evaluate(x_test,y_test,verbose=2)
254 | loss_val_final, acc_val_final = classifier.evaluate(x_val,y_val,verbose=2)
255 |
256 | hl_aux_data = {'loss_test_0':loss_test_0,'acc_test_0':acc_test_0,\
257 | 'loss_val_0':loss_val_0,'acc_val_0':acc_val_0,\
258 | 'loss_test_final':loss_test_final,'acc_test_final':acc_test_final,\
259 | 'loss_val_final':loss_val_final,'acc_val_final':acc_val_final}
260 |
261 | with open(HLModel.settings['problem_name']+'_logging/'+ HLModel.logger_outname +'aux_data.pkl', 'wb+') as f:
262 | pickle.dump(hl_aux_data, f, pickle.HIGHEST_PROTOCOL)
263 |
264 | ################################################################################
265 | # Evaluate again on all the data.
266 | (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
267 |
268 | # # Normalize the data
269 | # x_train = x_train.astype('float32') / 255.
270 | # x_test = x_test.astype('float32') / 255.
271 |
272 | x_train = tf.keras.applications.resnet50.preprocess_input(x_train)
273 | x_test = tf.keras.applications.resnet50.preprocess_input(x_test)
274 |
275 | y_train = tf.keras.utils.to_categorical(y_train)
276 | y_test = tf.keras.utils.to_categorical(y_test)
277 |
278 | loss_test_total, acc_test_total = classifier.evaluate(x_test,y_test,verbose=2)
279 | print(80*'#')
280 | print('After hessianlearn training'.center(80))
281 | print('acc_test_total = ',acc_test_total)
282 |
283 |
284 |
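285 | # Example invocations (editor's sketch; the script name below is hypothetical,
286 | # but the flags are the argparse options defined at the top of this file):
287 | #
288 | #   python cifar100_transfer_learning.py -optimizer lrsfn -alpha 1e-4 -hessian_low_rank 40 -max_sweeps 2
289 | #   python cifar100_transfer_learning.py -optimizer adam -alpha 1e-3 -max_sweeps 2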
--------------------------------------------------------------------------------
/hessianlearn/algorithms/cgSolver.py:
--------------------------------------------------------------------------------
1 | # This file is part of the hessianlearn package
2 | #
3 | # hessianlearn is free software: you can redistribute it and/or modify
4 | # it under the terms of the GNU Lesser General Public License as published by
5 | # the Free Software Foundation, either version 3 of the License, or any later version.
6 | #
7 | # hessianlearn is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 | # GNU Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public License
13 | # along with hessianlearn. If not, see <http://www.gnu.org/licenses/>.
14 | #
15 | # Author: Tom O'Leary-Roseberry
16 | # Contact: tom.olearyroseberry@utexas.edu
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 | import math
22 | import numpy as np
23 | import tensorflow as tf
24 | if int(tf.__version__[0]) > 1:
25 | import tensorflow.compat.v1 as tf
26 | tf.disable_v2_behavior()
27 |
28 | from ..utilities.parameterList import ParameterList
29 | from ..algorithms import Optimizer
30 | from ..problem import IdentityPreconditioner
31 | from ..problem import L2Regularization
32 | from abc import ABC, abstractmethod
33 |
34 | class Identity(object):
35 | def __init__(self):
36 |
37 | pass
38 |
39 | def __call__(self, x):
40 | return x
41 |
42 |
43 |
44 | def ParametersCGSolver(dictionary = {}):
45 | parameters = dictionary
46 | parameters["rel_tolerance"] = [1e-9, "the relative tolerance for the stopping criterion"]
47 | parameters["abs_tolerance"] = [1e-12, "the absolute tolerance for the stopping criterion"]
48 | parameters["max_iter"] = [10, "the maximum number of iterations"]
49 | parameters["zero_initial_guess"] = [True, "if True we start with a 0 initial guess; if False we use the x as initial guess."]
50 | parameters["print_level"] = [-1, "verbosity level: -1 --> no output on screen; 0 --> only final residual at convergence or reason for not not convergence"]
51 |
52 | parameters['coarse_tol'] = [0.5,'coarse tolerance used in calculation of relative tolerances for E-W conditions']
53 |
54 | parameters['default_damping'] = [1e-3, "Levenberg-Marquardt damping when no regularization is used"]
55 | return ParameterList(parameters)
56 |
57 |
58 | class CGSolver(ABC):
59 | """
60 | This class implements a custom CG solver to be used with Inexact Newton CG
61 | """
62 | reason = ["Maximum Number of Iterations Reached",
63 | "Relative/Absolute residual less than tol",
64 | "Reached a negative direction",
65 | "Reached trust region boundary"
66 | ]
67 | def __init__(self,problem,regularization,sess = None,Aop = None,preconditioner = None,x = None,parameters = ParametersCGSolver()):
68 | """
69 | The constructor for this class takes:
70 | -problem: hessianlearn.problem.Problem
71 | -regularization: hessianlearn.problem.Regularization
72 | -sess: tf.Session()
73 | -Aop: matrix vector product callable
74 | 		-preconditioner: hessianlearn.problem.Preconditioner
75 | -parameters: solver hyperparameters
76 | """
77 | self.sess = sess
78 | self.problem = problem
79 | if regularization.parameters['gamma'] < 1e-4:
80 | regularization = L2Regularization(self.problem,gamma = parameters['default_damping'])
81 | self.regularization = regularization
82 | if x is None:
83 | # self.x = tf.Variable(self.problem.gradient.initialized_value())
84 | self.x = self.problem.gradient
85 | else:
86 | self.x = x
87 | self.parameters = parameters
88 | if Aop is None:
89 | self.Aop = self.problem.Hdw + self.regularization.Hdw
90 | else:
91 | # be careful to note what the operator requires be passed into feed_dict
92 | self.Aop = Aop
93 | # Define preconditioner
94 | if preconditioner is None:
95 | self.Minv = IdentityPreconditioner(problem,self.problem.dtype)
96 | else:
97 | self.Minv = preconditioner
98 |
99 | self.update_x = self.update_without_trust_region
100 | self.B_op = None
101 |
102 | def initialize_trust_region(self,coarse_tol = None):
103 | """
104 | This method initializes the trust region parameters
105 | -coarse_tol: coarse tolerance
106 | """
107 | self.update_x = self.update_with_trust_region
108 | if coarse_tol is not None:
109 | self.parameters['coarse_tol'] = coarse_tol
110 |
111 | def set_trust_region_radius(self,radius,operator = Identity()):
112 | """
113 | This method sets the trust region radius when trust region is used
114 | for globalization
115 | -radius: trust region radius
116 | -operator: for use in TR calculations
117 | """
118 | assert self.parameters['zero_initial_guess']
119 | self.trust_region_radius_squared = radius**2
120 | self.B_op = operator
121 |
122 | def update_without_trust_region(self,x,alpha,p):
123 | """
124 | This method updates the approximation of x^* and returns False when
125 | TR is not used
126 | -x: solution at given iteration
127 | -alpha: step length
128 | -p: search direction
129 | """
130 | x = x + alpha*p
131 | return False, x
132 |
133 | def update_with_trust_region(self,x,alpha,p):
134 | """
135 | 		This method returns a Boolean indicating whether the point was placed
136 | on the trust region boundary or not, as well as the updated x
137 | -x: solution at given iteration
138 | -alpha: step length
139 | -p: search direction
140 | """
141 | 		step = x + alpha*p
142 | 		assert self.B_op is not None
143 | 		step_length_squared = np.dot(step,self.B_op(step)) # squared B-norm of the proposed step
144 | 		if step_length_squared < self.trust_region_radius_squared:
145 | 			return False, step
146 | 		else:
147 | 			# Move the point to the boundary: tau solves a_tau*tau^2 + b_tau*tau + c_tau = 0, i.e. ||x + tau*alpha*p||_B^2 = radius^2
148 | 			Bp = self.B_op(p)
149 | 			xBp = np.dot(x,Bp)
150 | 			pBp = np.dot(p,Bp)
151 | 			Bx = self.B_op(x)
152 | 			xBx = np.dot(x,Bx)
153 | 			a_tau = alpha*alpha*pBp
154 | 			b_tau = 2*alpha*xBp
155 | 			c_tau = xBx - self.trust_region_radius_squared
156 | 			discriminant = b_tau*b_tau - 4*a_tau*c_tau
157 | 			if discriminant < 0:
158 | 				print('Issue with the discriminant in the trust region step')
159 | 				discriminant *= -1
160 | 			tau = 0.5*(-b_tau + math.sqrt(discriminant))/a_tau
161 | 			alpha_tau = alpha*tau
162 | 			return True, x + alpha_tau*p
163 |
164 | def solve(self,b,feed_dict = None,x_0 = None):
165 | r"""
166 | Solve Ax=b by the preconditioned conjugate gradients method
167 | as defined in Iterative Methods Ed. 2 by Yousef Saad p 263
168 | -b: the right hand side
169 | -feed_dict: the data dictionary used to evaluate stochastic
170 | operators
171 | -x_0: the initial guess for CG
172 | """
173 | assert self.sess is not None
174 | assert feed_dict is not None
175 |
176 | self.iter = 0
177 | self.converged = False
178 | self.reason_id = 0
179 | x = np.zeros_like(b)
180 |
181 | feed_dict[self.problem.dw] = x
182 | Ax_0 = self.sess.run(self.Aop,feed_dict = feed_dict)
183 | 		# Calculate initial residual r = b - Ax_0
184 | r = b - Ax_0
185 | # Apply preconditioner z = M^{-1}r
186 | feed_dict[self.Minv.x] = r
187 | 		# FIXME: the preconditioner application has not been verified; the default Minv is the identity
188 |
189 | z = self.sess.run(self.Minv(),feed_dict = feed_dict)
190 |
191 |
192 | # Calculate p (copy array)
193 | p = z.copy()
194 | # Calculate tolerance for Eisenstat Walker conditions
195 | rz_0 = np.dot(r,z)
196 | rtol2 = rz_0 * self.parameters["rel_tolerance"] * self.parameters["rel_tolerance"]
197 | atol2 = self.parameters["abs_tolerance"] * self.parameters["abs_tolerance"]
198 | tol = max(rtol2, atol2)
199 | # Check convergence and initialize for solve:
200 | converged = (rz_0 < tol)
201 | if converged:
202 | self.converged = True
203 | self.reason_id = 1
204 | self.final_norm = math.sqrt(rz_0)
205 | if(self.parameters["print_level"] >= 0):
206 | print( self.reason[self.reason_id])
207 | print( "Converged in ", self.iter, " iterations with final norm ", self.final_norm)
208 | return x, False
209 | # Check if the direction is negative before taking a step.
210 | feed_dict[self.problem.dw] = p
211 | Ap = self.sess.run(self.Aop,feed_dict = feed_dict)
212 | pAp = np.dot(p,Ap)
213 | negative_direction = (pAp <= 0.0)
214 | if negative_direction:
215 | self.converged = True
216 | self.reason_id = 2
217 | x += p
218 | r -= Ap
219 | feed_dict[self.Minv.x] = r
220 | z = self.sess.run(self.Minv(),feed_dict = feed_dict)
221 | rz = np.dot(r,z)
222 | self.final_norm = math.sqrt(rz)
223 | if(self.parameters["print_level"] >= 0):
224 | print( self.reason[self.reason_id])
225 | print( "Converged in ", self.iter, " iterations with final norm ", self.final_norm)
226 | return x, False
227 |
228 | # Loop until convergence
229 | self.iter = 1
230 | while True:
231 | # Calculate alpha
232 | alpha = rz_0/pAp
233 |
234 | # Update x
235 | on_boundary,x = self.update_x(x,alpha,p)
236 | # Update r
237 |
238 | r -= alpha*Ap
239 | # Apply preconditioner z = M^{-1}r
240 | feed_dict[self.Minv.x] = r
241 | z = self.sess.run(self.Minv(),feed_dict = feed_dict)
242 |
243 | # Calculate rz
244 | rz = np.dot(r,z)
245 | # print(self.iter,rz)
246 | # Check convergence
247 | converged = (rz < tol)
248 | if converged:
249 | self.converged = True
250 | self.reason_id = 1
251 | self.final_norm = math.sqrt(rz)
252 | if(self.parameters["print_level"] >= 0):
253 | print( self.reason[self.reason_id])
254 | print( "Converged in ", self.iter, " iterations with final norm ", self.final_norm)
255 | break
256 | self.iter += 1
257 | if self.iter > self.parameters["max_iter"]:
258 | self.converged = False
259 | self.reason_id = 0
260 | self.final_norm = math.sqrt(rz)
261 | if(self.parameters["print_level"] >= 0):
262 | print( self.reason[self.reason_id])
263 | print( "Not Converged. Final residual norm ", self.final_norm)
264 | break
265 | beta = rz / rz_0
266 | p = z + beta*p
267 | # Check if the direction is negative, and prepare for next iteration.
268 | feed_dict[self.problem.dw] = p
269 | Ap = self.sess.run(self.Aop,feed_dict = feed_dict)
270 | pAp = np.dot(p,Ap)
271 | negative_direction = (pAp <= 0.0)
272 |
273 | if negative_direction:
274 | self.converged = True
275 | self.reason_id = 2
276 | self.final_norm = math.sqrt(rz)
277 | if(self.parameters["print_level"] >= 0):
278 | print( self.reason[self.reason_id])
279 | print( "Converged in ", self.iter, " iterations with final norm ", self.final_norm)
280 | break
281 |
282 | rz_0 = rz
283 |
284 | return x, on_boundary
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 | class CGSolver_scipy(ABC):
295 | """
296 | This class implements a wrapper for the scipy CG solver
297 | """
298 | reason = ["Maximum Number of Iterations Reached",
299 | "Relative/Absolute residual less than tol",
300 | "Reached a negative direction",
301 | "Reached trust region boundary"
302 | ]
303 | def __init__(self,problem,regularization,sess = None,Aop = None,preconditioner = None,parameters = ParametersCGSolver()):
304 | """
305 | The constructor for this class takes
306 | -problem: hessianlearn.problem.Problem
307 | -regularization: hessianlearn.problem.Regularization
308 | 		-sess: tf.Session()
309 | -Aop: matrix vector product callable
310 | 		-preconditioner: hessianlearn.problem.Preconditioner (not currently used)
311 | -parameters: solver hyperparameters
312 | """
313 | self.sess = sess
314 | self.problem = problem
315 | self.regularization = regularization
316 | self.parameters = parameters
317 | if Aop is None:
318 | self.Aop = self.problem.Hdw + self.regularization.Hdw
319 | else:
320 | # be careful to note what the operator requires be passed into feed_dict
321 | self.Aop = Aop
322 | # # Define preconditioner
323 | # if preconditioner is None:
324 | # self.Minv = IdentityPreconditioner(problem,self.problem.dtype)
325 | # else:
326 | # self.Minv = preconditioner
327 |
328 |
329 |
330 |
331 |
332 |
333 | def solve(self,b,feed_dict = None,x_0 = None):
334 | r"""
335 | 		Solve Ax=b by the conjugate gradient method,
336 | 		wrapping scipy.sparse.linalg.cg
337 | 		-b: right hand side
338 | 		-feed_dict: data dictionary used to evaluate the stochastic operator
339 | 		-x_0: initial guess (currently ignored; the solve starts from zero)
340 | """
341 | assert self.sess is not None
342 | assert feed_dict is not None
343 |
344 | self.iter = 0
345 | self.converged = False
346 | self.reason_id = 0
347 | x = np.zeros_like(b)
348 |
349 | feed_dict[self.problem.dw] = x
350 | Ax_0 = self.sess.run(self.Aop,feed_dict = feed_dict)
351 | 		# Calculate initial residual r = b - Ax_0
352 | 		r = b - Ax_0
353 | 		# Relative/absolute tolerances: scipy's cg terminates when
354 | 		# ||r|| <= max(tol*||b||, atol), which with the zero initial guess
355 | 		# amounts to a relative/absolute residual stopping criterion
356 | 		rel_tol = self.parameters["rel_tolerance"]
357 | 		abs_tol = self.parameters["abs_tolerance"]
358 | import scipy
359 | from scipy.sparse.linalg import LinearOperator
360 |
361 | def Ap(p):
362 | feed_dict[self.problem.dw] = p
363 | return self.sess.run(self.Aop,feed_dict = feed_dict)
364 |
365 | n = self.problem.dimension
366 |
367 | A = LinearOperator((n,n), matvec=Ap)
368 |
369 | # self.iter += self.parameters["max_iter"]
370 |
371 | def update_iters(rk):
372 | self.iter +=1
373 |
374 | 		return scipy.sparse.linalg.cg(A, b, tol=rel_tol, atol=abs_tol, maxiter=self.parameters["max_iter"], callback=update_iters) # returns (x, info) as scipy does
375 |
376 |
377 |
378 |
379 |
380 |
381 |
382 |
383 |
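384 | # A minimal NumPy reference for the preconditioned CG recurrence used in
385 | # CGSolver.solve above (Saad, Iterative Methods for Sparse Linear Systems,
386 | # 2nd ed.). This is an editor's sketch, not part of the hessianlearn API;
387 | # it is useful for sanity-checking the update formulas without a tf.Session.
388 | def pcg_reference(A_mv, b, M_inv=lambda r: r, rel_tol=1e-9, abs_tol=1e-12, max_iter=100):
389 | 	x = np.zeros_like(b)
390 | 	r = b - A_mv(x) # initial residual r = b - Ax_0
391 | 	z = M_inv(r) # apply preconditioner z = M^{-1}r
392 | 	p = z.copy()
393 | 	rz = np.dot(r,z)
394 | 	tol = max(rz*rel_tol*rel_tol, abs_tol*abs_tol)
395 | 	if rz < tol:
396 | 		return x
397 | 	Ap = A_mv(p)
398 | 	pAp = np.dot(p,Ap)
399 | 	if pAp <= 0.0: # negative direction before any step is taken
400 | 		return x + p
401 | 	for _ in range(max_iter):
402 | 		alpha = rz/pAp
403 | 		x = x + alpha*p
404 | 		r = r - alpha*Ap
405 | 		z = M_inv(r)
406 | 		rz_next = np.dot(r,z)
407 | 		if rz_next < tol: # relative/absolute residual test
408 | 			return x
409 | 		p = z + (rz_next/rz)*p
410 | 		rz = rz_next
411 | 		Ap = A_mv(p)
412 | 		pAp = np.dot(p,Ap)
413 | 		if pAp <= 0.0: # negative direction reached: stop without stepping
414 | 			return x
415 | 	return x
416 | # e.g. A = np.diag([1.,2.,3.]); x = pcg_reference(lambda v: A.dot(v), np.ones(3))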
--------------------------------------------------------------------------------