├── tests ├── pytest.ini ├── __init__.py ├── utils │ ├── __init__.py │ └── shared.py ├── problems │ ├── __init__.py │ ├── convolutional.py │ ├── conv_utils.py │ └── linear.py ├── test_experiments.py ├── test_conv.py ├── test_nn.py ├── test_finders.py └── test_optimizers.py ├── requirements.txt ├── autocrit ├── nn │ ├── __init__.py │ ├── conv.py │ ├── networks.py │ └── layers.py ├── utils │ ├── __init__.py │ ├── math.py │ └── random_matrix.py ├── finders │ ├── __init__.py │ ├── base.py │ ├── gradnormmin.py │ ├── newtons.py │ └── minresQLP.py ├── __init__.py ├── defaults.py ├── optimizers.py └── experiments.py ├── .flake8 ├── requirements-dev.txt ├── README.md ├── setup.py └── .gitignore /tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = -m "not slow" 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from . import problems 2 | 3 | __all__ = ["problems"] 4 | -------------------------------------------------------------------------------- /tests/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import shared 2 | 3 | __all__ = ["shared"] 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | autograd>=1.2 2 | numpy>=1.16 3 | pandas>=0.24 4 | scipy>=1.2 5 | -------------------------------------------------------------------------------- /tests/problems/__init__.py: -------------------------------------------------------------------------------- 1 | from . import linear 2 | 3 | __all__ = ["linear"] 4 | -------------------------------------------------------------------------------- /autocrit/nn/__init__.py: -------------------------------------------------------------------------------- 1 | from . import layers, networks 2 | 3 | __all__ = ["layers", "networks"] 4 | -------------------------------------------------------------------------------- /autocrit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import math, random_matrix 2 | 3 | __all__ = ["math", "random_matrix"] 4 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E731, E741, W504 3 | max-line-length = 100 4 | 5 | exclude = autocrit/finders/minresQLP.py 6 | -------------------------------------------------------------------------------- /autocrit/finders/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import base, gradnormmin, newtons 2 | 3 | __all__ = ["base", "gradnormmin", "newtons"] 4 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | autograd>=1.2 2 | numpy>=1.16 3 | pandas>=0.24 4 | scipy==1.1 5 | 6 | torch 7 | torchvision 8 | 9 | pyflakes>=2.1.1 10 | pytest>=4.4.1 11 | scikit-learn>=0.20.3 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # autocrit 2 | 3 | Autograd-based package for finding critical points of functions using Newton-based methods and gradient norm minimization. 4 | Specifically intended for use with neural networks, see `autocrit.nn` module. 5 | 6 | To install, clone this repository, navigate to the directory it was cloned into, and then use the command 7 | ``` 8 | pip install -e autocrit 9 | ``` 10 | 11 | For additional tools useful in the analysis of critical point-finding experiments, 12 | see the [`autocrit_tools` repo](https://github.com/charlesfrye/autocrit_tools). 13 | -------------------------------------------------------------------------------- /autocrit/__init__.py: -------------------------------------------------------------------------------- 1 | from .finders import gradnormmin, newtons 2 | from . import experiments, finders, nn, optimizers 3 | 4 | GradientNormMinimizer = gradnormmin.GradientNormMinimizer 5 | 6 | FastNewtonMR = newtons.FastNewtonMR 7 | FastNewtonTR = newtons.FastNewtonTR 8 | 9 | OptimizationExperiment = experiments.OptimizationExperiment 10 | CritFinderExperiment = experiments.CritFinderExperiment 11 | 12 | FullyConnectedNetwork = nn.networks.FullyConnected 13 | 14 | __all__ = ["finders", "optimizers", 15 | "gradnormmin", "newtons", 16 | "GradientNormMinimizer", 17 | "FastNewtonMR", "FastNewtonTR", 18 | "OptimizationExperiment", "CritFinderExperiment", 19 | "FullyConnectedNetwork"] 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | version = "0.0.1" 4 | 5 | with open("README.md", "r") as f: 6 | long_description = f.read() 7 | 8 | setuptools.setup( 9 | name="autocrit", 10 | version="0.0.1", 11 | author="Charles Frye", 12 | author_email="cfrye59@gmail.com", 13 | description="Critical point-finding algorithms in autograd", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | packages=setuptools.find_packages(), 17 | classifiers=[ 18 | "Programming Language :: Python :: 3.5", 19 | "Programming Language :: Python :: 3.6", 20 | "Programming Language :: Python :: 3.7", 21 | "Programming Language :: Python :: 3.8", 22 | "Operating System :: OS Independent"], 23 | ) 24 | -------------------------------------------------------------------------------- /autocrit/defaults.py: -------------------------------------------------------------------------------- 1 | """Shared default values for numerical constants/hyperparameters 2 | """ 3 | # ALPHA: learning rate 4 | DEFAULT_ALPHA = 0.1 5 | # BETA: learning rate decrease factor in BTLS 6 | DEFAULT_BETA = 0.5 7 | # RHO: scaling factor for Armijo/sufficient decrease criterion 8 | DEFAULT_RHO = 1e-4 9 | # RHO_PURE: same as RHO, but for the Pure Newton check 10 | DEFAULT_RHO_PURE = 0.5 11 | # GAMMA: scaling factor for Wolfe/sufficient 
curvature decrease criterion 12 | DEFAULT_GAMMA = 0.9 13 | # GAMMAS: "nudge" to add to diagonal in NewtonTR 14 | DEFAULT_GAMMAS = (1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6) 15 | 16 | # MOMENTUM: momentum coefficient for MomentumOptimizers 17 | DEFAULT_MOMENTUM = 0.9 18 | 19 | # MINIMIZER_PARAMS: default parameters for GNM minimizer 20 | DEFAULT_MINIMIZER_PARAMS = {"lr": DEFAULT_ALPHA} 21 | 22 | # STEP_SIZE: "learning rate" for Newton methods 23 | DEFAULT_STEP_SIZE = 1.0 24 | 25 | # RTOL, MAXIT: parameters for MRQLP step of NewtonMR 26 | DEFAULT_RTOL = 1e-10 27 | DEFAULT_MAXIT = 25 28 | -------------------------------------------------------------------------------- /tests/test_experiments.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | import autocrit 4 | 5 | import tests.utils.shared as shared 6 | 7 | 8 | def test_OptimizationExperiment(tmpdir): 9 | """test saving, execution, and loading for default kwargs 10 | """ 11 | num_iters = 10 12 | 13 | _, network, init_theta = shared.generate_random_shallow_regression() 14 | 15 | experiment = autocrit.OptimizationExperiment(network.loss) 16 | 17 | outfile = tmpdir / "optexpt.json" 18 | experiment_test(experiment, network.loss, 19 | init_theta, num_iters, outfile) 20 | 21 | 22 | def test_CritFinderExperiment(tmpdir): 23 | """test saving, execution, and loading for default kwargs 24 | """ 25 | num_iters = 10 26 | 27 | _, network, init_theta = shared.generate_random_shallow_regression() 28 | 29 | experiment = autocrit.CritFinderExperiment(network.loss, "newtonMR") 30 | 31 | outfile = tmpdir / "cfexpt.json" 32 | experiment_test(experiment, network.loss, 33 | init_theta, num_iters, outfile) 34 | 35 | 36 | def experiment_test(experiment, f, init_theta, num_iters, outfile): 37 | thetas = experiment.run(init_theta, num_iters=num_iters) 38 | 39 | experiment.to_json(outfile) 40 | 41 | reloaded_expt = experiment.from_json(f, outfile) 42 | assert experiment.construct_dictionary() == reloaded_expt.construct_dictionary() 43 | 44 | reloaded_thetas = reloaded_expt.run(init_theta, num_iters=num_iters) 45 | assert np.array_equal(thetas, reloaded_thetas) 46 | -------------------------------------------------------------------------------- /tests/test_conv.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import autograd 4 | import autograd.numpy as np 5 | 6 | import autocrit.nn.conv 7 | 8 | CONV_KWARGS = {"axes": ([2, 3], [2, 3]), 9 | "dot_axes": ([1], [0]), 10 | "mode": "valid"} 11 | 12 | 13 | def test_accelerated_equivalence(): 14 | warnings.filterwarnings("ignore") 15 | batch = 10 16 | in_ch = 3 17 | out_ch = 16 18 | k_size = 3 19 | 20 | X = np.random.randn(batch, in_ch, 32, 32) 21 | w = np.random.randn(out_ch, in_ch, k_size, k_size) 22 | w = np.ascontiguousarray(np.transpose(w, (1, 0, 2, 3))) 23 | 24 | y = autocrit.nn.conv.convolve(X, w, accelerated=False, **CONV_KWARGS) 25 | accelerated_y = autocrit.nn.conv.convolve(X, w, accelerated=True, **CONV_KWARGS) 26 | 27 | loss_grads = loss_grad(X, w) 28 | accelerated_loss_grads = accelerated_loss_grad(X, w) 29 | 30 | assert np.allclose(y, accelerated_y),\ 31 | "accelerated output not equal to autograd output" 32 | assert np.allclose(loss_grads, accelerated_loss_grads),\ 33 | "accelerated gradients not equal to autograd gradients" 34 | 35 | 36 | def accelerated_loss(X, w): 37 | activations = autocrit.nn.conv.convolve(X, w, accelerated=True, **CONV_KWARGS) 38 | squared_activations = 
np.square(activations) 39 | return np.mean(squared_activations) 40 | 41 | 42 | def loss(X, w): 43 | activations = autocrit.nn.conv.convolve(X, w, accelerated=False, **CONV_KWARGS) 44 | squared_activations = np.square(activations) 45 | return np.mean(squared_activations) 46 | 47 | 48 | accelerated_loss_grad = autograd.grad(accelerated_loss, argnum=1) 49 | loss_grad = autograd.grad(loss, argnum=1) 50 | -------------------------------------------------------------------------------- /autocrit/utils/math.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | XH_EPS = 1e-25 4 | RESCALE_EPS = 1e-6 5 | 6 | 7 | def rms(arr): 8 | return np.sqrt(np.mean(np.square(arr))) 9 | 10 | 11 | def relu(x): 12 | return np.where(x > 0., x, 0.) 13 | 14 | 15 | def softplus(x, lam=5.): 16 | return 1 / lam * np.log(1 + np.exp(lam * x)) 17 | 18 | 19 | def sigmoid(x): 20 | return np.where(x >= 0, _positive_sigm(x), _negative_sigm(x)) 21 | 22 | 23 | def swish(x): 24 | return np.multiply(x, sigmoid(x)) 25 | 26 | 27 | def _negative_sigm(x): 28 | expon = np.exp(-x) 29 | return 1 / (1 + expon) 30 | 31 | 32 | def _positive_sigm(x): 33 | expon = np.exp(x) 34 | return expon / (1 + expon) 35 | 36 | 37 | def mean_squared_error(x, xhat): 38 | return np.mean(np.square(x - xhat)) 39 | 40 | 41 | def softmax_cross_entropy(l, p): 42 | phat = softmax(l) 43 | return np.mean(cross_entropy(p, phat)) 44 | 45 | 46 | def softmax(x): 47 | expon = np.exp(x - np.max(x, axis=0)) 48 | return expon / np.sum(expon, axis=0) 49 | 50 | 51 | def cross_entropy(ps, qs, eps=XH_EPS): 52 | return np.einsum("ij,ij->j", ps, -np.log(qs + eps)) 53 | 54 | 55 | def logits_to_labels(logits): 56 | return np.argmax(logits, axis=0) 57 | 58 | 59 | def accuracy(yhats, ys): 60 | return np.mean(yhats == ys) 61 | 62 | 63 | def assess_accuracy(network, theta, X, Y_iis): 64 | logits = network.forward_pass(X, theta) 65 | labels = logits_to_labels(logits) 66 | return accuracy(labels, Y_iis) 67 | 68 | 69 | def pointwise_nonlinearity(parameters, x, nonlinearity): 70 | W, b = parameters 71 | return nonlinearity(np.dot(W, x) + b) 72 | 73 | 74 | def cossim(x, y): 75 | return np.dot(x.T, y) / (np.linalg.norm(x) * np.linalg.norm(y)) 76 | 77 | 78 | def rescale(arr, eps=RESCALE_EPS): 79 | return (arr - np.min(arr)) / max((np.max(arr) - np.min(arr)), eps) 80 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Generic Python 2 | # 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | env/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # dotenv 85 | .env 86 | 87 | # virtualenv 88 | .venv 89 | venv/ 90 | ENV/ 91 | 92 | # Spyder project settings 93 | .spyderproject 94 | .spyproject 95 | 96 | # Rope project settings 97 | .ropeproject 98 | 99 | # mkdocs documentation 100 | /site 101 | 102 | # mypy 103 | .mypy_cache/ 104 | 105 | # Vim 106 | *~ 107 | 108 | ## Project-Specific 109 | 110 | # data files 111 | *.npz 112 | *.npy 113 | *.pkl 114 | *.out 115 | 116 | # notebooks 117 | nbs/ 118 | -------------------------------------------------------------------------------- /tests/problems/convolutional.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | from . import conv_utils 4 | 5 | 6 | class Classification(object): 7 | 8 | def __init__(self, X, Y, Y_iis): 9 | """ 10 | X : array, h x w x ch x n input observations 11 | Y : array, l x n output label onehots 12 | Y_iis : array, 1 x n output label integers 13 | """ 14 | self.X = X 15 | self.Y = Y 16 | self.Y_iis = Y_iis 17 | 18 | self.exact_solution = None 19 | 20 | def loss(self, soln): 21 | if soln is None: 22 | return 0 23 | else: 24 | raise ValueError( 25 | "loss not implemented for convolutional classification") 26 | 27 | @classmethod 28 | def generate_test_problem(cls, n, im_side=17, autocorr_scale=5.): 29 | """Generate a test convolutional linear classification // 30 | logistic regression problem using a mixture of Gaussians. 31 | 32 | The problem is to separate images with a pink power spectrum 33 | from images with a white power spectrum. 34 | 35 | Images are square with side length im_side and the pink noise 36 | images have an autocorrelation scale monotonically increasing 37 | with autocorr_scale. 
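The n requested observations are split evenly between the two classes.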
38 | """ 39 | n = n // 2 40 | 41 | X, Y, Y_iis = generate_convolutional_mog_data(n, im_side, autocorr_scale) 42 | 43 | return cls(X, Y, Y_iis) 44 | 45 | 46 | def generate_convolutional_mog_data(n, im_side=17, autocorr_scale=5.): 47 | 48 | circ_cov_mat = conv_utils.generate_iostropic_circulant_cov_2d( 49 | im_side, autocorr_scale=autocorr_scale) 50 | circ_class_samples = conv_utils.rgb_gauss_random_samples( 51 | n, cov_or_covs=circ_cov_mat) 52 | 53 | white_noise_cov_mat = np.eye(im_side ** 2) 54 | noise_class_samples = conv_utils.rgb_gauss_random_samples( 55 | n, cov_or_covs=white_noise_cov_mat) 56 | 57 | # combine batch major samples from each class 58 | inputs = np.concatenate([circ_class_samples.T, noise_class_samples.T]) 59 | 60 | # covert to images, then return to batch minor 61 | inputs = np.asarray( 62 | [conv_utils.to_im_rgb(inpt, im_side) for inpt in inputs]).T 63 | 64 | # generate one_hot and label vectors 65 | one_hots = np.hstack([np.tile(np.atleast_2d([1, 0]).T, [1, n]), 66 | np.tile(np.atleast_2d([0, 1]).T, [1, n])]) 67 | labels = np.argmax(one_hots, axis=0) 68 | 69 | return inputs, one_hots, labels 70 | -------------------------------------------------------------------------------- /autocrit/finders/base.py: -------------------------------------------------------------------------------- 1 | """Provides an abstract base class for critical-point-finding algorithms, 2 | aka finders. 3 | 4 | Primarily handles high-level API and logging. Also infers Hessian and 5 | Hessian-vector product (hvp) oracles from a zeroth-order oracle. 6 | """ 7 | import autograd 8 | import autograd.numpy as np 9 | 10 | 11 | class Finder(object): 12 | """Abstract base class for critical-point-finding algorithms (finders). 13 | """ 14 | 15 | def __init__(self, f, grad_kwargs=None, log_kwargs=None): 16 | self.f = f 17 | self.grad_f = autograd.grad(f) 18 | self.H = lambda theta: np.squeeze(autograd.hessian(f)(theta)) 19 | self.hvp = lambda theta, v: np.dot(self.H(theta), v) 20 | 21 | self.log = {} 22 | self.loggers = [] 23 | if log_kwargs is None: 24 | log_kwargs = {} 25 | self.log_kwargs = log_kwargs 26 | 27 | self.setup_logs(**log_kwargs) 28 | 29 | def run(self): 30 | raise NotImplementedError 31 | 32 | def update_logs(self, step_info): 33 | for logger in self.loggers: 34 | logger.write_log(step_info, self.log) 35 | 36 | def setup_logs(self, track_theta=False, track_f=True, track_grad_f=False, track_g=False, 37 | track_update=False): 38 | if track_theta: 39 | self.loggers.append( 40 | Logger("theta", 41 | lambda step_info: step_info["theta"])) 42 | 43 | if track_f: 44 | self.loggers.append( 45 | Logger("f_theta", 46 | lambda step_info: self.f(step_info["theta"]))) 47 | 48 | if track_grad_f: 49 | self.loggers.append( 50 | Logger("grad_theta", 51 | lambda step_info: self.grad_f(step_info["theta"]))) 52 | 53 | if track_g: 54 | self.loggers.append( 55 | Logger("g_theta", 56 | lambda step_info: 0.5 * np.sum(np.square(self.grad_f(step_info["theta"]))))) 57 | 58 | if track_update: 59 | self.loggers.append( 60 | Logger("update_direction", 61 | lambda step_info: step_info["update_direction"])) 62 | 63 | 64 | class Logger(object): 65 | 66 | def __init__(self, key, log_func): 67 | self.key = key 68 | self.log_func = log_func 69 | 70 | def write_log(self, step_info, log): 71 | if self.key not in log.keys(): 72 | log[self.key] = [] 73 | log[self.key].append(self.log_func(step_info)) 74 | -------------------------------------------------------------------------------- /autocrit/nn/conv.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa E221, E251 2 | """Adds optional torch acceleration to autograd's convolve function. 3 | """ 4 | import autograd.scipy.signal as _autograd_signal 5 | from functools import partial 6 | import autograd.numpy as np 7 | import numpy as npo # original numpy 8 | from autograd.extend import primitive, defvjp 9 | 10 | try: 11 | import torch 12 | import torch.nn.functional as torch_F 13 | torch_accelerated = True 14 | except ImportError: 15 | torch_accelerated = False 16 | 17 | 18 | def convolve(A, B, axes=None, dot_axes=[(), ()], mode='full', accelerated=torch_accelerated): 19 | args_are_implemented = check_implemented(axes, dot_axes, mode) 20 | if accelerated and args_are_implemented: 21 | return _torch_convolve(A, B, axes=axes, dot_axes=dot_axes, mode=mode) 22 | else: 23 | return _autograd_signal.convolve(A, B, axes=axes, dot_axes=dot_axes, mode=mode) 24 | 25 | 26 | @primitive 27 | def _torch_convolve(A, B, axes=None, dot_axes=[(), ()], mode='full'): 28 | B = np.ascontiguousarray(np.transpose(B[:, :, ::-1, ::-1], (1, 0, 2, 3))) 29 | At, Bt = torch.tensor(A), torch.tensor(B) 30 | if tuple(dot_axes) == ([0], [0]): 31 | At = torch.transpose(At, 0 ,1) 32 | yt = torch_F.conv2d(Bt, At) 33 | yt = torch.flip(torch.transpose(yt, 0, 1), (-2, -1)) 34 | else: 35 | yt = torch_F.conv2d(At, Bt) 36 | return np.asarray(yt) 37 | 38 | 39 | def check_implemented(axes, dot_axes, mode): 40 | """Check whether a fast convolution with these argument values has been implemented.""" 41 | if tuple(axes) != ([2, 3], [2, 3]): 42 | return False 43 | if tuple(dot_axes) not in [([1], [0]), ([0], [0])]: 44 | return False 45 | if mode != "valid": 46 | return False 47 | 48 | return True 49 | 50 | 51 | def _torch_grad_convolve(argnum, ans, A, B, axes=None, dot_axes=[(), ()], mode='full'): 52 | assert mode in ['valid', 'full'], "Grad for mode {0} not yet implemented".format(mode) 53 | axes, shapes = _autograd_signal.parse_axes(A.shape, B.shape, axes, dot_axes, mode) 54 | if argnum == 0: 55 | _, Y = A, B 56 | _X_, _Y_ = 'A', 'B' 57 | ignore_Y = 'ignore_B' 58 | elif argnum == 1: 59 | _, Y = B, A 60 | _X_, _Y_ = 'B', 'A' 61 | ignore_Y = 'ignore_A' 62 | else: 63 | raise NotImplementedError("Can't take grad of convolve w.r.t. arg {0}".format(argnum)) 64 | 65 | if mode == 'full': 66 | new_mode = 'valid' 67 | else: 68 | if any([x_size > y_size for x_size, y_size 69 | in zip(shapes[_X_]['conv'], shapes[_Y_]['conv'])]): 70 | new_mode = 'full' 71 | else: 72 | new_mode = 'valid' 73 | 74 | def vjp(g): 75 | result = convolve(g, Y[tuple(_autograd_signal.flipped_idxs(Y.ndim, axes[_Y_]['conv']))], 76 | axes = [axes['out']['conv'], axes[_Y_]['conv']], 77 | dot_axes = [axes['out'][ignore_Y], axes[_Y_]['ignore']], 78 | mode = new_mode) 79 | new_order = npo.argsort(axes[_X_]['ignore'] + axes[_X_]['dot'] + axes[_X_]['conv']) 80 | return np.transpose(result, new_order) 81 | return vjp 82 | 83 | 84 | defvjp(_torch_convolve, partial(_torch_grad_convolve, 0), partial(_torch_grad_convolve, 1)) 85 | -------------------------------------------------------------------------------- /autocrit/finders/gradnormmin.py: -------------------------------------------------------------------------------- 1 | """Provides a Finder that performs gradient norm minimization to find critical points. 
2 | """ 3 | import json 4 | 5 | import autograd 6 | import autograd.numpy as np 7 | 8 | from .base import Finder, Logger 9 | from ..defaults import DEFAULT_ALPHA 10 | from ..optimizers import GradientDescentOptimizer, MomentumOptimizer 11 | from ..optimizers import BackTrackingLineSearchOptimizer 12 | 13 | DEFAULT_MINIMIZER_PARAMS = {"lr": DEFAULT_ALPHA} 14 | 15 | 16 | class GradientNormMinimizer(Finder): 17 | r"""Find critical points of function f by minimizing 18 | auxiliary function g where 19 | $$ 20 | g(theta) = \frac{1]{2}\lvert\nabla f(theta)\rvert^2 21 | $$ 22 | 23 | The gradient of g is the product of the hessian with the gradient. 24 | This can be more efficiently computed as a hessian-vector product. 25 | """ 26 | 27 | def __init__(self, f, log_kwargs=None, minimizer_str="gd", minimizer_params=None): 28 | Finder.__init__(self, f, log_kwargs=log_kwargs) 29 | 30 | def g(theta): 31 | return 0.5 * np.sum(np.square(self.grad_f(theta))) 32 | 33 | self.g = g 34 | self.grad_g = autograd.grad(g) 35 | self.hvp = autograd.hessian_vector_product(self.f) 36 | self.fast_grad_g = lambda x: self.hvp(x, self.grad_f(x)) 37 | 38 | self.minimizer_str = minimizer_str 39 | self.minimizer_params = minimizer_params or DEFAULT_MINIMIZER_PARAMS.copy() 40 | self.set_minimizer(minimizer_str) 41 | 42 | def run(self, init_theta, num_iters=1): 43 | theta = init_theta 44 | self.update_logs({"theta": theta}) 45 | 46 | for ii in range(num_iters): 47 | theta_new = theta + self.minimizer.update(theta) 48 | self.update_logs({"theta": theta_new}) 49 | 50 | if np.array_equal(theta, theta_new): 51 | return theta 52 | 53 | theta = theta_new 54 | 55 | return theta 56 | 57 | def setup_log(self, track_thetas=False, track_f_thetas=False, track_g_thetas=False): 58 | 59 | if track_thetas: 60 | self.loggers.append(Logger("theta", lambda step_info: step_info["theta"])) 61 | 62 | if track_f_thetas: 63 | self.loggers.append(Logger("f_theta", lambda step_info: self.f(step_info["theta"]))) 64 | 65 | if track_g_thetas: 66 | self.loggers.append(Logger("g_theta", lambda step_info: self.g(step_info["theta"]))) 67 | 68 | def set_minimizer(self, minimizer_str): 69 | if minimizer_str == "gd": 70 | self.minimizer = GradientDescentOptimizer( 71 | self.g, self.fast_grad_g, **self.minimizer_params) 72 | elif minimizer_str == "momentum": 73 | self.minimizer = MomentumOptimizer( 74 | self.g, self.fast_grad_g, **self.minimizer_params) 75 | elif minimizer_str == "btls": 76 | self.minimizer = BackTrackingLineSearchOptimizer( 77 | self.g, self.fast_grad_g, **self.minimizer_params) 78 | else: 79 | raise NotImplementedError 80 | 81 | def to_json(self, json_path): 82 | dictionary = self.construct_dictionary() 83 | with open(json_path, "w") as fp: 84 | json.write(dictionary, fp) 85 | 86 | @classmethod 87 | def from_json(cls, f, json_path): 88 | with open(json_path) as fp: 89 | dictionary = json.load(fp) 90 | return cls(f, **dictionary) 91 | 92 | def construct_dictionary(self): 93 | dictionary = {"log_kwargs": self.log_kwargs, 94 | "minimizer_str": self.minimizer_str, 95 | "minimzer_params": self.minimzer_params} 96 | return dictionary 97 | -------------------------------------------------------------------------------- /tests/test_nn.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | import pytest 3 | 4 | import autocrit.nn as nn 5 | 6 | 7 | def test_id_net(): 8 | none_vector = np.asarray([[None]]) 9 | id_net = nn.networks.Network( 10 | (none_vector, none_vector), 11 | 
[nn.layers.PointwiseNonlinearLayer("none")]) 12 | 13 | assert id_net.forward_pass(None, np.asarray([None])) is None 14 | 15 | 16 | def test_equivalence_fc(): 17 | 18 | network_style, fc_style, data = make_network_and_fc_style() 19 | shared_theta = network_style.initialize() 20 | 21 | assert network_style.loss(shared_theta) == fc_style.loss(shared_theta) 22 | 23 | loss_val = network_style.loss(shared_theta) 24 | 25 | network_style_dict = network_style.construct_dict() 26 | fc_style_dict = fc_style.construct_dict() 27 | 28 | network_style_rebuild = nn.networks.Network(data, **network_style_dict) 29 | fc_style_rebuild = nn.networks.FullyConnected(data, **fc_style_dict) 30 | 31 | assert loss_val == network_style_rebuild.loss(shared_theta) 32 | assert loss_val == fc_style_rebuild.loss(shared_theta) 33 | 34 | 35 | def test_regularizer_l2(): 36 | scalmult = make_scalmult(regularizer_str="l2", 37 | regularization_parameter=1.) 38 | theta = scalmult.initialize() 39 | assert scalmult.loss(theta) == np.square(theta) 40 | 41 | scalmult = make_scalmult(regularizer_str="l2", 42 | regularization_parameter=0.5) 43 | theta = scalmult.initialize() 44 | assert scalmult.loss(theta) == 0.5 * np.square(theta) 45 | 46 | 47 | def test_regularizer_l1(): 48 | scalmult = make_scalmult(regularizer_str="l1", 49 | regularization_parameter=1.) 50 | theta = scalmult.initialize() 51 | assert scalmult.loss(theta) == np.abs(theta) 52 | 53 | scalmult = make_scalmult(regularizer_str="l1", 54 | regularization_parameter=-1.) 55 | theta = scalmult.initialize() 56 | assert scalmult.loss(theta) == -np.abs(theta) 57 | 58 | 59 | def test_to_from_json(tmpdir): 60 | with pytest.raises(NotImplementedError): 61 | none_vector = np.asarray([[None]]) 62 | lambda_id_net = nn.networks.Network( 63 | (none_vector, none_vector), 64 | [nn.layers.LambdaLayer(lambda x: x)]) 65 | path = tmpdir.join("lambda_id_net.json") 66 | lambda_id_net.to_json(path) 67 | 68 | network_style, fc_style, data = make_network_and_fc_style() 69 | 70 | network_style_path = tmpdir.join("network_style.json") 71 | network_style.to_json(network_style_path) 72 | 73 | fc_style_path = tmpdir.join("fc_style.json") 74 | fc_style.to_json(fc_style_path) 75 | 76 | network_style_rebuild = nn.networks.Network.from_json(data, network_style_path) 77 | fc_style_rebuild = nn.networks.FullyConnected.from_json(data, fc_style_path) 78 | 79 | shared_theta = network_style.initialize() 80 | 81 | assert network_style_rebuild.loss(shared_theta) == network_style.loss(shared_theta) 82 | assert fc_style_rebuild.loss(shared_theta) == fc_style.loss(shared_theta) 83 | 84 | 85 | def make_scalmult(**kwargs): 86 | data = (np.asarray([0]), np.asarray([0])) 87 | scalar_mult_layer = nn.layers.FCLayer(1, has_biases=False) 88 | scalmult = nn.networks.Network( 89 | data, layer_specs=[scalar_mult_layer], 90 | **kwargs) 91 | 92 | return scalmult 93 | 94 | 95 | def make_network_and_fc_style(): 96 | layer_sizes = [2, 4] 97 | data = (np.random.standard_normal((4, 1)), np.random.standard_normal((4, 1))) 98 | 99 | network_style = nn.networks.Network( 100 | data, 101 | layer_specs=[nn.layers.FCLayer(layer_size) for layer_size in layer_sizes]) 102 | 103 | fc_style = nn.networks.FullyConnected( 104 | data, 105 | layer_sizes=layer_sizes, 106 | nonlinearity_str="none") 107 | 108 | return network_style, fc_style, data 109 | -------------------------------------------------------------------------------- /tests/problems/conv_utils.py: -------------------------------------------------------------------------------- 1 | 
"""Utilities for generating gaussian random matrices 2 | with translation-invariant covariance (toroidal boundaries) 3 | and converting between "vector" and "image" representations of same. 4 | 5 | Images constructed in this fashion (with or without channel dependencies) 6 | are the natural targets of convolutional neural networks. 7 | """ 8 | import random 9 | 10 | import autograd.numpy as np 11 | import scipy 12 | 13 | from autocrit.utils.math import rescale 14 | 15 | EPS = 1e-3 16 | 17 | 18 | def rgb_gauss_random_samples(N, mean_or_means=None, cov_or_covs=None, im_sz=None): 19 | means, covs, im_sz = _handle_kwargs(mean_or_means, cov_or_covs, im_sz) 20 | 21 | samples_by_channel = np.asarray( 22 | [np.random.multivariate_normal(mean, cov, N).T 23 | for mean, cov in zip(means, covs)]) 24 | 25 | samples_batch_minor = np.moveaxis(samples_by_channel, [0, 1], [1, 0]) 26 | 27 | return samples_batch_minor 28 | 29 | 30 | def generate_iostropic_circulant_cov_2d(k, autocorr_scale=1.): 31 | """returns covariance matrix for translation-invariant, 32 | isotropic multivariate gaussian defined on a discrete torus 33 | with side length k 34 | """ 35 | isotropic_circulant_1d = generate_isotropic_circulant_2d_vector(k, autocorr_scale) 36 | 37 | circ_mat = circulant_2d_vector_to_circulant_2d_matrix(isotropic_circulant_1d) 38 | 39 | # check symmetry 40 | assert np.array_equal(circ_mat, (circ_mat.T + circ_mat) / 2) 41 | 42 | # impose PSD 43 | cov_mat = apply_damping(circ_mat) 44 | 45 | return cov_mat 46 | 47 | 48 | def to_im(vals, im_side): 49 | return np.reshape(vals, (im_side, im_side)) 50 | 51 | 52 | def from_im(im): 53 | return np.reshape(im, im.shape[0] ** 2) 54 | 55 | 56 | def to_im_rgb(rgb_vec, im_side): 57 | return np.asarray([to_im(ch_vec, im_side) for ch_vec in rgb_vec]) 58 | 59 | 60 | def apply_damping(mat, eps=EPS): 61 | eigvals = np.linalg.eigvalsh(mat) 62 | damping_coeff = np.abs(min([min(eigvals) - eps, -eps])) 63 | damped_mat = mat + damping_coeff * np.eye(mat.shape[0]) 64 | return damped_mat 65 | 66 | 67 | def generate_isotropic_circulant_2d_vector(k, autocorr_scale): 68 | gaussian = scipy.stats.multivariate_normal(mean=[0, 0]).pdf 69 | xs = ys = np.linspace(-autocorr_scale, autocorr_scale, k) 70 | Xs, Ys = np.meshgrid(xs, ys) 71 | isotropic_circulant_1d = np.asarray( 72 | [gaussian([x, y]) for x, y in zip(Xs.flatten(), Ys.flatten())]) 73 | isotropic_circulant_1d = np.roll(isotropic_circulant_1d, 74 | -np.argmax(isotropic_circulant_1d)) 75 | return isotropic_circulant_1d 76 | 77 | 78 | def circulant_2d_vector_to_circulant_2d_matrix(circulant_2d_vector): 79 | 80 | circulant_2d_matrix = np.asarray( 81 | [np.roll(circulant_2d_vector, ii) 82 | for ii in range(len(circulant_2d_vector))]) 83 | 84 | return circulant_2d_matrix 85 | 86 | 87 | def _handle_kwargs(mean_or_means, cov_or_covs, im_sz): 88 | kwargs = [mean_or_means, cov_or_covs, im_sz] 89 | assert not all([kwarg is None for kwarg in kwargs]) 90 | 91 | if im_sz is None: 92 | assert not (mean_or_means is None and cov_or_covs is None) 93 | 94 | if cov_or_covs is not None: 95 | if type(cov_or_covs) is not list: 96 | assert isinstance(cov_or_covs, np.ndarray) 97 | covs = 3 * [cov_or_covs] 98 | else: 99 | covs = cov_or_covs 100 | else: 101 | covs = None 102 | 103 | if mean_or_means is not None: 104 | if type(mean_or_means) is not list: 105 | assert isinstance(mean_or_means, np.ndarray) 106 | means = 3 * [mean_or_means] 107 | else: 108 | means = mean_or_means 109 | else: 110 | means = None 111 | 112 | if im_sz is None: 113 | if covs is not None: 
114 | im_sz = covs[0].shape[0] 115 | else: 116 | im_sz = means.shape[0] 117 | 118 | if means is None: 119 | means = 3 * [np.zeros(im_sz)] 120 | 121 | if covs is None: 122 | covs = 3 * [np.eye(im_sz)] 123 | 124 | return means, covs, im_sz 125 | 126 | 127 | def display_sample_rgb(rgbs_batch_minor, ax, im_side=None): 128 | random_rgb_vec = random.choice(rgbs_batch_minor.T) 129 | assert random_rgb_vec.shape[0] == 3 130 | if im_side is None: 131 | candidate_im_side = np.sqrt(random_rgb_vec.shape[1]) 132 | assert candidate_im_side == int(candidate_im_side) 133 | im_side = int(candidate_im_side) 134 | 135 | random_rgb_im = to_im_rgb(random_rgb_vec, im_side) 136 | if np.min(random_rgb_im) < 0: 137 | random_rgb_im = rescale(random_rgb_im) 138 | 139 | ax.imshow(random_rgb_im.T) 140 | ax.axis("off") 141 | -------------------------------------------------------------------------------- /tests/test_finders.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import autocrit 4 | import autocrit.utils.random_matrix 5 | import pytest 6 | 7 | import tests.utils.shared as shared 8 | 9 | 10 | def test_NewtonMethod(): 11 | warnings.filterwarnings("ignore") 12 | finder = autocrit.finders.newtons.NewtonMethod 13 | finder_str = "NewtonMethod" 14 | 15 | problem_str = "least squares" 16 | finder_kwargs = {} 17 | num_iters = 1 18 | 19 | random_least_squares_problem, random_init = \ 20 | shared.generate_random_least_squares() 21 | 22 | shared.convergence_test(finder, finder_str, finder_kwargs, 23 | random_least_squares_problem.loss, random_least_squares_problem, 24 | problem_str, random_init, num_iters) 25 | 26 | 27 | def test_FastNewtonMR(): 28 | warnings.filterwarnings("ignore") 29 | finder = autocrit.FastNewtonMR 30 | finder_str = "FastNewtonMR" 31 | 32 | problem_str = "least squares" 33 | finder_kwargs = {"alpha": 0.5, "beta": 0.99, "check_pure": True} 34 | num_iters = 500 35 | 36 | random_least_squares_problem, random_init = \ 37 | shared.generate_random_least_squares() 38 | 39 | shared.convergence_test(finder, finder_str, finder_kwargs, 40 | random_least_squares_problem.loss, random_least_squares_problem, 41 | problem_str, random_init, num_iters) 42 | 43 | problem_str = "shallow regression" 44 | finder_kwargs = {"alpha": 0.5, "beta": 0.9, "rho": 1e-6} 45 | num_iters = 250 46 | 47 | random_regression_problem, network, random_init = \ 48 | shared.generate_random_shallow_regression() 49 | 50 | shared.convergence_test(finder, finder_str, finder_kwargs, 51 | network.loss, random_regression_problem, problem_str, 52 | random_init, num_iters, 53 | test_soln_converge=False) 54 | 55 | 56 | def test_FastNewtonTR(): 57 | warnings.filterwarnings("ignore") 58 | finder = autocrit.FastNewtonTR 59 | finder_str = "FastNewtonTR" 60 | 61 | problem_str = "least squares" 62 | finder_kwargs = {"step_size": 0.5} 63 | num_iters = 25 64 | 65 | random_least_squares_problem, random_init = \ 66 | shared.generate_random_least_squares() 67 | 68 | shared.convergence_test(finder, finder_str, finder_kwargs, 69 | random_least_squares_problem.loss, random_least_squares_problem, 70 | problem_str, random_init, num_iters) 71 | 72 | problem_str = "shallow regression" 73 | finder_kwargs = {"step_size": 0.1} 74 | num_iters = 250 75 | 76 | random_regression_problem, network, random_init = \ 77 | shared.generate_random_shallow_regression() 78 | 79 | shared.convergence_test(finder, finder_str, finder_kwargs, 80 | network.loss, random_regression_problem, problem_str, 81 | random_init, num_iters, 
82 | test_soln_converge=False) 83 | 84 | 85 | @pytest.mark.slow 86 | def test_deep_classification(): 87 | warnings.filterwarnings("ignore") 88 | 89 | problem_str = "deep classification" 90 | 91 | finder = autocrit.FastNewtonMR 92 | finder_str = "FastNewtonMR" 93 | 94 | finder_kwargs = {"alpha": 1., "beta": 0.5, "rho": 1e-6, 95 | "check_pure": True} 96 | num_iters = 250 97 | 98 | random_classification_problem, network, random_init = \ 99 | shared.generate_random_deep_classification() 100 | 101 | shared.convergence_test(finder, finder_str, finder_kwargs, 102 | network.loss, random_classification_problem, problem_str, 103 | random_init, num_iters, 104 | test_function_converge=False, 105 | test_soln_converge=False) 106 | 107 | finder = autocrit.FastNewtonTR 108 | finder_str = "FastNewtonTR" 109 | 110 | finder_kwargs = {"step_size": 0.05} 111 | num_iters = 250 112 | 113 | random_classification_problem, network, random_init = \ 114 | shared.generate_random_deep_classification(seed=shared.SEED + 1) 115 | 116 | shared.convergence_test(finder, finder_str, finder_kwargs, 117 | network.loss, random_classification_problem, problem_str, 118 | random_init, num_iters, 119 | test_function_converge=False, 120 | test_soln_converge=False) 121 | 122 | 123 | def test_GradientNormMinimizer(): 124 | warnings.filterwarnings("ignore") 125 | finder = autocrit.GradientNormMinimizer 126 | finder_str = "GradientNormMinimizer" 127 | 128 | problem_str = "least squares" 129 | finder_kwargs = {"minimizer_str": "momentum", 130 | "minimizer_params": {"lr": 1e-2, 131 | "momentum": 0.9}} 132 | num_iters = 1000 133 | 134 | random_least_squares_problem, random_init = \ 135 | shared.generate_random_least_squares() 136 | 137 | shared.convergence_test(finder, finder_str, finder_kwargs, 138 | random_least_squares_problem.loss, random_least_squares_problem, 139 | problem_str, random_init, num_iters) 140 | -------------------------------------------------------------------------------- /tests/utils/shared.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import autograd 4 | import autograd.numpy as np 5 | 6 | import autocrit.nn as nn 7 | from autocrit.utils.random_matrix import generate_random_unit_vector 8 | import autocrit.utils.math as math 9 | import tests.problems.linear as linear 10 | import tests.problems.convolutional as convolutional 11 | 12 | SEED = 14 13 | 14 | CRITERION_STRS = ["gradient norm", "function val", "solution val"] 15 | CRITERION_VALS = [1e-5, 5e-5, 1e-5] 16 | 17 | DIM = 25 18 | 19 | K = 10 20 | L = 5 21 | M = 2 22 | N = 100 23 | 24 | FAIL_MSG = "{0} failed to converge on {1} in {2}:\n\t{3} > {4}" 25 | 26 | 27 | def convergence_test(algorithm, algorithm_str, algorithm_kwargs, 28 | loss, problem, problem_str, init, num_iters, 29 | test_function_converge=True, test_soln_converge=True): 30 | 31 | _, _, errors = evaluate(algorithm, loss, problem, init, num_iters, 32 | kwargs=algorithm_kwargs, 33 | calc_func_error=test_function_converge, 34 | calc_soln_rms_error=test_soln_converge) 35 | 36 | for criterion_str, error_val, criterion_val in zip(CRITERION_STRS, errors, CRITERION_VALS): 37 | assert_pass(algorithm_str, problem_str, criterion_str, error_val, criterion_val) 38 | 39 | 40 | def evaluate(algorithm_constructor, loss, 41 | problem, init, num_iters, 42 | kwargs=None, 43 | calc_func_error=True, 44 | calc_soln_rms_error=True): 45 | if kwargs is None: 46 | kwargs = {} 47 | 48 | algorithm = algorithm_constructor(loss, **kwargs) 49 | solution = 
algorithm.run(init, num_iters) 50 | exact_solution = problem.exact_solution 51 | 52 | grad_rms_error = math.rms(autograd.grad(loss)(solution)) 53 | errors = [grad_rms_error] 54 | 55 | if calc_func_error: 56 | func_error = loss(solution) - problem.loss(exact_solution) 57 | errors.append(func_error) 58 | 59 | if calc_soln_rms_error: 60 | soln_rms_error = math.rms(solution.ravel() - exact_solution.T.ravel()) 61 | errors.append(soln_rms_error) 62 | 63 | return solution, exact_solution, errors 64 | 65 | 66 | def assert_pass(algorithm_str, problem_str, criterion_str, error_val, criterion_val): 67 | fail_msg = FAIL_MSG.format( 68 | algorithm_str, problem_str, criterion_str, error_val, criterion_val) 69 | 70 | assert error_val <= criterion_val, fail_msg 71 | 72 | 73 | def generate_random_least_squares(dim=DIM, seed=SEED): 74 | np.random.seed(seed) 75 | random_least_squares_problem = linear.LeastSquares.\ 76 | generate_random_problem(dim=dim) 77 | random_init = generate_random_unit_vector(dim=dim) 78 | 79 | return random_least_squares_problem, random_init 80 | 81 | 82 | def generate_random_shallow_regression(k=K, l=L, n=N, seed=SEED): 83 | np.random.seed(seed) 84 | random_regression_problem = linear.Regression.\ 85 | generate_random_problem(k=k, l=l, n=n) 86 | 87 | shallow_network = nn.networks.FullyConnected( 88 | (random_regression_problem.X, random_regression_problem.Y), 89 | layer_sizes=[l], 90 | nonlinearity_str="none", 91 | has_biases=False) 92 | 93 | random_init = shallow_network.initialize() 94 | 95 | return random_regression_problem, shallow_network, random_init 96 | 97 | 98 | def generate_random_shallow_classification(k=K, m=M, n=N, seed=SEED): 99 | np.random.seed(seed) 100 | random.seed(seed) 101 | random_classification_problem = linear.Classification.\ 102 | generate_random_problem(k=k, m=m, n=n) 103 | 104 | shallow_network = nn.networks.FullyConnected( 105 | (random_classification_problem.X, random_classification_problem.Y), 106 | layer_sizes=[m], 107 | nonlinearity_str="none", 108 | has_biases=False, 109 | cost_str="softmax_cross_entropy") 110 | 111 | random_init = shallow_network.initialize() 112 | 113 | return random_classification_problem, shallow_network, random_init 114 | 115 | 116 | def generate_random_deep_classification(k=K, m=M, n=N, seed=SEED): 117 | np.random.seed(seed) 118 | random.seed(seed) 119 | random_classification_problem = linear.Classification.\ 120 | generate_random_problem(k=k, m=m, n=n) 121 | 122 | p = min(k, m) 123 | deep_network = nn.networks.FullyConnected( 124 | (random_classification_problem.X, random_classification_problem.Y), 125 | layer_sizes=[p, m], 126 | nonlinearity_str="none", 127 | has_biases=False, 128 | regularizer_str="l2", 129 | regularization_parameter=0.1, 130 | cost_str="softmax_cross_entropy") 131 | 132 | random_init = deep_network.initialize() 133 | 134 | return random_classification_problem, deep_network, random_init 135 | 136 | 137 | def generate_test_conv_classification(n=N, seed=SEED): 138 | np.random.seed(seed) 139 | random.seed(seed) 140 | test_classification_problem = convolutional.Classification.\ 141 | generate_test_problem(n=n) 142 | 143 | conv_network = nn.networks.Network( 144 | (test_classification_problem.X, test_classification_problem.Y), 145 | layer_specs=[nn.layers.ConvLayer((4, 4), 2), 146 | nn.layers.MaxPoolLayer((2, 2)), 147 | nn.layers.GlobalAvgPoolLayer(), 148 | nn.layers.SqueezeLayer()], 149 | cost_str="softmax_cross_entropy") 150 | 151 | random_init = conv_network.initialize() 152 | 153 | return 
test_classification_problem, conv_network, random_init 154 | -------------------------------------------------------------------------------- /tests/problems/linear.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import autograd 4 | import autograd.numpy as np 5 | import sklearn.linear_model 6 | 7 | from autocrit.utils import random_matrix 8 | import autocrit.utils.math as math 9 | 10 | 11 | class LeastSquares(object): 12 | EPS = 1e-20 13 | 14 | def __init__(self, A, b): 15 | self.A = A 16 | self.b = b 17 | 18 | self.grad = autograd.grad(self.loss) 19 | 20 | self.exact_solution = self.solve() 21 | 22 | assert self.loss(self.exact_solution) < self.EPS 23 | assert math.rms(self.grad(self.exact_solution)) < np.sqrt(self.EPS) 24 | 25 | def loss(self, x): 26 | return np.sum(np.square(np.dot(self.A, x) - self.b)) 27 | 28 | def solve(self): 29 | return np.dot(np.linalg.pinv(self.A), self.b) 30 | 31 | @classmethod 32 | def generate_random_problem(cls, dim=25, eps=5e-1): 33 | random_psd_matrix = random_matrix.Wishart(dim, dim) 34 | A = np.eye(dim) + eps * random_psd_matrix.M 35 | b = random_matrix.generate_random_unit_vector(dim=dim) 36 | 37 | return cls(A, b) 38 | 39 | 40 | class Regression(object): 41 | EPS = 1e-20 42 | 43 | def __init__(self, X, Y): 44 | """ 45 | X : array, k x n input observations 46 | Y : array, l x n output observations 47 | 48 | Attributes: 49 | ----------- 50 | W : array, l x k parameter matrix 51 | """ 52 | self.X = X 53 | self.Y = Y 54 | 55 | self.grad = autograd.grad(self.loss) 56 | self.H = autograd.hessian(self.loss) 57 | 58 | self.exact_solution = self.solve() 59 | 60 | assert math.rms(self.grad(self.exact_solution)) < np.sqrt(self.EPS) 61 | 62 | def loss(self, W): 63 | return np.mean(np.square(np.dot(W.T, self.X) - self.Y)) 64 | 65 | def solve(self): 66 | return np.dot( 67 | np.dot( 68 | np.linalg.pinv(np.dot(self.X, self.X.T)), 69 | self.X), 70 | self.Y.T) 71 | 72 | @classmethod 73 | def generate_random_problem(cls, k, l, n, sigma=1.): 74 | """Generate a random linear regression problem. 
75 | 76 | Parameters: 77 | ----------- 78 | 79 | k : int, dimension of X 80 | l : int, dimension of Y 81 | n : int, number of observations 82 | sigma: float, expected norm of additive noise on Y 83 | 84 | Returns: 85 | -------- 86 | 87 | regression_problem : LinearRegressionProblem, class combining loss, data, solver 88 | """ 89 | 90 | # independent gaussian vectors with approx unit norm 91 | X = 1 / np.sqrt(k) * np.random.standard_normal(size=(k, n)) 92 | X -= np.mean(X, axis=1)[:, None] 93 | 94 | # unit norm transformation with uniform orientation 95 | W = np.asarray( 96 | [random_matrix.generate_random_unit_vector(dim=k) for _ in range(l)])\ 97 | .T.squeeze() 98 | 99 | Y = np.dot(W.T, X) 100 | Y += np.sqrt(sigma / l) * np.random.standard_normal(size=Y.shape) 101 | 102 | return cls(X, Y) 103 | 104 | 105 | class Classification(object): 106 | EPS = 1e-20 107 | 108 | def __init__(self, X, Y, Y_iis): 109 | """ 110 | X : array, k x n input observations 111 | Y : array, l x n output label onehots 112 | Y_iis : array, 1 x n output label integers 113 | """ 114 | self.X = X 115 | self.Y = Y 116 | self.Y_iis = Y_iis 117 | 118 | self.grad = autograd.grad(self.loss) 119 | self.H = autograd.hessian(self.loss) 120 | 121 | # minimally regularized LogisticRegression 122 | self.sklearn_model = sklearn.linear_model.LogisticRegression( 123 | solver="sag", fit_intercept=False, C=1e4) 124 | 125 | self.exact_solution = self.solve() 126 | 127 | def loss(self, W): 128 | _W = np.hstack([-W, W]) 129 | logits = np.dot(_W.T, self.X) 130 | 131 | return math.softmax_cross_entropy(logits, self.Y) 132 | 133 | def solve(self): 134 | self.sklearn_model.fit(self.X.T, self.Y_iis) 135 | 136 | return np.atleast_2d(self.sklearn_model.coef_).T 137 | 138 | @classmethod 139 | def generate_random_problem(cls, k, m, n): 140 | """Generate a random linear classification // logistic regression 141 | problem using a mixture of Gaussians. 142 | 143 | Parameters: 144 | ----------- 145 | 146 | k : int, dimension of X 147 | m : int, number of labels 148 | n : int, number of observations 149 | 150 | Returns: 151 | -------- 152 | X : array, k x n input observations 153 | Y : array, m x n one_hot labels 154 | regression_problem : RegressionProblem, class combining loss, data, solver 155 | """ 156 | 157 | X, Y, Y_iis, mus, covs = sample_gaussian_mixture(k, m, n) 158 | 159 | return cls(X, Y, Y_iis) 160 | 161 | 162 | def sample_gaussian_mixture(k, m, n, mus=None, covs=None): 163 | mus = [random_matrix.generate_random_unit_vector(dim=k) for _ in range(m)] 164 | covs = [np.eye(k) / np.sqrt(k) for _ in range(m)] 165 | 166 | labels = list(range(m)) 167 | 168 | Y_iis = [] 169 | Y = [] 170 | X = [] 171 | one_hots = np.eye(m) 172 | 173 | for _ in range(n): 174 | y_ii = random.choice(labels) 175 | y = one_hots[y_ii] 176 | x = np.random.multivariate_normal(np.squeeze(mus[y_ii]), covs[y_ii]) 177 | 178 | Y_iis.append(y_ii) 179 | Y.append(y) 180 | X.append(x) 181 | 182 | X = np.asarray(X).T 183 | Y = np.atleast_2d(np.asarray(Y)).T 184 | Y_iis = np.asarray(Y_iis) 185 | 186 | return X, Y, Y_iis, mus, covs 187 | -------------------------------------------------------------------------------- /tests/test_optimizers.py: -------------------------------------------------------------------------------- 1 | """Tests GradientDescentOptimizer, MomentumOptimizer, and BacktrackingLineSearchOptimizer 2 | for convergence in function value, gradient norm, and solution value for linear least squares 3 | linear regression, and linear classification with a shallow network. 
4 | Additionally tests MomentumOptimizer on a linear convolutional problem. 5 | """ 6 | import warnings 7 | 8 | import autocrit 9 | import autocrit.utils.random_matrix 10 | 11 | import tests.utils.shared as shared 12 | 13 | 14 | def test_GradientDescentOptimizer(): 15 | optimizer = autocrit.optimizers.GradientDescentOptimizer 16 | optimizer_str = "GradientDescentOptimizer" 17 | 18 | problem_str = "least squares" 19 | optimizer_kwargs = {} 20 | num_iters = 1000 21 | 22 | random_least_squares_problem, random_init = \ 23 | shared.generate_random_least_squares() 24 | 25 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 26 | random_least_squares_problem.loss, random_least_squares_problem, 27 | problem_str, random_init, num_iters) 28 | 29 | problem_str = "shallow regression" 30 | optimizer_kwargs = {} 31 | num_iters = 10000 32 | 33 | random_regression_problem, network, random_init = \ 34 | shared.generate_random_shallow_regression() 35 | 36 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 37 | network.loss, random_regression_problem, problem_str, 38 | random_init, num_iters) 39 | 40 | problem_str = "shallow classification" 41 | optimizer_kwargs = {} 42 | num_iters = 12500 43 | 44 | random_classification_problem, network, random_init = \ 45 | shared.generate_random_shallow_classification() 46 | 47 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 48 | network.loss, random_classification_problem, problem_str, 49 | random_init, num_iters, test_soln_converge=False) 50 | 51 | 52 | def test_MomentumOptimizer(): 53 | warnings.filterwarnings("ignore") 54 | optimizer = autocrit.optimizers.MomentumOptimizer 55 | optimizer_str = "MomentumOptimizer" 56 | 57 | problem_str = "least squares" 58 | optimizer_kwargs = {} 59 | num_iters = 1000 60 | 61 | random_least_squares_problem, random_init = \ 62 | shared.generate_random_least_squares() 63 | 64 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 65 | random_least_squares_problem.loss, random_least_squares_problem, 66 | problem_str, random_init, num_iters) 67 | 68 | problem_str = "shallow regression" 69 | optimizer_kwargs = {} 70 | num_iters = 1000 71 | 72 | random_regression_problem, network, random_init = \ 73 | shared.generate_random_shallow_regression() 74 | 75 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 76 | network.loss, random_regression_problem, problem_str, 77 | random_init, num_iters) 78 | 79 | problem_str = "shallow classification" 80 | optimizer_kwargs = {} 81 | num_iters = 1000 82 | 83 | random_classification_problem, network, random_init = \ 84 | shared.generate_random_shallow_classification() 85 | 86 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 87 | network.loss, random_classification_problem, problem_str, 88 | random_init, num_iters, test_soln_converge=False) 89 | 90 | problem_str = "convolutional classification" 91 | optimizer_kwargs = {"momentum": 0.99} 92 | num_iters = 1000 93 | 94 | test_classification_problem, network, random_init = \ 95 | shared.generate_test_conv_classification() 96 | 97 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 98 | network.loss, test_classification_problem, problem_str, 99 | random_init, num_iters, test_soln_converge=False) 100 | 101 | 102 | def test_BackTrackingLineSearchOptimizer(dim=25): 103 | optimizer = autocrit.optimizers.BackTrackingLineSearchOptimizer 104 | optimizer_str = "BackTrackingLineSearchOptimizer" 105 | 106 | problem_str = "least squares" 107 | 
optimizer_kwargs = {} 108 | num_iters = 1000 109 | 110 | random_least_squares_problem, random_init = \ 111 | shared.generate_random_least_squares() 112 | 113 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 114 | random_least_squares_problem.loss, random_least_squares_problem, 115 | problem_str, random_init, num_iters) 116 | 117 | problem_str = "shallow regression" 118 | optimizer_kwargs = {"gamma": 1 - 1e-3} 119 | num_iters = 100 120 | 121 | random_regression_problem, network, random_init = \ 122 | shared.generate_random_shallow_regression() 123 | 124 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 125 | network.loss, random_regression_problem, problem_str, 126 | random_init, num_iters) 127 | 128 | problem_str = "shallow classification" 129 | optimizer_kwargs = {"gamma": 1 - 1e-3} 130 | num_iters = 100 131 | 132 | random_classification_problem, network, random_init = \ 133 | shared.generate_random_shallow_classification() 134 | 135 | shared.convergence_test(optimizer, optimizer_str, optimizer_kwargs, 136 | network.loss, random_classification_problem, problem_str, 137 | random_init, num_iters, test_soln_converge=False) 138 | -------------------------------------------------------------------------------- /autocrit/optimizers.py: -------------------------------------------------------------------------------- 1 | """Optimization algorithms using zeroth- and first-order oracles. 2 | 3 | Includes gradient descent, momentum, and backtracking line search, 4 | using either the standard Wolfe criterion or the Roosta criterion from 5 | the paper on Newton-MR. 6 | """ 7 | import autograd 8 | import autograd.numpy as np 9 | 10 | from autocrit.defaults import DEFAULT_ALPHA, DEFAULT_MOMENTUM 11 | from autocrit.defaults import DEFAULT_BETA, DEFAULT_GAMMA, DEFAULT_RHO 12 | 13 | 14 | class FirstOrderOptimizer(object): 15 | """Abstract Base Class for optimizers with a zeroth- and first-order oracle. 16 | 17 | If no first-order oracle is provided, it is computed from the zeroth-order 18 | oracle with autograd.""" 19 | 20 | def __init__(self, f, grad_f): 21 | self.f = f 22 | if grad_f is None: 23 | self.grad_f = autograd.grad(f) 24 | else: 25 | self.grad_f = grad_f 26 | 27 | def run(self, init, num_iters): 28 | solution = np.copy(init) 29 | 30 | for _ in range(num_iters): 31 | solution += self.update(solution) 32 | 33 | return solution 34 | 35 | 36 | class GradientDescentOptimizer(FirstOrderOptimizer): 37 | """FirstOrderOptimizer that uses scaled gradients to update.""" 38 | 39 | def __init__(self, f, grad_f=None, lr=DEFAULT_ALPHA): 40 | super().__init__(f, grad_f) 41 | self.lr = lr 42 | 43 | def update(self, theta): 44 | return -self.lr * self.grad_f(theta) 45 | 46 | 47 | class MomentumOptimizer(FirstOrderOptimizer): 48 | """FirstOrderOptimizer that maintains a 'velocity' term in addition to scaled gradients. 49 | 50 | If initial velocity is not provided in init_velocity, starts at 0. 
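Each call to update computes velocity <- grad_f(theta) + momentum * velocity and returns the step -lr * velocity.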
51 | """ 52 | 53 | def __init__(self, f, grad_f=None, lr=DEFAULT_ALPHA, momentum=DEFAULT_MOMENTUM, 54 | init_velocity=None): 55 | super().__init__(f, grad_f) 56 | self.lr = lr 57 | self.momentum = momentum 58 | 59 | self.velocity = init_velocity 60 | 61 | def update(self, theta): 62 | if self.velocity is None: 63 | self.velocity = np.zeros_like(theta) 64 | self.velocity = self.grad_f(theta) + self.momentum * self.velocity 65 | update = -self.lr * self.velocity 66 | 67 | return update 68 | 69 | 70 | class BackTrackingLineSearchOptimizer(FirstOrderOptimizer): 71 | """FirstOrderOptimizer that uses line search over the gradient direction. 72 | 73 | Can use either the traditional Wolfe criterion for terminating the line search 74 | or the new critertion from Roosta et al., 2018. 75 | """ 76 | 77 | def __init__(self, f, grad_f=None, hvp=None, 78 | alpha=DEFAULT_ALPHA, beta=DEFAULT_BETA, 79 | rho=DEFAULT_RHO, gamma=None, 80 | criterion="wolfe"): 81 | 82 | super().__init__(f, grad_f) 83 | self.set_criterion(criterion, gamma) 84 | 85 | self.alpha = alpha 86 | self.beta = beta 87 | self.rho = rho 88 | 89 | self.min_step_size = self.compute_min_step_size(self.alpha, self.beta) 90 | 91 | if hvp is None: 92 | self.hvp = autograd.hessian_vector_product(self.f) 93 | else: 94 | self.hvp = hvp 95 | 96 | def update(self, theta): 97 | update_direction = -self.grad_f(theta) 98 | converged = self.check_convergence(theta, update_direction) 99 | while not converged: 100 | self.alpha *= self.beta 101 | if self.alpha <= self.min_step_size: 102 | return np.zeros_like(theta) 103 | converged = self.check_convergence(theta, update_direction) 104 | step = self.alpha * update_direction 105 | self.alpha /= self.beta 106 | return step 107 | 108 | def set_criterion(self, criterion_str, gamma): 109 | self.criterion_str = criterion_str 110 | 111 | if self.criterion_str is None: 112 | return 113 | 114 | if self.criterion_str == "roosta": 115 | self.check_convergence = self.roosta_criterion 116 | elif self.criterion_str == "wolfe": 117 | self.check_convergence = self.wolfe_criterion 118 | if gamma is None: 119 | self.gamma = DEFAULT_GAMMA 120 | else: 121 | self.gamma = gamma 122 | else: 123 | raise NotImplementedError 124 | 125 | def roosta_criterion(self, theta, update_direction): 126 | proposed_update = theta + self.alpha * update_direction 127 | updated_f = self.f(proposed_update) 128 | current_f = self.f(theta) 129 | 130 | sufficient_decrease = 2 * self.rho * self.alpha * np.dot( 131 | self.hvp(theta, update_direction).T, self.grad_f(theta)) 132 | 133 | return (updated_f <= 134 | current_f + sufficient_decrease) 135 | 136 | def wolfe_criterion(self, theta, update_direction): 137 | proposed_update = theta + self.alpha * update_direction 138 | updated_f = self.f(proposed_update) 139 | current_f = self.f(theta) 140 | 141 | current_grad = self.grad_f(theta) 142 | grad_update_product = np.dot(update_direction.T, current_grad) 143 | 144 | new_grad = self.grad_f(proposed_update) 145 | new_grad_update_product = np.dot(update_direction.T, new_grad) 146 | 147 | passed_armijo = updated_f <= current_f + self.rho * self.alpha * grad_update_product 148 | 149 | passed_curvature = -new_grad_update_product <= -self.gamma * grad_update_product 150 | 151 | return passed_armijo and passed_curvature 152 | 153 | @staticmethod 154 | def compute_min_step_size(alpha, beta): 155 | while alpha * beta != alpha: 156 | alpha *= beta 157 | return alpha 158 | -------------------------------------------------------------------------------- 
/autocrit/utils/random_matrix.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | import scipy.integrate 3 | 4 | PRECISION = 1e-4 5 | 6 | 7 | class RandomMatrix(object): 8 | 9 | def __init__(self): 10 | self.symmetric = False 11 | return 12 | 13 | def eigvals(self): 14 | if self.symmetric: 15 | return np.linalg.eigvalsh(self.M) 16 | else: 17 | return np.linalg.eigvals(self.M) 18 | 19 | def expected_cumulative_spectral_distribution(self, lam, precision=PRECISION, accumulate=False): 20 | if lam < self.min_lam: 21 | return 0. 22 | 23 | lams = self.generate_lams(lam, precision) 24 | 25 | singular_mass = self.expected_spectral_singular_mass() 26 | density_values = [self.expected_spectral_density(lam) for lam in lams] 27 | if not accumulate: 28 | accumulated_density = scipy.integrate.trapz(density_values, lams) 29 | return ((lam >= 0) * singular_mass) + accumulated_density 30 | else: 31 | accumulated_densities = scipy.integrate.cumtrapz(density_values, lams) 32 | accumulated_masses = singular_mass * (lams[1:] >= 0) + accumulated_densities 33 | return accumulated_masses 34 | 35 | def __repr__(self): 36 | return self.M.__repr__() 37 | 38 | def display_expected_cumulative_spectral_distribution( 39 | self, ax, precision=PRECISION, **plot_kwargs): 40 | lams = self.generate_lams(self.max_lam + precision, precision) 41 | 42 | expected_csds = self.expected_cumulative_spectral_distribution( 43 | self.max_lam + precision, precision, accumulate=True) 44 | 45 | ax.plot(lams[1:], expected_csds, **plot_kwargs) 46 | 47 | return ax 48 | 49 | def generate_lams(self, lam, precision=PRECISION): 50 | return np.arange(self.min_lam - 2 * precision, lam + precision, precision) 51 | 52 | 53 | class SymmetricWigner(RandomMatrix): 54 | 55 | def __init__(self, N): 56 | super().__init__() 57 | self.symmetric = True 58 | self.generate = self.generate_symmetric_gaussian 59 | self.M = self.generate(N) 60 | self.min_lam = -2. 61 | self.max_lam = 2. 62 | 63 | @staticmethod 64 | def generate_symmetric_gaussian(N): 65 | """generate an N by N symmetric gaussian random matrix with variance 1/N 66 | """ 67 | base_matrix = SymmetricWigner.generate_gaussian(N) 68 | return (1 / np.sqrt(2)) * (base_matrix + base_matrix.T) 69 | 70 | @staticmethod 71 | def generate_gaussian(N): 72 | """generate an N by N gaussian random matrix with variance 1/N 73 | """ 74 | return 1 / np.sqrt(N) * np.random.standard_normal(size=(N, N)) 75 | 76 | def expected_spectral_singular_mass(self): 77 | return 0. 78 | 79 | def expected_spectral_density(self, lam): 80 | """Expected density for a symmetric gaussian random matrix with variance 1/N""" 81 | if lam > self.max_lam or lam < self.min_lam: 82 | return 0 83 | else: 84 | return 1 / (2 * np.pi) * np.sqrt(2 ** 2 - lam ** 2) 85 | 86 | 87 | class Wishart(RandomMatrix): 88 | 89 | def __init__(self, N, k, negative=False): 90 | super().__init__() 91 | self.symmetric = True 92 | if negative: 93 | self.generate = self.generate_negative_wishart 94 | self.sign = -1 95 | else: 96 | self.generate = self.generate_wishart 97 | self.sign = 1 98 | 99 | self.N, self.k = N, k 100 | self.M = self.generate(self.N, self.k) 101 | self.sigma = 1. 
102 | 103 | self.central_lam = self.sign * N / k 104 | self.scaling_factor = 1 / (2 * np.pi * self.sigma ** 2) 105 | 106 | self.lam_plus = self.sigma ** 2 * self.sign * (1 + np.sqrt(self.central_lam)) ** 2 107 | self.lam_minus = self.sigma ** 2 * self.sign * (1 - np.sqrt(self.central_lam)) ** 2 108 | 109 | if negative: 110 | self.max_lam = 0. 111 | self.min_lam = self.lam_plus 112 | else: 113 | self.max_lam = self.lam_plus 114 | self.min_lam = 0. 115 | 116 | self.expected_spectral_density = self.marchenkopastur_density 117 | 118 | @staticmethod 119 | def generate_wishart(N, k=1): 120 | """generate an N by N wishart random matrix with rank min(N,k) 121 | """ 122 | self_outer_product = lambda x: x.dot(x.T) 123 | random_factor = np.random.standard_normal(size=(N, k)) 124 | wishart_random_matrix = 1 / k * self_outer_product(random_factor) 125 | 126 | return wishart_random_matrix 127 | 128 | @staticmethod 129 | def generate_negative_wishart(N, k=1): 130 | """generate an N by N negative wishart random matric with rank min(N,k) 131 | """ 132 | wishart_random_matrix = Wishart.generate_wishart(N, k) 133 | negative_wishart_random_matrix = -1 * wishart_random_matrix 134 | 135 | return negative_wishart_random_matrix 136 | 137 | def marchenkopastur_density(self, lam): 138 | """the density for the non-singular portion of the marchenko-pastur distribution, 139 | as given by https://en.wikipedia.org/wiki/Marchenko-Pastur_distribution. 140 | """ 141 | 142 | # density is 0 on real half-line opposite its sign 143 | if np.sign(lam) != self.sign: 144 | return 0 145 | 146 | # that handled, we can solve as though lam were positive, since density invariant 147 | lam = np.abs(lam) 148 | lam_minus = self.sign * self.lam_minus 149 | lam_plus = self.sign * self.lam_plus 150 | 151 | if (lam > lam_minus and lam < lam_plus): 152 | unscaled_density = np.sqrt( 153 | (lam_plus - lam) * (lam - lam_minus)) / (self.central_lam * lam) 154 | return self.scaling_factor * unscaled_density 155 | else: 156 | return 0 157 | 158 | def expected_spectral_singular_mass(self): 159 | return max(1 - self.k / self.N, 0) 160 | 161 | 162 | def generate_random_unit_vector(dim=25): 163 | gauss_random_vector = np.atleast_2d(np.random.standard_normal(size=dim)).T 164 | return gauss_random_vector / np.linalg.norm(gauss_random_vector) 165 | -------------------------------------------------------------------------------- /autocrit/nn/networks.py: -------------------------------------------------------------------------------- 1 | """Provides neural networks composed of layers from nn.layers module. 2 | 3 | Generic sequential networks are implemented by the Network class, 4 | while the traditional fully-connected network is provided by FullyConnected. 5 | """ 6 | from collections import namedtuple 7 | import json 8 | 9 | import autograd 10 | from autograd import numpy as np 11 | 12 | from . 
import layers as nn_layers 13 | from autocrit.utils import random_matrix 14 | from autocrit.utils import math 15 | 16 | _LAYERS = nn_layers._LAYERS 17 | 18 | _COSTS = {"mean_squared_error": math.mean_squared_error, 19 | "softmax_cross_entropy": math.softmax_cross_entropy} 20 | 21 | 22 | def l2_regularizer(theta): 23 | return np.mean(np.square(theta)) 24 | 25 | 26 | def l1_regularizer(theta): 27 | return np.mean(np.abs(theta)) 28 | 29 | 30 | _REGULARIZERS = {"l2": l2_regularizer, 31 | "l1": l1_regularizer, 32 | "none": lambda x: 0.} 33 | 34 | 35 | Data = namedtuple("Data", ['x', 'y']) 36 | 37 | 38 | class Network(object): 39 | 40 | def __init__(self, data, layer_specs, cost_str="mean_squared_error", 41 | regularizer_str="none", regularization_parameter=0., 42 | batch_size=None): 43 | if not isinstance(data, Data): 44 | try: 45 | data = Data(x=data[0], y=data[1]) 46 | except IndexError: 47 | raise ValueError("data argument not understood") 48 | 49 | self.data = data 50 | 51 | if batch_size is None: 52 | self.batch_size = self.data.x.shape[-1] 53 | else: 54 | self.batch_size = batch_size 55 | 56 | self.cost_str = cost_str 57 | self.regularizer_str = regularizer_str 58 | 59 | self.cost = _COSTS[self.cost_str] 60 | self.regularizer = _REGULARIZERS[self.regularizer_str] 61 | 62 | self.regularization_parameter = regularization_parameter 63 | 64 | self.layer_specs = layer_specs 65 | self.layers = [] 66 | for layer_spec in self.layer_specs: 67 | if not isinstance(layer_spec, nn_layers.Layer): 68 | layer_constructor = _LAYERS[layer_spec["type"]] 69 | layer = layer_constructor(**layer_spec["params"]) 70 | else: 71 | layer = layer_spec 72 | self.layers.append(layer) 73 | 74 | self.N_params, _ = self.build() 75 | 76 | self.grad = autograd.grad(self.loss) 77 | self.hess = autograd.hessian(self.loss) 78 | 79 | def loss(self, theta): 80 | return self.loss_on_batch(self.data.x, self.data.y, theta) 81 | 82 | def loss_on_batch(self, batch_x, batch_y, theta): 83 | return (self.cost(self.forward_pass(batch_x, theta), batch_y) + 84 | self.regularization_parameter * self.regularizer(theta)) 85 | 86 | def loss_on_random_batch(self, theta, batch_size=None): 87 | """Loss on a randomly selected batch of size batch_size. 88 | Defaults to self.batch_size, which itself defaults to full-batch.
89 | """ 90 | if batch_size is None: 91 | batch_size = self.batch_size 92 | dataset_size = self.data.x.shape[-1] 93 | 94 | if dataset_size == batch_size: 95 | batch_x, batch_y = self.data.x, self.data.y 96 | else: 97 | batch_idxs = np.random.choice(dataset_size, size=batch_size) 98 | batch_x, batch_y = self.data.x[..., batch_idxs], self.data.y[..., batch_idxs] 99 | 100 | return self.loss_on_batch(batch_x, batch_y, theta) 101 | 102 | def forward_pass(self, x, theta): 103 | y = x 104 | for layer in self.layers: 105 | params = self.parser.get(theta, layer) 106 | y = layer.forward_pass(y, params) 107 | return y 108 | 109 | def build(self): 110 | self.parser = nn_layers.ParamParser() 111 | 112 | shape = self.data.x.shape 113 | for layer in self.layers: 114 | N_params, shape = layer.build(shape) 115 | self.parser.add_params(layer, (N_params)) 116 | 117 | return self.parser.N, shape 118 | 119 | def to_json(self, filename): 120 | dictionary = self.construct_dict() 121 | with open(filename, "w") as f: 122 | json.dump(dictionary, f) 123 | 124 | @classmethod 125 | def from_json(cls, data, filename): 126 | with open(filename) as f: 127 | dictionary = json.load(f) 128 | return cls(data, **dictionary) 129 | 130 | def construct_dict(self): 131 | self.layer_dicts = [layer.to_dict() for layer in self.layers] 132 | 133 | return {"layer_specs": self.layer_dicts, 134 | "cost_str": self.cost_str, 135 | "regularizer_str": self.regularizer_str, 136 | "regularization_parameter": self.regularization_parameter, 137 | "batch_size": self.batch_size} 138 | 139 | def initialize(self): 140 | return 1 / np.sqrt(self.N_params) * np.random.standard_normal(size=[self.N_params, 1]) 141 | 142 | 143 | class FullyConnected(Network): 144 | 145 | def __init__(self, data, layer_sizes, cost_str="mean_squared_error", nonlinearity_str="relu", 146 | regularizer_str="none", regularization_parameter=0., has_biases=True, 147 | batch_size=None): 148 | self.layer_sizes = layer_sizes 149 | self.has_biases = has_biases 150 | self.nonlinearity_str = nonlinearity_str 151 | layers = [] 152 | for layer_size in self.layer_sizes: 153 | assert isinstance(layer_size, int) 154 | layers.append(nn_layers.FCLayer(layer_size, self.has_biases)) 155 | layers.append(_LAYERS["pointwise_nonlinear"](self.nonlinearity_str)) 156 | 157 | if self.has_biases: 158 | self.num_biases = sum(self.layer_sizes) 159 | else: 160 | self.num_biases = 0 161 | 162 | Network.__init__(self, data, layers, cost_str, regularizer_str, 163 | regularization_parameter, batch_size=batch_size) 164 | 165 | def initialize(self, weight_kwargs=None, bias_kwargs=None): 166 | if weight_kwargs is None: 167 | weight_kwargs = {} 168 | if bias_kwargs is None: 169 | bias_kwargs = {} 170 | 171 | init_weights = self.initialize_weights(**weight_kwargs) 172 | init_biases = self.initialize_biases(**bias_kwargs) 173 | 174 | return np.atleast_2d(np.concatenate([init_weights, init_biases])).T 175 | 176 | def initialize_weights(self): 177 | in_sizes = [self.data.x.shape[0]] + self.layer_sizes[:-1] 178 | out_sizes = self.layer_sizes 179 | weight_matrices = [self.initialize_weight_matrix(in_size, out_size) 180 | for in_size, out_size in zip(in_sizes, out_sizes)] 181 | 182 | return np.concatenate([weight_matrix.ravel() 183 | for weight_matrix in weight_matrices]) 184 | 185 | def initialize_biases(self, constant=0.01): 186 | return np.asarray([constant] * self.num_biases) 187 | 188 | def initialize_weight_matrix(self, in_size, out_size): 189 | weight_matrix = 
np.asarray([random_matrix.generate_random_unit_vector(dim=in_size) 190 | for _ in range(out_size)]).squeeze() 191 | return weight_matrix 192 | 193 | def construct_dict(self): 194 | 195 | return {"layer_sizes": self.layer_sizes, 196 | "cost_str": self.cost_str, 197 | "nonlinearity_str": self.nonlinearity_str, 198 | "regularizer_str": self.regularizer_str, 199 | "regularization_parameter": self.regularization_parameter, 200 | "has_biases": self.has_biases} 201 | -------------------------------------------------------------------------------- /autocrit/experiments.py: -------------------------------------------------------------------------------- 1 | """Provides Experiment objects, which apply an optimization algorithm 2 | or a critical point-finding algorithm to a function. 3 | 4 | If the function is autograd-differentiable, the gradient oracle is 5 | computed automatically. If that's insufficient, a gradient oracle can be 6 | directly provided as the grad_f argument. 7 | """ 8 | import json 9 | import random 10 | 11 | import autograd 12 | import autograd.numpy as np 13 | 14 | from . import finders 15 | from . import nn 16 | from . import optimizers 17 | 18 | SEED = 14 19 | 20 | _NETWORK_INITS = {"fullyconnected": nn.networks.FullyConnected} 21 | 22 | _OPTIMIZERS = {"gd": optimizers.GradientDescentOptimizer, 23 | "momentum": optimizers.MomentumOptimizer, 24 | "btls": optimizers.BackTrackingLineSearchOptimizer} 25 | 26 | _FINDER_INITS = {"newtonMR": finders.newtons.FastNewtonMR, 27 | "newtonTR": finders.newtons.FastNewtonTR, 28 | "gnm": finders.gradnormmin.GradientNormMinimizer} 29 | 30 | DEFAULT_LOG_KWARGS = {"track_theta": True, "track_f": True, "track_grad_f": False} 31 | 32 | 33 | class Experiment(object): 34 | """Abstract base class for OptimizationExperiments and CritFinderExperiments. 35 | 36 | Concrete classes should implement a .run method that executes the experiment 37 | and stores the results of runs in self.runs, a list. These should be save-able 38 | into .npz format by np.savez. 39 | 40 | They should further implement a construct_dictionary method that saves 41 | all of the relevant arguments necessary for a constructor call as a dictionary 42 | that can be written to a .json file. These .json files are used to reconstruct 43 | experiments and their components. 44 | """ 45 | 46 | def __init__(self, seed=None): 47 | """ 48 | Parameters 49 | ---------- 50 | 51 | seed : int or None, default is None 52 | Seeding value for random and np.random. 53 | If None, defaults to global variable SEED. 54 | """ 55 | if seed is None: 56 | self.seed = SEED 57 | else: 58 | self.seed = seed 59 | 60 | self.runs = [] 61 | 62 | def to_json(self, filename): 63 | dictionary = self.construct_dictionary() 64 | 65 | with open(filename, "w") as f: 66 | json.dump(dictionary, f) 67 | 68 | def save_results(self, filename): 69 | results_dict = self.runs[-1] 70 | np.savez(filename, **results_dict) 71 | 72 | def construct_dictionary(self): 73 | raise NotImplementedError 74 | 75 | 76 | class OptimizationExperiment(Experiment): 77 | """Concrete Experiment that performs optimization on a function. 78 | """ 79 | 80 | def __init__(self, f, grad_f=None, optimizer_str="gd", optimizer_kwargs=None, 81 | log_kwargs=None, seed=None): 82 | """Create an OptimizationExperiment on callable f according to kwargs. 83 | 84 | Parameters 85 | ---------- 86 | 87 | f : callable 88 | Function to optimize. Should require only parameters as input. 89 | For stochastic functions, e.g. 
for stochastic gradient descent, 90 | function must perform batching. 91 | 92 | grad_f : callable or None, default is None 93 | A gradient oracle for f. If None, autograd.grad is called on f. 94 | 95 | optimizer_str : str 96 | String to key into _OPTIMIZERS. Default is "gd", which is 97 | optimizers.gradient_descent. 98 | 99 | optimizer_kwargs : dict or None, default is None 100 | A dictionary of keyword arguments for the optimizer selected with 101 | optimizer_str. See optimizers for call signatures. 102 | 103 | log_kwargs : dict or None, default is None 104 | A dictionary of keyword arguments for the log_run method, which 105 | determines which features of the run are saved. If None, 106 | DEFAULT_LOG_KWARGS is used. See log_run for details. 107 | 108 | seed : int or None, default is None 109 | Seeding value for random and np.random. 110 | If None, defaults to global variable SEED. 111 | """ 112 | Experiment.__init__(self, seed=seed) 113 | 114 | if log_kwargs is None: 115 | self.log_kwargs = DEFAULT_LOG_KWARGS.copy() 116 | else: 117 | self.log_kwargs = log_kwargs 118 | 119 | self.f = f 120 | self.grad_f = grad_f 121 | 122 | if self.grad_f is None: 123 | self.grad_f = autograd.grad(f) 124 | 125 | self.optimizer_str = optimizer_str 126 | self.optimizer = _OPTIMIZERS[self.optimizer_str] 127 | 128 | if optimizer_kwargs is None: 129 | self.optimizer_kwargs = {} 130 | else: 131 | self.optimizer_kwargs = optimizer_kwargs 132 | 133 | self.optimizer = _OPTIMIZERS[self.optimizer_str]( 134 | self.f, self.grad_f, **self.optimizer_kwargs) 135 | 136 | def run(self, init_theta, num_iters=1, seed=None): 137 | """Execute optimizer on self.f, starting with init_theta, for num_iters. 138 | 139 | Includes optional SEED argument to allow for stochastic behavior 140 | of stochastic functions f. 141 | Warning: this does not guarantee that f is non-stochastic across calls. 142 | """ 143 | if seed is None: 144 | seed = self.seed 145 | np.random.seed(seed) 146 | random.seed(seed) 147 | 148 | empty_run = {"theta": [], 149 | "f_theta": [], 150 | "grad_f_theta": [], 151 | "g_theta": []} 152 | self.runs.append(empty_run) 153 | 154 | theta = init_theta 155 | self.log_step(theta, **self.log_kwargs) 156 | 157 | for _ in range(num_iters): 158 | theta = theta + self.optimizer.update(theta) 159 | self.log_step(theta, **self.log_kwargs) 160 | 161 | return theta 162 | 163 | def log_step(self, theta, 164 | track_theta=False, track_f=False, track_grad_f=False, track_g=False): 165 | """Append selected values to run dictionary 166 | """ 167 | run = self.runs[-1] 168 | if track_theta: 169 | run["theta"].append(theta) 170 | if track_f: 171 | run["f_theta"].append(self.f(theta)) 172 | if track_grad_f: 173 | run["grad_f_theta"].append(self.grad_f(theta)) 174 | if track_g: 175 | run["g_theta"].append(0.5 * np.sum(np.square(self.grad_f(theta)))) 176 | 177 | @classmethod 178 | def from_json(cls, f, filename, grad_f=None): 179 | """Given a function and possibly a gradient oracle and the path to a .json file, 180 | creates an OptimizationExperiment on f using kwargs in the .json file. 181 | """ 182 | with open(filename) as fn: 183 | dictionary = json.load(fn) 184 | 185 | return cls(f, grad_f, **dictionary) 186 | 187 | def construct_dictionary(self): 188 | """Construct a dictionary containing necessary information for 189 | reconstructing OptimizationExperiment when combined with self.f. 190 | 191 | See OptimizationExperiment.from_json for details. 
192 | """ 193 | return {"optimizer_str": self.optimizer_str, 194 | "optimizer_kwargs": self.optimizer_kwargs, 195 | "log_kwargs": self.log_kwargs, 196 | "seed": self.seed} 197 | 198 | 199 | class CritFinderExperiment(Experiment): 200 | """Concrete Experiment that finds critical points on a function. 201 | """ 202 | 203 | def __init__(self, f, finder_str, finder_kwargs=None): 204 | """ 205 | 206 | Parameters 207 | ---------- 208 | 209 | f : callable 210 | Function to search on. Should require only parameters as input. 211 | For stochastic functions, function must perform batching. 212 | 213 | finder_str : str 214 | String to key into _FINDER_INITS. Identifies the critical point- 215 | finding algorithm to use. 216 | 217 | finder_kwargs: dict or None, default is None 218 | Dictionary with keyword arguments to provide to self.finder_init. 219 | If None, an empty dictionary is used. 220 | 221 | seed : int or None, default is None 222 | Seeding value for random and np.random. 223 | If None, defaults to global variable SEED. 224 | """ 225 | Experiment.__init__(self) 226 | self.f = f 227 | 228 | self.finder_str = finder_str 229 | 230 | if finder_kwargs is None: 231 | self.finder_kwargs = {} 232 | else: 233 | self.finder_kwargs = finder_kwargs 234 | 235 | if "log_kwargs" not in self.finder_kwargs.keys(): 236 | self.finder_kwargs.update({"log_kwargs": DEFAULT_LOG_KWARGS.copy()}) 237 | 238 | self.finder_init = _FINDER_INITS[self.finder_str] 239 | 240 | self.finder = self.finder_init(self.f, **self.finder_kwargs) 241 | 242 | def run(self, init_theta, num_iters=1, seed=None): 243 | """Execute finder on self.f, starting with init_theta, for num_iters. 244 | """ 245 | if seed is None: 246 | seed = self.seed 247 | np.random.seed(seed) 248 | random.seed(seed) 249 | 250 | self.finder.log = {} 251 | thetas = self.finder.run(init_theta, num_iters) 252 | self.runs.append(self.finder.log) 253 | return thetas 254 | 255 | @classmethod 256 | def from_json(cls, f, filename): 257 | """Given a function f and the path to a .json file, 258 | creates a CritFinderExperiment for f using kwargs in the .json file. 259 | """ 260 | with open(filename) as fn: 261 | dictionary = json.load(fn) 262 | 263 | return cls(f, **dictionary) 264 | 265 | def construct_dictionary(self): 266 | """Construct a dictionary containing necessary information for 267 | reconstructing CritFinderExperiment when combined with self.f. 268 | 269 | See CritFinderExperiment.from_json for details. 270 | """ 271 | dictionary = {"finder_kwargs": self.finder_kwargs, 272 | "finder_str": self.finder_str} 273 | return dictionary 274 | 275 | def uniform(self, thetas): 276 | """Select a theta at random from list thetas. 277 | """ 278 | return random.choice(thetas) 279 | 280 | def uniform_f(self, thetas): 281 | """Select a theta from thetas uniformly across values of self.f. 282 | 283 | This can be slow. Overwrite this method by calling freeze_uniform_f 284 | if this function needs to be called multiple times. 285 | """ 286 | return self.uniform_cd(*self.sort_and_calculate_cds(thetas, self.f)) 287 | 288 | def freeze_uniform_f(self, thetas): 289 | """Overwrites self.uniform_f with a function that has pre-computed 290 | the sorted version of thetas and the cumulative densities, supporting 291 | much faster random selection. 
292 | """ 293 | sorted_thetas, cds = self.sort_and_calculate_cds(thetas, self.f) 294 | self.uniform_f = lambda thetas: self.uniform_cd(sorted_thetas, cds) 295 | 296 | @staticmethod 297 | def sort_and_calculate_cds(thetas, f): 298 | f_thetas = [f(theta) for theta in thetas] 299 | min_f, max_f = min(f_thetas), max(f_thetas) 300 | cds = [(f_theta - min_f) / (max_f - min_f) for f_theta in f_thetas] 301 | thetas, cds = zip(*sorted(zip(thetas, cds), key=lambda tup: tup[1])) 302 | return thetas, cds 303 | 304 | @staticmethod 305 | def uniform_cd(sorted_thetas, cds): 306 | """Select randomly from sorted_thetas with respect to the cumulative 307 | density implied by cds, an equal-length list of cumulative density values 308 | for each element in sorted_thetas. 309 | """ 310 | rand_cd = random.uniform(0, 1) 311 | idx = next(filter(lambda tup: tup[1] >= rand_cd, enumerate(cds)))[0] 312 | return sorted_thetas[idx] 313 | -------------------------------------------------------------------------------- /autocrit/finders/newtons.py: -------------------------------------------------------------------------------- 1 | """Provides Newton-style methods for finding critical points. 2 | """ 3 | # import warnings 4 | 5 | import autograd 6 | import autograd.numpy as np 7 | 8 | from .minresQLP import MinresQLP as mrqlp 9 | 10 | from .base import Finder, Logger 11 | from ..defaults import DEFAULT_STEP_SIZE, DEFAULT_RTOL, DEFAULT_MAXIT 12 | from ..defaults import DEFAULT_ALPHA, DEFAULT_BETA, DEFAULT_GAMMAS, DEFAULT_RHO, DEFAULT_RHO_PURE 13 | 14 | DEFAULT_ACONDLIM = 1e7 15 | DEFAULT_MAXXNORM = 1e4 16 | DEFAULT_TRANCOND = 1e4 17 | 18 | 19 | class NewtonMethod(Finder): 20 | """Base version of Newton method for finding critical points. 21 | 22 | All Newton methods are run the same way: select an update direction or directions, 23 | and then the current value of theta and the update direction(s) are used to select an update. 24 | 25 | Those two steps are implemented here as the methods get_update_direction, 26 | which inverts the Hessian and multiplies it with the negative gradient, 27 | and select_update, which scales the result by the step_size. 28 | 29 | Additional Newton methods are defined by over-riding those two methods. 30 | """ 31 | 32 | def __init__(self, f, step_size=DEFAULT_STEP_SIZE, log_kwargs=None): 33 | Finder.__init__(self, f, log_kwargs=log_kwargs) 34 | 35 | self.step_size = step_size 36 | 37 | self.parameters = {"step_size": step_size} 38 | 39 | def run(self, init_theta, num_iters=1): 40 | theta = init_theta 41 | self.update_logs({"theta": theta, 42 | "update_direction": None, 43 | "parameters": self.parameters}) 44 | 45 | for ii in range(num_iters): 46 | 47 | update_direction = self.get_update_direction(theta) 48 | theta_new = self.select_update(theta, update_direction) 49 | 50 | self.update_logs({"theta": theta_new, 51 | "update_direction": update_direction, 52 | "parameters": self.parameters}) 53 | 54 | if np.array_equal(theta, theta_new): 55 | return theta 56 | 57 | theta = theta_new 58 | 59 | return theta 60 | 61 | def get_update_direction(self, theta): 62 | """Compute an update direction using the classic Newton-Raphson method: 63 | compute the Hessian at theta, invert it explicitly, and then apply that matrix 64 | to the negative gradient. 65 | """ 66 | update_direction = -np.linalg.inv(self.H(theta)).dot(self.grad_f(theta)) 67 | return update_direction 68 | 69 | def select_update(self, theta, update_direction): 70 | """Select the update along update direction using a fixed step size. 
71 | """ 72 | return theta + self.step_size * update_direction 73 | 74 | def squared_grad_norm(self, theta): 75 | return np.sum(np.square(self.grad_f(theta))) 76 | 77 | 78 | class NewtonPI(NewtonMethod): 79 | """Newton method that uses Moore-Penrose pseudo-inversion of the Hessian instead of 80 | classic inversion, for use in problems with singular Hessians. 81 | """ 82 | 83 | def __init__(self, f, step_size=DEFAULT_STEP_SIZE, log_kwargs=None): 84 | NewtonMethod.__init__(self, f, step_size=step_size, log_kwargs=log_kwargs) 85 | self.pinv = np.linalg.pinv 86 | 87 | def get_update_direction(self, theta): 88 | update_direction = -self.pinv(self.H(theta)).dot(self.grad_f(theta)) 89 | return update_direction 90 | 91 | 92 | class NewtonBTLS(NewtonMethod): 93 | """Newton method that uses back-tracking line search to select the update. 94 | Convergence is checked using the Roosta criterion. 95 | """ 96 | 97 | def __init__(self, f, alpha=DEFAULT_ALPHA, beta=DEFAULT_BETA, rho=DEFAULT_RHO, 98 | check_pure=False, rho_pure=DEFAULT_RHO_PURE, log_kwargs=None): 99 | NewtonMethod.__init__(self, f, log_kwargs=log_kwargs) 100 | self.alpha = alpha 101 | self.beta = beta 102 | self.rho = rho 103 | 104 | self.check_pure = check_pure 105 | self.rho_pure = rho_pure 106 | self.pure_accepted = False 107 | 108 | self.parameters.update({"alpha": self.alpha, 109 | "pure_accepted": self.pure_accepted}) 110 | 111 | self.loggers.append( 112 | Logger("alpha", 113 | lambda step_info: step_info["parameters"]["alpha"])) 114 | 115 | if self.check_pure: 116 | self.loggers.append( 117 | Logger("pure_accepted", 118 | lambda step_info: step_info["parameters"]["pure_accepted"])) 119 | 120 | self.min_step_size = self.compute_min_step_size(alpha, beta) 121 | 122 | def select_update(self, theta, update_direction): 123 | if self.check_pure and self.alpha != 1: 124 | converged = self.check_convergence(theta, update_direction, 1., self.rho_pure) 125 | if converged: 126 | self.alpha = 1. 127 | self.pure_accepted = True 128 | else: 129 | converged = False 130 | self.pure_accepted = False 131 | 132 | while not converged: 133 | converged = self.check_convergence(theta, update_direction, self.alpha, self.rho) 134 | 135 | if not converged: 136 | self.alpha *= self.beta 137 | if self.alpha <= self.min_step_size: 138 | return np.zeros_like(theta) 139 | 140 | update = theta + self.alpha * update_direction 141 | 142 | self.parameters.update( 143 | {"alpha": self.alpha, 144 | "pure_accepted": self.pure_accepted}) 145 | 146 | self.alpha = min(1., self.alpha / self.beta) 147 | return update 148 | 149 | def check_convergence(self, theta, update_direction, alpha, rho): 150 | proposed_update = theta + alpha * update_direction 151 | updated_squared_gradient_norm = self.squared_grad_norm(proposed_update) 152 | current_squared_gradient_norm = self.squared_grad_norm(theta) 153 | sufficient_decrease = 2 * rho * alpha * np.dot(self.hvp(theta, update_direction).T, 154 | self.grad_f(theta)) 155 | 156 | return (updated_squared_gradient_norm <= 157 | current_squared_gradient_norm + sufficient_decrease) 158 | 159 | @staticmethod 160 | def compute_min_step_size(alpha, beta): 161 | while alpha * beta != alpha: 162 | alpha *= beta 163 | return alpha 164 | 165 | 166 | class NewtonMR(NewtonBTLS): 167 | """Newton method that uses MRQLP to approximately compute the update direction 168 | and back-tracking line search to select the update. 
169 | """ 170 | 171 | def __init__(self, f, alpha=DEFAULT_ALPHA, beta=DEFAULT_BETA, rho=DEFAULT_RHO, 172 | check_pure=False, rho_pure=DEFAULT_RHO_PURE, 173 | rtol=DEFAULT_RTOL, maxit=DEFAULT_MAXIT, 174 | acondlim=DEFAULT_ACONDLIM, trancond=DEFAULT_TRANCOND, 175 | maxxnorm=DEFAULT_MAXXNORM, 176 | log_mrqlp=False, log_kwargs=None): 177 | NewtonBTLS.__init__(self, f, alpha, beta, rho, check_pure, rho_pure, 178 | log_kwargs=log_kwargs) 179 | self.rtol = rtol 180 | self.maxit = maxit 181 | self.acondlim = acondlim 182 | self.trancond = trancond 183 | self.maxxnorm = maxxnorm 184 | 185 | self.parameters.update({"rtol": rtol, 186 | "maxit": maxit, 187 | "acondlim": acondlim, 188 | "trancond": trancond, 189 | "maxxnorm": maxxnorm}) 190 | 191 | self.log_mrqlp = log_mrqlp 192 | 193 | if self.log_mrqlp: 194 | self.loggers.append( 195 | Logger("mrqlp_outputs", 196 | lambda step_info: step_info["parameters"]["mrqlp_outputs"])) 197 | self.parameters.update({"mrqlp_outputs": None}) 198 | 199 | def get_update_direction(self, theta): 200 | current_hvp = lambda v: self.hvp(theta, v) 201 | mrqlp_outputs = mrqlp( 202 | current_hvp, -1 * self.grad_f(theta), 203 | rtol=self.rtol, maxit=self.maxit, 204 | acondlim=self.acondlim, trancond=self.trancond, maxxnorm=self.maxxnorm) 205 | 206 | self.parameters.update({"mrqlp_outputs": mrqlp_outputs[1:]}) 207 | mr_update_direction = mrqlp_outputs[0] 208 | 209 | return mr_update_direction 210 | 211 | 212 | class FastNewtonMR(NewtonMR): 213 | """Newton method that uses MRQLP to approximately compute the update direction. 214 | Makes use of fast Hessian-vector products. 215 | """ 216 | 217 | def __init__(self, f, alpha=DEFAULT_ALPHA, beta=DEFAULT_BETA, rho=DEFAULT_RHO, 218 | check_pure=False, rho_pure=DEFAULT_RHO_PURE, 219 | rtol=DEFAULT_RTOL, maxit=DEFAULT_MAXIT, 220 | acondlim=DEFAULT_ACONDLIM, trancond=DEFAULT_TRANCOND, 221 | maxxnorm=DEFAULT_MAXXNORM, 222 | log_mrqlp=False, log_kwargs=None): 223 | NewtonMR.__init__(self, f, alpha, beta, rho, check_pure, rho_pure, 224 | rtol=rtol, maxit=maxit, acondlim=acondlim, 225 | maxxnorm=maxxnorm, trancond=trancond, 226 | log_mrqlp=log_mrqlp, log_kwargs=log_kwargs) 227 | self.hvp = autograd.hessian_vector_product(self.f) 228 | 229 | 230 | class NewtonTR(NewtonPI): 231 | """Newton method that computes a sequence of proposed updates using the pseudo-inverse of 232 | a sequence of perturbed versions of the Hessian. The perturbations are diagonal matrices with 233 | varying values gamma. Equivalent to a trust region approach. 
234 | """ 235 | 236 | def __init__(self, f, gammas=DEFAULT_GAMMAS, step_size=DEFAULT_STEP_SIZE, log_kwargs=None): 237 | NewtonPI.__init__(self, f, step_size=step_size, log_kwargs=log_kwargs) 238 | self.gammas = gammas 239 | self.Hs = [lambda theta, gamma=gamma: self.H(theta) + np.diag([gamma] * theta.shape[0]) 240 | for gamma in gammas] 241 | 242 | self.parameters.update({"gammas": gammas}) 243 | 244 | def get_update_direction(self, theta): 245 | update_directions = [] 246 | 247 | for H in self.Hs: 248 | update_directions.append(-self.pinv(H(theta)) 249 | .dot(self.grad_f(theta))) 250 | 251 | return update_directions 252 | 253 | def select_update(self, theta, update_directions): 254 | best_update = theta 255 | best_grad_norm = self.squared_grad_norm(best_update) 256 | for update_direction in update_directions: 257 | proposed_update = theta + self.step_size * update_direction 258 | if self.squared_grad_norm(proposed_update) < best_grad_norm: 259 | best_update = proposed_update 260 | 261 | return best_update 262 | 263 | 264 | class FastNewtonTR(NewtonTR): 265 | """Newton method that computes a sequence of proposed updates by applying MRQLP to 266 | a sequence of perturbed versions of the Hessian. The perturbations are diagonal matrices with 267 | varying values gamma. Equivalent to a trust region approach. 268 | Makes use of fast Hessian-vector products. 269 | """ 270 | 271 | def __init__(self, f, gammas=DEFAULT_GAMMAS, step_size=DEFAULT_STEP_SIZE, log_kwargs=None, 272 | rtol=DEFAULT_RTOL, maxit=DEFAULT_MAXIT): 273 | NewtonTR.__init__(self, f, gammas, step_size=step_size, log_kwargs=log_kwargs) 274 | self.rtol = rtol 275 | self.maxit = maxit 276 | 277 | self.hvps = [lambda theta, v, gamma=gamma: autograd.hessian_vector_product(self.f)(theta, v) + 278 | gamma * v for gamma in gammas] 279 | 280 | def get_update_direction(self, theta): 281 | update_directions = [] 282 | current_hvps = [lambda v, hvp=hvp: hvp(theta, v) for hvp in self.hvps] 283 | 284 | for current_hvp in current_hvps: 285 | mr_update_direction = mrqlp(current_hvp, -1 * self.grad_f(theta), 286 | rtol=self.rtol, maxit=self.maxit)[0] 287 | update_directions.append(mr_update_direction) 288 | 289 | return update_directions 290 | -------------------------------------------------------------------------------- /autocrit/nn/layers.py: -------------------------------------------------------------------------------- 1 | # modified from code in autograd/examples/convnet.py 2 | 3 | import autograd.numpy as np 4 | 5 | from autocrit.nn.conv import convolve, torch_accelerated 6 | from autocrit.utils import math 7 | 8 | _NONLINEARITIES = {"relu": math.relu, 9 | "sigmoid": math.sigmoid, 10 | "softplus": math.softplus, 11 | "swish": math.swish, 12 | "none": lambda x: x} 13 | 14 | 15 | class ParamParser(object): 16 | """A helper class to index into a parameter vector.""" 17 | def __init__(self): 18 | self.idxs_and_shapes = {} 19 | self.N = 0 20 | 21 | def add_params(self, name, shape): 22 | start = self.N 23 | self.N += np.prod(shape) 24 | self.idxs_and_shapes[name] = (slice(start, self.N), shape) 25 | 26 | def get(self, vect, name): 27 | idxs, shape = self.idxs_and_shapes[name] 28 | return np.reshape(vect[idxs], shape) 29 | 30 | 31 | class Layer(object): 32 | """A Layer implements two methods: 33 | forward_pass, which takes inputs and a parameter vector and returns outputs, 34 | and build, which takes the input_shape and computes the number of 35 | parameters and the shape of the outputs, optionally also 36 | using a ParamParser to track those parameters.
37 | """ 38 | 39 | def __init__(self): 40 | pass 41 | 42 | def to_batch_major(self, inputs): 43 | """Reorder [y, x, channels, batch] 44 | to [batch, channels, y, x] 45 | """ 46 | return np.moveaxis(inputs, [0, 1, 2, 3], [2, 3, 1, 0]) 47 | 48 | def to_batch_minor(self, inputs): 49 | """Reorder [batch, channels, y, x] 50 | to [y, x, channels, batch] 51 | """ 52 | return np.moveaxis(inputs, [0, 1, 2, 3], [3, 2, 0, 1]) 53 | 54 | def forward_pass(self, inputs, theta): 55 | raise NotImplementedError 56 | 57 | def build(self, input_shape): 58 | raise NotImplementedError 59 | 60 | def to_dict(self, str, params): 61 | """Convert Layer to a dictionary representation. 62 | """ 63 | return {"type": str, "params": params} 64 | 65 | 66 | class PointwiseNonlinearLayer(Layer): 67 | """Layer for applying the same nonlinear function to each node, 68 | aka pointwise. 69 | 70 | Any callable can be provided as the nonlinearity, but the layer 71 | can only be represented by a dictionary if the nonlinearity is provided 72 | as a string, used to key into the _NONLINEARITIES dictionary. 73 | """ 74 | str = "pointwise_nonlinear" 75 | 76 | def __init__(self, nonlinearity): 77 | """ 78 | Parameters 79 | ---------- 80 | nonlinearity: str or callable. pointwise nonlinear transformation. 81 | if is a str instance, used to key into _NONLINEARITIES dictionary. 82 | if is callable, directly called as function applied by this layer. 83 | it is assumed but not checked that this function doesn't change the shape. 84 | """ 85 | if isinstance(nonlinearity, str): 86 | self.nonlinearity_str = nonlinearity 87 | nonlinearity = _NONLINEARITIES[nonlinearity] 88 | else: 89 | assert callable(nonlinearity) 90 | self.nonlinearity = nonlinearity 91 | 92 | def forward_pass(self, inputs, theta): 93 | return self.nonlinearity(inputs) 94 | 95 | def build(self, input_shape): 96 | return 0, input_shape 97 | 98 | def to_dict(self): 99 | assert hasattr(self, "nonlinearity_str"), "can't save nonlinear layer without str" 100 | params = {"nonlinearity": self.nonlinearity_str} 101 | return super().to_dict(self.str, params) 102 | 103 | 104 | class FCLayer(Layer): 105 | """Layer for applying an affine transformation to the inputs. 106 | """ 107 | str = "fc" 108 | 109 | def __init__(self, out_nodes, has_biases=True): 110 | """ 111 | Parameters 112 | ---------- 113 | out_nodes: int, number of nodes in the output layer. 114 | has_biases: bool, if False, linear transform. otherwise affine. 115 | """ 116 | self.out_nodes = out_nodes 117 | self.has_biases = has_biases 118 | 119 | def forward_pass(self, inputs, theta): 120 | W = self.parser.get(theta, 'weights') 121 | if self.has_biases: 122 | b = self.parser.get(theta, 'biases') 123 | else: 124 | b = 0. 125 | activations = np.dot(W, inputs) + b 126 | return activations 127 | 128 | def build(self, input_shape): 129 | self.parser = ParamParser() 130 | self.parser.add_params('weights', (self.out_nodes, input_shape[0])) 131 | if self.has_biases: 132 | self.parser.add_params('biases', (self.out_nodes, 1)) 133 | output_shape = (self.out_nodes, 1) 134 | 135 | return self.parser.N, output_shape 136 | 137 | def to_dict(self): 138 | params = {"out_nodes": self.out_nodes, 139 | "has_biases": self.has_biases} 140 | return super().to_dict(self.str, params) 141 | 142 | 143 | class ConvLayer(Layer): 144 | """Layer for applying a valid 2D convolution to inputs. 
145 | """ 146 | str = "conv" 147 | 148 | def __init__(self, kernel_shape, out_channels, accelerated=torch_accelerated): 149 | """ 150 | Parameters 151 | ---------- 152 | kernel_shape: tuple of ints, shape of convolutional kernel 153 | out_channels: int, number of output channels aka convolutional kernels 154 | accelerated: Boolean, use pytorch acceleration, if available. See conv.py 155 | """ 156 | self.kernel_shape = kernel_shape 157 | self.out_channels = out_channels 158 | self.accelerated = accelerated 159 | 160 | def forward_pass(self, inputs, theta): 161 | weights = self.parser.get(theta, 'weights') 162 | biases = self.parser.get(theta, 'biases') 163 | inputs = self.to_batch_major(inputs) 164 | conv = convolve(inputs, weights, 165 | axes=([2, 3], [2, 3]), dot_axes=([1], [0]), 166 | mode='valid', accelerated=self.accelerated) 167 | activations = conv + biases 168 | activations = self.to_batch_minor(activations) 169 | return activations 170 | 171 | def build(self, input_shape): 172 | self.parser = ParamParser() 173 | self.parser.add_params('weights', (input_shape[-2], self.out_channels) + 174 | self.kernel_shape) 175 | self.parser.add_params('biases', (1, self.out_channels, 1, 1)) 176 | output_shape = self.conv_output_shape(input_shape[:-1], self.kernel_shape) +\ 177 | (self.out_channels, input_shape[-1]) 178 | return self.parser.N, output_shape 179 | 180 | def conv_output_shape(self, A, B): 181 | return (A[0] - B[0] + 1, A[1] - B[1] + 1) 182 | 183 | def to_dict(self): 184 | params = {"kernel_shape": self.kernel_shape, 185 | "out_channels": self.out_channels} 186 | return super().to_dict(self.str, params) 187 | 188 | 189 | class PoolLayer(Layer): 190 | """Abstract class for Layers that applying pooling: summarizing 191 | a block of values in a feature map with a single number. 192 | 193 | Pooling shapes must evenly tile inputs. 
194 | """ 195 | 196 | def __init__(self, pool_shape): 197 | """ 198 | Parameters 199 | ---------- 200 | pool_shape: tuple of ints, shape of pooling kernel 201 | """ 202 | self.pool_shape = pool_shape 203 | 204 | def forward_pass(self, inputs, theta): 205 | patches = self.to_patches(inputs) 206 | patch_means = self.pool_func(patches) 207 | patch_means = self.to_batch_minor(patch_means) 208 | return patch_means 209 | 210 | def build(self, input_shape): 211 | output_shape = self.set_output_shapes(input_shape) 212 | return 0, output_shape 213 | 214 | def to_patches(self, inputs): 215 | self.set_patch_shapes(inputs.shape) 216 | channels, batch = inputs.shape[2:] 217 | inputs = self.to_batch_major(inputs) 218 | 219 | patched_shape = inputs.shape[:2] 220 | for patch_ct, pool_shape in zip(self.patch_yx, self.pool_shape): 221 | patched_shape += (patch_ct, pool_shape) 222 | 223 | patches = inputs.reshape(patched_shape) 224 | 225 | return patches 226 | 227 | def set_patch_shapes(self, input_shape): 228 | self.input_yx = input_shape[:2] 229 | self.patch_yx = np.floor_divide(self.input_yx, self.pool_shape) 230 | self.num_patches = np.prod(self.patch_yx) 231 | 232 | def set_output_shapes(self, input_shape): 233 | self.output_shape = list(input_shape) 234 | for i in [0, 1]: 235 | assert input_shape[i] % self.pool_shape[i] == 0, \ 236 | "pool shape should tile input exactly" 237 | self.output_shape[i] = input_shape[i] // self.pool_shape[i] 238 | return self.output_shape 239 | 240 | def pool_func(self, patches): 241 | return patches 242 | 243 | def to_dict(self, str, params): 244 | return super().to_dict(str, params) 245 | 246 | 247 | class AvgPoolLayer(PoolLayer): 248 | """Applies an average pooling: computes mean of elements in pool kernel. 249 | 250 | Pooling kernel shape must evenly tile inputs. 251 | """ 252 | str = "avg_pool" 253 | 254 | def __init__(self, pool_shape): 255 | """ 256 | Parameters 257 | ---------- 258 | pool_shape: tuple of ints, shape of pooling kernel 259 | """ 260 | super().__init__(pool_shape) 261 | 262 | def pool_func(self, patches): 263 | return np.mean(np.mean(patches, axis=3), axis=4) 264 | 265 | 266 | class MaxPoolLayer(PoolLayer): 267 | """Applies maximum-based pooling: computes max of elements in pool kernel. 268 | 269 | Pooling kernel shape must evenly tile inputs. 270 | """ 271 | str = "max_pool" 272 | 273 | def __init__(self, pool_shape): 274 | """ 275 | Parameters 276 | ---------- 277 | pool_shape: tuple of ints, shape of pooling kernel 278 | """ 279 | super().__init__(pool_shape) 280 | 281 | def pool_func(self, patches): 282 | return np.max(np.max(patches, axis=3), axis=4) 283 | 284 | def to_dict(self): 285 | params = {"pool_shape": self.pool_shape} 286 | return super().to_dict(self.str, params) 287 | 288 | 289 | class GlobalAvgPoolLayer(AvgPoolLayer): 290 | """Applies global average pooling: computes the average of the 291 | entire feature map. 292 | 293 | Typically used as the last transformation before classification 294 | in an all-convolutional classification network. 295 | """ 296 | str = "global_avg_pool" 297 | 298 | def __init__(self): 299 | pass 300 | 301 | def build(self, input_shape): 302 | super().__init__(input_shape[:2]) 303 | output_shape = self.set_output_shapes(input_shape) 304 | return 0, output_shape 305 | 306 | def to_dict(self): 307 | params = {} 308 | return super().to_dict(self.str, params) 309 | 310 | 311 | class SqueezeLayer(Layer): 312 | """Removes "dummy" singleton axes from shapes. 
313 | """ 314 | str = "squeeze" 315 | 316 | def __init__(self, squeeze_axes=(0, 1)): 317 | """ 318 | Parameters 319 | ---------- 320 | 321 | squeeze_axes: tuple of ints, axes to remove 322 | """ 323 | super().__init__() 324 | self.squeeze_axes = squeeze_axes 325 | 326 | def build(self, input_shape): 327 | output_shape = [input_shape[i] for i in range(len(input_shape)) 328 | if i not in self.squeeze_axes] 329 | return 0, output_shape 330 | 331 | def forward_pass(self, inputs, theta): 332 | for axis in self.squeeze_axes: 333 | assert inputs.shape[axis] == 1 334 | return np.squeeze(inputs, axis=self.squeeze_axes) 335 | 336 | def to_dict(self): 337 | params = {"squeeze_axes": self.squeeze_axes} 338 | return super().to_dict(self.str, params) 339 | 340 | 341 | class LambdaLayer(Layer): 342 | """Layer for arbitrary functional transformations. 343 | 344 | Cannot be represented by a dictionary. 345 | """ 346 | str = "lambda" 347 | 348 | def __init__(self, lam, shape_calculator=lambda shape: shape): 349 | """ 350 | Parameters 351 | ---------- 352 | lam: callable. Functional transformation to apply. 353 | shape_calculator: callable. Computes output shape from input shape. 354 | Defaults to assuming shape does not change. 355 | """ 356 | super().__init__() 357 | self.lam = lam 358 | self.shape_calculator = shape_calculator 359 | 360 | def build(self, input_shape): 361 | output_shape = self.shape_calculator(input_shape) 362 | return 0, output_shape 363 | 364 | def forward_pass(self, inputs, theta): 365 | return self.lam(inputs) 366 | 367 | def to_dict(self): 368 | raise NotImplementedError("cannot convert LambdaLayer to dict") 369 | 370 | 371 | _layer_list = [PointwiseNonlinearLayer, 372 | FCLayer, 373 | ConvLayer, 374 | AvgPoolLayer, 375 | MaxPoolLayer, 376 | GlobalAvgPoolLayer, 377 | SqueezeLayer, 378 | LambdaLayer] 379 | 380 | _LAYERS = {layer.str: layer for layer in _layer_list} 381 | -------------------------------------------------------------------------------- /autocrit/finders/minresQLP.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jan 14 23:43:12 2018 5 | 6 | Note: 7 | This code is translated from the MATLAB version of minresQLP: 8 | http://www.stanford.edu/group/SOL/software.html 9 | 10 | Authors: 11 | Yang Liu, 12 | School of Mathematics and Physics, 13 | The University of Queensland. 14 | yang.liu15(AT)uqconnect.edu.au 15 | 16 | Farbod Roosta-Khorasani, 17 | School of Mathematics and Physics, 18 | The University of Queensland. 19 | fred.roosta(AT)uq.edu.au 20 | 21 | REFERENCES: 22 | S.-C. Choi, C. C. Paige, and M. A. Saunders, 23 | MINRES-QLP: A Krylov subspace method for indefinite or singular symmetric 24 | systems, SIAM Journal of Scientific Computing, submitted on March 7, 2010. 25 | 26 | S.-C. Choi's PhD Dissertation, Stanford University, 2006: 27 | http://www.stanford.edu/group/SOL/dissertations.html 28 | 29 | -------------------------------------------------------------------------- 30 | minresQLP: Aim to obtain the min-length solution of symmetric 31 | (possibly singular) Ax=b or min||Ax-b||. 32 | 33 | X = minresQLP(A,B) solves the system of linear equations A*X=B 34 | or the least-squares problem min norm(B-A*X) if A is singular. 35 | The N-by-N matrix A must be symmetric or Hermitian, but need not be 36 | positive definite or nonsingular. It may be double or single. 37 | The rhs vector B must have length N. 
It may be real or complex, 38 | double or single, 39 | 40 | X = minresQLP(AFUN,B) accepts a function handle AFUN instead of 41 | the matrix A. Y = AFUN(X) returns the matrix-vector product Y=A*X. 42 | In all of the following syntaxes, A can be replaced by AFUN. 43 | 44 | X = minresQLP(A,B,RTOL) specifies a stopping tolerance. 45 | If RTOL=[] or is absent, a default value is used. 46 | (Similarly for all later input parameters.) 47 | Default RTOL=1e-6. 48 | 49 | X = minresQLP(A,B,RTOL,MAXIT) 50 | specifies the maximum number of iterations. Default MAXIT=N. 51 | 52 | X = minresQLP(A,B,RTOL,MAXIT,M) 53 | uses a matrix M as preconditioner. M must be positive definite 54 | and symmetric or Hermitian. It may be a function handle MFUN 55 | such that Y=MFUN(X) returns Y=M divide X. 56 | If M=[], a preconditioner is not applied. 57 | 58 | X = minresQLP(A,B,RTOL,MAXIT,M,SHIFT) 59 | solves (A - SHIFT*I)X = B, or the corresponding least-squares problem 60 | if (A - SHIFT*I) is singular, where SHIFT is a real or complex scalar. 61 | Default SHIFT=0. 62 | 63 | X = minresQLP(A,B,RTOL,MAXIT,M,SHIFT,MAXXNORM,ACONDLIM,TRANCOND) 64 | specifies three parameters associated with singular or 65 | ill-conditioned systems (A - SHIFT*I)*X = B. 66 | 67 | MAXXNORM is an upper bound on NORM(X). 68 | Default MAXXNORM=1e7. 69 | 70 | ACONDLIM is an upper bound on ACOND, an estimate of COND(A). 71 | Default ACONDLIM=1e15. 72 | 73 | TRANCOND is a real scalar >= 1. 74 | If TRANCOND>1, a switch is made from MINRES iterations to 75 | MINRES-QLP iterationsd when ACOND >= TRANCOND. 76 | If TRANCOND=1, all iterations will be MINRES-QLP iterations. 77 | If TRANCOND=ACONDLIM, all iterations will be conventional MINRES 78 | iterations (which are slightly cheaper). 79 | Default TRANCOND=1e7. 80 | 81 | X = minresQLP(A,B,RTOL,MAXIT,M,SHIFT,MAXXNORM,ACONDLIM,TRANCOND,SHOW) 82 | specifies the printing option. 83 | If SHOW=true, an iteration log will be output. 84 | If SHOW=false, the log is suppressed. 85 | Default SHOW=true. 86 | 87 | 88 | FLAG: 89 | -1 (beta2=0) B and X are eigenvectors of (A - SHIFT*I). 90 | 0 (beta1=0) B = 0. The exact solution is X = 0. 91 | 1 X solves the compatible (possibly singular) system (A - SHIFT*I)X = B 92 | to the desired tolerance: 93 | RELRES = RNORM / (ANORM*XNORM + NORM(B)) <= RTOL, 94 | where 95 | R = B - (A - SHIFT*I)X and RNORM = norm(R). 96 | 2 X solves the incompatible (singular) system (A - SHIFT*I)X = B 97 | to the desired tolerance: 98 | RELARES = ARNORM / (ANORM * RNORM) <= RTOL, 99 | where 100 | AR = (A - SHIFT*I)R and ARNORM = NORM(AR). 101 | 3 Same as 1 with RTOL = EPS. 102 | 4 Same as 2 with RTOL = EPS. 103 | 5 X converged to an eigenvector of (A - SHIFT*I). 104 | 6 XNORM exceeded MAXXNORM. 105 | 7 ACOND exceeded ACONDLIM. 106 | 8 MAXIT iterations were performed before one of the previous 107 | conditions was satisfied. 108 | 9 The system appears to be exactly singular. XNORM does not 109 | yet exceed MAXXNORM, but would if further iterations were 110 | performed. 111 | 112 | ITER: the number of iterations performed. ITER = MITER + QLPITER. 113 | MITER: the number of conventional MINRES iterations. 114 | QLPITER: the number of MINRES-QLP iterations. 115 | 116 | RELRES & RELARES: Relative residuals for (A - SHIFT*I)X = B and the 117 | associated least-squares problem. RELRES and RELARES are 118 | defined above in the description of FLAG. 119 | 120 | ANORM: an estimate of the 2-norm of A-SHIFT*I. 121 | ACOND: an estimate of COND(A-SHIFT*I,2). 122 | XNORM: a recurred estimate of NORM(X). 
123 | AXNORM: a recurred estimate of NORM((A-SHIFT*I)X) 124 | 125 | RESVEC: a vector of estimates of NORM(R) at each iteration, 126 | including NORM(B) as the first entry. 127 | ARESVEC: a vector of estimates of NORM((A-SHIFT*I)R) at each 128 | iteration, including NORM((A-SHIFT*I)B) as the first entry. 129 | RESVEC and ARESVEC have length ITER+1. 130 | 131 | COPYRIGHT NOTICE: 132 | If you seek permission to copy and distribute translations of this 133 | software into another language, please e-mail a specific request to 134 | saunders@stanford.edu and scchoi@stanford.edu. 135 | """ 136 | 137 | import numpy as np 138 | import scipy.sparse as sp 139 | from numpy.linalg import inv, norm 140 | from scipy.sparse.linalg import cg 141 | from scipy.sparse.linalg.interface import aslinearoperator 142 | 143 | def MinresQLP(A, b, rtol, maxit, M=None, shift=None, maxxnorm=None, 144 | acondlim=None, trancond=None, show=False, rnormvec=False): 145 | 146 | #A = aslinearoperator(A) 147 | if shift is None: 148 | shift = 0 149 | if maxxnorm is None: 150 | maxxnorm = 1e7 151 | if acondlim is None: 152 | acondlim = 1e15 153 | if trancond is None: 154 | trancond = 1e7 155 | if rnormvec: 156 | resvec = [] 157 | Aresvec = [] 158 | 159 | 160 | n = len(b) 161 | b = b.reshape(n,1) 162 | r2 = b 163 | r3 = r2 164 | beta1 = norm(r2) 165 | 166 | if M is None: 167 | noprecon = True 168 | pass 169 | else: 170 | noprecon = False 171 | r3 = Precond(M, r2) 172 | beta1 = r3.T.dot(r2) #teta 173 | if beta1 <0: 174 | print('Error: "M" is indefinite!') 175 | else: 176 | beta1 = np.sqrt(beta1) 177 | 178 | ## Initialize 179 | flag0 = -2 180 | flag = -2 181 | iters = 0 182 | QLPiter = 0 183 | beta = 0 184 | tau = 0 185 | taul = 0 186 | phi = beta1 187 | betan = beta1 188 | gmin = 0 189 | cs = -1 190 | sn = 0 191 | cr1 = -1 192 | sr1 = 0 193 | cr2 = -1 194 | sr2 = 0 195 | dltan = 0 196 | eplnn = 0 197 | gama = 0 198 | gamal = 0 199 | gamal2 = 0 200 | eta = 0 201 | etal = 0 202 | etal2 = 0 203 | vepln = 0 204 | veplnl = 0 205 | veplnl2 = 0 206 | ul3 = 0 207 | ul2 = 0 208 | ul = 0 209 | u = 0 210 | rnorm = betan 211 | xnorm = 0 212 | xl2norm = 0 213 | Axnorm = 0 214 | Anorm = 0 215 | Acond = 1 216 | relres = rnorm / (beta1 + 1e-50) 217 | x = np.zeros((n,1)) 218 | w = np.zeros((n,1)) 219 | wl = np.zeros((n,1)) 220 | if rnormvec: 221 | resvec = np.append(resvec, beta1) 222 | 223 | msg = [' beta2 = 0. b and x are eigenvectors ', # -1 224 | ' beta1 = 0. 
The exact solution is x = 0 ', # 0 225 | ' A solution to Ax = b found, given rtol ', # 1 226 | ' Min-length solution for singular LS problem, given rtol', # 2 227 | ' A solution to Ax = b found, given eps ', # 3 228 | ' Min-length solution for singular LS problem, given eps ', # 4 229 | ' x has converged to an eigenvector ', # 5 230 | ' xnorm has exceeded maxxnorm ', # 6 231 | ' Acond has exceeded acondlim ', # 7 232 | ' The iteration limit was reached ', # 8 233 | ' Least-squares problem but no converged solution yet '] # 9 234 | 235 | if show: 236 | print(' ') 237 | print('Enter Minres-QLP: ') 238 | print('Min-length solution of symmetric(singular)', end=' ') 239 | print('(A-sI)x = b or min ||(A-sI)x - b||') 240 | #||Ax - b|| is ||(A-sI)x - b|| if shift != 0 here 241 | hstr1 = ' n = %8g ||Ax - b|| = %8.2e ' % (n, beta1) 242 | hstr2 = 'shift = %8.2e rtol = %8g' % (shift, rtol) 243 | hstr3 = 'maxit = %8g maxxnorm = %8.2e ' % (maxit, maxxnorm) 244 | hstr4 = 'acondlim = %8.2e trancond = %8g' % (acondlim, trancond) 245 | print(hstr1, hstr2) 246 | print(hstr3, hstr4) 247 | 248 | #b = 0 --> x = 0 skip the main loop 249 | if beta1 == 0: 250 | flag = 0 251 | 252 | while flag == flag0 and iters < maxit: 253 | #lanczos 254 | iters += 1 255 | betal = beta 256 | beta = betan 257 | v = r3/beta 258 | r3 = Ax(A, v) 259 | if shift == 0: 260 | pass 261 | else: 262 | r3 = r3 - shift*v 263 | 264 | if iters > 1: 265 | r3 = r3 - r1*beta/betal 266 | 267 | alfa = np.real(r3.T.dot(v)) 268 | r3 = r3 - r2*alfa/beta 269 | r1 = r2 270 | r2 = r3 271 | 272 | if noprecon: 273 | betan = norm(r3) 274 | if iters == 1: 275 | if betan == 0: 276 | if alfa == 0: 277 | flag = 0 278 | break 279 | else: 280 | flag = -1 281 | x = b/alfa 282 | break 283 | else: 284 | r3 = Precond(M, r2) 285 | betan = r2.T.dot(r3) 286 | if betan > 0: 287 | betan = np.sqrt(betan) 288 | else: 289 | print('Error: "M" is indefinite or singular!') 290 | pnorm = np.sqrt(betal ** 2 + alfa ** 2 + betan ** 2) 291 | 292 | #previous left rotation Q_{k-1} 293 | dbar = dltan 294 | dlta = cs*dbar + sn*alfa 295 | epln = eplnn 296 | gbar = sn*dbar - cs*alfa 297 | eplnn = sn*betan 298 | dltan = -cs*betan 299 | dlta_QLP = dlta 300 | #current left plane rotation Q_k 301 | gamal3 = gamal2 302 | gamal2 = gamal 303 | gamal = gama 304 | cs, sn, gama = SymGivens(gbar, betan) 305 | gama_tmp = gama 306 | taul2 = taul 307 | taul = tau 308 | tau = cs*phi 309 | Axnorm = np.sqrt(Axnorm ** 2 + tau ** 2) 310 | phi = sn*phi 311 | #previous right plane rotation P_{k-2,k} 312 | if iters > 2: 313 | veplnl2 = veplnl 314 | etal2 = etal 315 | etal = eta 316 | dlta_tmp = sr2*vepln - cr2*dlta 317 | veplnl = cr2*vepln + sr2*dlta 318 | dlta = dlta_tmp 319 | eta = sr2*gama 320 | gama = -cr2 *gama 321 | #current right plane rotation P{k-1,k} 322 | if iters > 1: 323 | cr1, sr1, gamal = SymGivens(gamal, dlta) 324 | vepln = sr1*gama 325 | gama = -cr1*gama 326 | 327 | #update xnorm 328 | xnorml = xnorm 329 | ul4 = ul3 330 | ul3 = ul2 331 | if iters > 2: 332 | ul2 = (taul2 - etal2*ul4 - veplnl2*ul3)/gamal2 333 | if iters > 1: 334 | ul = (taul - etal*ul3 - veplnl *ul2)/gamal 335 | xnorm_tmp = np.sqrt(xl2norm**2 + ul2**2 + ul**2) 336 | if abs(gama) > np.finfo(np.double).tiny and xnorm_tmp < maxxnorm: 337 | u = (tau - eta*ul2 - vepln*ul)/gama 338 | if np.sqrt(xnorm_tmp**2 + u**2) > maxxnorm: 339 | u = 0 340 | flag = 6 341 | else: 342 | u = 0 343 | flag = 9 344 | xl2norm = np.sqrt(xl2norm**2 + ul2**2) 345 | xnorm = np.sqrt(xl2norm**2 + ul**2 + u**2) 346 | #update w&x 347 | #Minres 348 | if (Acond < 
trancond) and flag != flag0 and QLPiter == 0: 349 | wl2 = wl 350 | wl = w 351 | w = (v - epln*wl2 - dlta_QLP*wl)/gama_tmp 352 | if xnorm < maxxnorm: 353 | x += tau*w 354 | else: 355 | flag = 6 356 | #Minres-QLP 357 | else: 358 | QLPiter += 1 359 | if QLPiter == 1: 360 | xl2 = np.zeros((n,1)) 361 | if (iters > 1): # construct w_{k-3}, w_{k-2}, w_{k-1} 362 | if iters > 3: 363 | wl2 = gamal3*wl2 + veplnl2*wl + etal*w 364 | if iters > 2: 365 | wl = gamal_QLP*wl + vepln_QLP*w 366 | w = gama_QLP*w 367 | xl2 = x - wl*ul_QLP - w*u_QLP 368 | 369 | if iters == 1: 370 | wl2 = wl 371 | wl = v*sr1 372 | w = -v*cr1 373 | elif iters == 2: 374 | wl2 = wl 375 | wl = w*cr1 + v*sr1 376 | w = w*sr1 - v*cr1 377 | else: 378 | wl2 = wl 379 | wl = w 380 | w = wl2*sr2 - v*cr2 381 | wl2 = wl2*cr2 +v*sr2 382 | v = wl*cr1 + w*sr1 383 | w = wl*sr1 - w*cr1 384 | wl = v 385 | xl2 = xl2 + wl2*ul2 386 | x = xl2 + wl*ul + w*u 387 | 388 | #next right plane rotation P{k-1,k+1} 389 | gamal_tmp = gamal 390 | cr2, sr2, gamal = SymGivens(gamal, eplnn) 391 | #transfering from Minres to Minres-QLP 392 | gamal_QLP = gamal_tmp 393 | #print('gamal_QLP=', gamal_QLP) 394 | vepln_QLP = vepln 395 | gama_QLP = gama 396 | ul_QLP = ul 397 | u_QLP = u 398 | ## Estimate various norms 399 | abs_gama = abs(gama) 400 | Anorml = Anorm 401 | Anorm = max([Anorm, pnorm, gamal, abs_gama]) 402 | if iters == 1: 403 | gmin = gama 404 | gminl = gmin 405 | elif iters > 1: 406 | gminl2 = gminl 407 | gminl = gmin 408 | gmin = min([gminl2, gamal, abs_gama]) 409 | Acondl = Acond 410 | Acond = Anorm / gmin 411 | rnorml = rnorm 412 | relresl = relres 413 | if flag != 9: 414 | rnorm = phi 415 | relres = rnorm / (Anorm * xnorm + beta1) 416 | rootl = np.sqrt(gbar ** 2 + dltan ** 2) 417 | Arnorml = rnorml * rootl 418 | relAresl = rootl / Anorm 419 | ## See if any of the stopping criteria are satisfied. 
420 | epsx = Anorm * xnorm * np.finfo(float).eps
421 | if (flag == flag0) or (flag == 9):
422 | t1 = 1 + relres
423 | t2 = 1 + relAresl
424 | if iters >= maxit:
425 | flag = 8 #exit before maxit
426 | if epsx >= beta1:
427 | flag = 5 #x = eigenvector
428 | if t2 <= 1:
429 | flag = 4 #Accurate Least Square Solution
430 | if t1 <= 1:
431 | flag = 3 #Accurate Ax = b Solution
432 | if relAresl <= rtol:
433 | flag = 2 #Trustful Least Square Solution
434 | if relres <= rtol:
435 | flag = 1 #Trustful Ax = b Solution
436 | if Acond >= acondlim:
437 | flag = 7 #Huge Acond
438 | if xnorm >= maxxnorm:
439 | flag = 6 #xnorm exceeded
440 | if flag == 2 or flag == 4 or flag == 6 or flag == 7:
441 | #possibly singular
442 | iters = iters - 1
443 | Acond = Acondl
444 | rnorm = rnorml
445 | relres = relresl
446 | else:
447 | if rnormvec:
448 | resvec = np.append(resvec, rnorm)
449 | Aresvec = np.append(Aresvec, Arnorml)
450 |
451 | if show:
452 | if iters%10 - 1 == 0:
453 | lstr = (' iter rnorm Arnorm relres ' +
454 | 'relAres Anorm Acond xnorm')
455 | print(' ')
456 | print(lstr)
457 | if QLPiter == 1:
458 | print('QLP', end='')
459 | else:
460 | print(' ', end='')
461 | lstr1 = '%8g %8.2e ' % (iters-1, rnorml)
462 | lstr2 = '%8.2e %8.2e ' % (Arnorml, relresl)
463 | lstr3 = '%8.2e %8.2e ' % (relAresl, Anorml)
464 | lstr4 = '%8.2e %8.2e ' % (Acondl, xnorml)
465 | print(lstr1, lstr2, lstr3, lstr4)
466 |
467 | #exited the main loop
468 | if show:
469 | if QLPiter == 1:
470 | print('QLP', end = '')
471 | else:
472 | print(' ', end = '')
473 | Miter = iters - QLPiter
474 |
475 | #final quantities
476 | r1 = b - Ax(A,x) + shift*x
477 | rnorm = norm(r1)
478 | Arnorm = norm(Ax(A,r1) - shift*r1)
479 | xnorm = norm(x)
480 | relres = rnorm/(Anorm*xnorm + beta1)
481 | relAres = 0
482 | if rnorm > np.finfo(np.double).tiny:
483 | relAres = Arnorm/(Anorm*rnorm)
484 |
485 | if show:
486 | if rnorm > np.finfo(np.double).tiny:
487 | lstr1 = '%8g %8.2e ' % (iters, rnorm)
488 | lstr2 = '%8.2eD %8.2e ' % (Arnorm, relres)
489 | lstr3 = '%8.2eD %8.2e ' % (relAres, Anorm)
490 | lstr4 = '%8.2e %8.2e ' % (Acond, xnorm)
491 | print(lstr1, lstr2, lstr3, lstr4)
492 | else:
493 | lstr1 = '%8g %8.2e ' % (iters, rnorm)
494 | lstr2 = '%8.2eD %8.2e ' % (Arnorm, relres)
495 | lstr3 = ' %8.2e ' % (Anorm)
496 | lstr4 = '%8.2e %8.2e ' % (Acond, xnorm)
497 | print(lstr1, lstr2, lstr3, lstr4)
498 |
499 | print(' ')
500 | print('Exit Minres-QLP: ')
501 | str1 = 'Flag = %8g %8s' % (flag, msg[int(flag + 1)])
502 | str2 = 'Iter = %8g ' % (iters)
503 | str3 = 'Minres = %8g Minres-QLP = %8g' % (Miter, QLPiter)
504 | str4 = 'relres = %8.2e relAres = %8.2e ' % (relres, relAres)
505 | str5 = 'rnorm = %8.2e Arnorm = %8.2e' % (rnorm, Arnorm)
506 | str6 = 'Anorm = %8.2e Acond = %8.2e ' % (Anorm, Acond)
507 | str7 = 'xnorm = %8.2e Axnorm = %8.2e' % (xnorm, Axnorm)
508 | print(str1)
509 | print(str2, str3)
510 | print(str4, str5)
511 | print(str6, str7)
512 |
513 | if rnormvec:
514 | Aresvec = np.append(Aresvec, Arnorm)
515 | return (x,flag,iters,Miter,QLPiter,relres,relAres,Anorm,Acond,
516 | xnorm,Axnorm,resvec,Aresvec)
517 |
518 | return (x,flag,iters,Miter,QLPiter,relres,relAres,Anorm,Acond,xnorm,Axnorm)
519 |
520 |
521 | def Ax(A, x):
522 | if callable(A):
523 | Ax = A(x)
524 | else:
525 | Ax = A.dot(x)
526 | return Ax
527 |
528 | def Precond(M, r):
529 | if callable(M):
530 | h = cg(M, r)[0].reshape(r.shape) #cg returns (solution, info); keep the column shape
531 | else:
532 | h = inv(M).dot(r)
533 | return h
534 |
535 | def SymGivens(a, b):
536 | if b == 0:
537 | if a == 0:
538 | c = 1
539 | else:
540 | c = np.sign(a) 541
| s = 0 542 | r = abs(a) 543 | elif a == 0: 544 | c = 0 545 | s = np.sign(b) 546 | r = abs(b) 547 | elif abs(b) > abs(a): 548 | t = a / b 549 | s = np.sign(b) / np.sqrt(1 + t ** 2) 550 | c = s * t 551 | r = b / s 552 | else: 553 | t = b / a 554 | c = np.sign(a) / np.sqrt(1 + t ** 2) 555 | s = c * t 556 | r = a / c 557 | return c, s, r 558 | 559 | def main(): 560 | ################## example1 #################### 561 | n=100 562 | e = np.ones((n,1)) 563 | data = np.c_[-2*e,4*e,-2*e] 564 | A = sp.spdiags(data.T, [-1,0,1],n,n).toarray() 565 | M = sp.spdiags(4*e.T, 0,n,n).toarray() 566 | b = sum(A) 567 | rtol = 1e-10 568 | maxit = 50 569 | x = MinresQLP(A,b,rtol,maxit,M,show=True) 570 | # x = MinresQLP(A,b,rtol,maxit,M,show=True,rnormvec=True) 571 | # print(x[11]) 572 | # print(x[12]) 573 | 574 | ################## example2 #################### 575 | # n=50 576 | # N=n**2 577 | # e = np.ones((n,1)) 578 | # data = np.c_[e, e, e] 579 | # B = sp.spdiags(data.T, [-1,0,1],n,n) 580 | # A_mid = np.array([]).reshape(0,0) 581 | # for i in range(n): 582 | # A_mid = sp.block_diag((A_mid, B)) 583 | # if i == 0: 584 | # A_upper = sp.hstack([sp.csr_matrix((n,n)), B]) 585 | # A_lower = sp.vstack([sp.csr_matrix((n,n)), B]) 586 | # if i > 0 and i < n-1: 587 | # A_upper = sp.block_diag((A_upper, B)) 588 | # A_lower = sp.block_diag((A_lower, B)) 589 | # if i == n-1: 590 | # A_upper = sp.vstack([A_upper, sp.csr_matrix((n,N))]) 591 | # A_lower = sp.hstack([A_lower, sp.csr_matrix((N,n))]) 592 | # A = A_upper + A_lower + A_mid 593 | # b = sum(A.toarray()) 594 | # rtol = 1e-5 595 | # x = MinresQLP(A, b, rtol, N, maxxnorm = 1e2, show = True) 596 | 597 | ################## example3 #################### 598 | # a = -10 599 | # c = -a 600 | # n = 2*c + 1 601 | # A = sp.spdiags(np.arange(a, c+1), 0, n, n) 602 | # b = np.ones((n, 1)) 603 | # rtol = 1e-6 604 | # x = MinresQLP(A, b, rtol, n, maxxnorm = 1e2, show = True) 605 | 606 | if __name__ == '__main__': 607 | main() 608 | --------------------------------------------------------------------------------
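Because the solver only touches `A` through the `Ax` helper, `A` can be supplied either as an explicit array/sparse matrix or as a matrix-free callable that returns the product with a column vector. The snippet below is an illustrative sketch, not part of the repository: it assumes the module is importable as `autocrit.finders.minresQLP`, and the names `H`, `b`, and `hessian_vector_product` are made up for the example.

```
# Illustrative sketch (not from the repository): solve H x = b using only
# matrix-vector products, the way a Hessian-vector-product operator would be used.
import numpy as np

from autocrit.finders.minresQLP import MinresQLP

n = 10
# A small symmetric, indefinite matrix standing in for a Hessian.
H = np.diag(np.linspace(-1.0, 1.0, n))

def hessian_vector_product(v):
    # v arrives as an (n, 1) column vector; return the product in the same shape.
    return H.dot(v)

b = np.ones(n)
# Positional arguments are A, b, rtol, maxit; the remaining options keep their defaults.
out = MinresQLP(hessian_vector_product, b, rtol=1e-8, maxit=100)
x, flag = out[0], out[1]

print(flag)                                      # termination reason; indexes into msg
print(np.allclose(H.dot(x), b.reshape(-1, 1)))   # True if the system was solved
```

The same call pattern works with a dense array or `scipy.sparse` matrix in place of the callable, which is how the examples in `main()` exercise the solver.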