├── .gitignore ├── python ├── setup.py ├── aapy.pyx ├── example.py └── aa.ipynb ├── .github └── workflows │ ├── valgrind.yml │ └── build.yml ├── Makefile ├── LICENSE.txt ├── test ├── minunit.h └── run_tests.c ├── include ├── aa_blas.h └── aa.h ├── README.md ├── examples └── gd.c └── src └── aa.c /.gitignore: -------------------------------------------------------------------------------- 1 | out 2 | *.o 3 | *.swp 4 | python/aapy.c 5 | python/build 6 | python/.* 7 | -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | aa_extension = Extension( 6 | name="aa", 7 | sources=["aapy.pyx"], 8 | library_dirs=["../src"], 9 | include_dirs=["../include"], 10 | libraries=['lapack', 'blas'] 11 | ) 12 | 13 | setup( 14 | name="aa", 15 | version='0.0.1', 16 | ext_modules=cythonize([aa_extension]) 17 | ) 18 | 19 | -------------------------------------------------------------------------------- /.github/workflows/valgrind.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Valgrind 3 | 4 | on: [push, pull_request] 5 | 6 | jobs: 7 | linux: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - run: sudo apt-get install libopenblas-dev liblapack-dev valgrind 12 | - run: make 13 | - run: make test 14 | - run: valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --show-reachable=yes --error-exitcode=1 out/run_tests 15 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | --- 2 | name: Build 3 | 4 | on: [push, pull_request] 5 | 6 | jobs: 7 | linux: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | - run: sudo apt-get install libopenblas-dev liblapack-dev 12 | - run: make 13 | - run: make test 14 | - run: out/run_tests 15 | 16 | # runs-on: windows-latest 17 | # steps: 18 | # - uses: actions/checkout@v2 19 | # - run: choco install clapack 20 | # - run: make 21 | # - run: make test 22 | # - run: test/run_tests 23 | 24 | mac: 25 | runs-on: macos-latest 26 | steps: 27 | - uses: actions/checkout@v2 28 | - run: brew install openblas lapack 29 | - run: make 30 | - run: make test 31 | - run: out/run_tests 32 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # MAKEFILE for aa 2 | .PHONY: default clean purge 3 | 4 | OBJECTS = src/aa.o 5 | 6 | PROFILING = 0 7 | CFLAGS += -g -Wall -O3 -Iinclude -DPROFILING=$(PROFILING) 8 | 9 | SRC_FILES = $(wildcard src/*.c) 10 | INC_FILES = $(wildcard include/*.h) 11 | 12 | OUT = out 13 | ARCHIVE = ar -rv 14 | RANLIB = ranlib 15 | 16 | default: $(OUT)/libaa.a $(OUT)/gd 17 | 18 | %.o : src/%.c 19 | $(CC) $(CFLAGS) -c $< -o $@ 20 | 21 | src/aa.o : $(SRC_FILES) $(INC_FILES) 22 | 23 | $(OUT)/libaa.a: $(OBJECTS) 24 | mkdir -p $(OUT) 25 | $(ARCHIVE) $@ $^ 26 | - ranlib $@ 27 | 28 | $(OUT)/gd: examples/gd.c $(OUT)/libaa.a 29 | $(CC) $(CFLAGS) -o $@ $^ -lblas -llapack 30 | 31 | clean: 32 | @rm -rf $(OBJECTS) 33 | purge: clean 34 | @rm -rf $(OUT) 35 | 36 | test: $(OUT)/run_tests 37 | 38 | $(OUT)/run_tests: test/run_tests.c $(OUT)/libaa.a 39 | $(CC) $(CFLAGS) -o $@ $^ -lblas -llapack 40 | 41 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Brendan O'Donoghue (bodonoghue85@gmail.com) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/minunit.h: -------------------------------------------------------------------------------- 1 | /* Taken from http://www.jera.com/techinfo/jtns/jtn002.html */ 2 | 3 | /* Simple Macros for testing */ 4 | #define mu_assert_less(message, a, b) \ 5 | do { \ 6 | if (a > b) { \ 7 | printf("%s: %1.3e > %1.3e\n", message, a, b); \ 8 | return message; \ 9 | } \ 10 | } while (0) 11 | #define mu_assert(message, test) \ 12 | do { \ 13 | if (!(test)) \ 14 | return message; \ 15 | } while (0) 16 | #define mu_run_test(test) \ 17 | do { \ 18 | const char *message = test(); \ 19 | tests_run++; \ 20 | if (message) \ 21 | return message; \ 22 | } while (0) 23 | -------------------------------------------------------------------------------- /python/aapy.pyx: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | cdef extern from "../src/aa.c": 4 | pass 5 | 6 | cdef extern from "../include/aa.h": 7 | ctypedef struct AaWork: 8 | pass 9 | AaWork *aa_init(int, int, int, float, float, float, float, int) 10 | double aa_apply(double*, const double*, AaWork*) 11 | int aa_safeguard(double*, double*, AaWork*) 12 | void aa_reset(AaWork*) 13 | void aa_finish(AaWork *) 14 | 15 | cdef class AndersonAccelerator(object): 16 | cdef AaWork* _wrk 17 | cdef int _dim 18 | 19 | def __cinit__(self, dim, mem, type1=False, regularization=1e-12, 20 | relaxation=1.0, safeguard_factor=1.0, max_weight_norm=1e6, 21 | verbosity=0): 22 | self._wrk = aa_init(dim, mem, type1, regularization, relaxation, 23 | safeguard_factor, max_weight_norm, verbosity) 24 | self._dim = dim 25 | 26 | def _validate(self, f, x): 27 | f = np.squeeze(f) 28 | x = np.squeeze(x) 29 | if (f.shape != (self._dim,) or x.shape != (self._dim,)): 30 | raise ValueError("Incorrect input dimension") 31 | 32 | if not f.flags['C_CONTIGUOUS']: 33 | # Makes a contiguous copy of the numpy array. 34 | f = np.ascontiguousarray(f) 35 | if not x.flags['C_CONTIGUOUS']: 36 | # Makes a contiguous copy of the numpy array. 37 | x = np.ascontiguousarray(x) 38 | 39 | return f, x 40 | 41 | def apply(self, f, x): 42 | f, x = self._validate(f, x) 43 | cdef double[::1] f_memview = f 44 | cdef double[::1] x_memview = x 45 | return aa_apply(&f_memview[0], &x_memview[0], self._wrk) 46 | 47 | def safeguard(self, f_new, x_new): 48 | f_new , x_new = self._validate(f_new, x_new) 49 | cdef double[::1] f_memview = f_new 50 | cdef double[::1] x_memview = x_new 51 | return aa_safeguard(&f_memview[0], &x_memview[0], self._wrk) 52 | 53 | def reset(self): 54 | aa_reset(self._wrk) 55 | 56 | def __dealloc__(self): 57 | aa_finish(self._wrk) 58 | 59 | -------------------------------------------------------------------------------- /include/aa_blas.h: -------------------------------------------------------------------------------- 1 | #ifndef AA_BLAS_H_GUARD 2 | #define AA_BLAS_H_GUARD 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "aa.h" 9 | 10 | /* Default to underscore for blas / lapack */ 11 | #ifndef BLASSUFFIX 12 | #define BLASSUFFIX _ 13 | #endif 14 | 15 | /* annoying hack because some preprocessors can't handle empty macros */ 16 | #if defined(NOBLASSUFFIX) && NOBLASSUFFIX > 0 17 | /* single or double precision */ 18 | #ifndef SFLOAT 19 | #define BLAS(x) d##x 20 | #else 21 | #define BLAS(x) s##x 22 | #endif 23 | #else 24 | /* this extra indirection is needed for BLASSUFFIX to work correctly as a 25 | * variable */ 26 | #define stitch_(pre, x, post) pre##x##post 27 | #define stitch__(pre, x, post) stitch_(pre, x, post) 28 | /* single or double precision */ 29 | #ifndef SFLOAT 30 | #define BLAS(x) stitch__(d, x, BLASSUFFIX) 31 | #else 32 | #define BLAS(x) stitch__(s, x, BLASSUFFIX) 33 | #endif 34 | #endif 35 | 36 | #ifdef MATLAB_MEX_FILE 37 | typedef ptrdiff_t blas_int; 38 | #elif defined BLAS64 39 | #include 40 | typedef int64_t blas_int; 41 | #else 42 | typedef int blas_int; 43 | #endif 44 | 45 | /* BLAS functions used */ 46 | aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx); 47 | void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx, 48 | aa_float *y, blas_int *incy); 49 | void BLAS(gemv)(const char *trans, const blas_int *m, const blas_int *n, 50 | const aa_float *alpha, const aa_float *a, const blas_int *lda, 51 | const aa_float *x, const blas_int *incx, const aa_float *beta, 52 | aa_float *y, const blas_int *incy); 53 | void BLAS(gesv)(blas_int *n, blas_int *nrhs, aa_float *a, blas_int *lda, 54 | blas_int *ipiv, aa_float *b, blas_int *ldb, blas_int *info); 55 | void BLAS(gemm)(const char *transa, const char *transb, blas_int *m, 56 | blas_int *n, blas_int *k, aa_float *alpha, aa_float *a, 57 | blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta, 58 | aa_float *c, blas_int *ldc); 59 | void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x, 60 | const blas_int *incx); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif /* AA_BLAS_H_GUARD */ 67 | -------------------------------------------------------------------------------- /python/example.py: -------------------------------------------------------------------------------- 1 | # min (1/2) x'Q'x - q'x 2 | 3 | from __future__ import print_function 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | import aa 7 | 8 | dim = 100 9 | mems = [5, 10, 20, 50] 10 | N = int(1e4) 11 | 12 | np.random.seed(1234) 13 | 14 | Q = np.random.randn(dim, dim) 15 | Q = 0.1 * Q.T.dot(Q) 16 | q = np.random.randn(dim) 17 | x_0 = np.random.randn(dim) 18 | x_star = np.linalg.solve(Q, q) 19 | 20 | step = 1.0 / np.max(np.linalg.eigvals(Q)) 21 | 22 | f = lambda x: 0.5 * x.T @ Q @ x - q.T @ x 23 | 24 | f_star = f(x_star) 25 | print('f^* = ', f_star) 26 | 27 | print('No acceleration') 28 | 29 | results = {} 30 | 31 | fs = [] 32 | x = x_0.copy() 33 | for i in range(N): 34 | x_prev = np.copy(x) 35 | x -= step * (Q.dot(x) - q) 36 | fs.append(f(x) - f_star) 37 | if i % 1000 == 0: 38 | print('i: ', i,' f - f^*: ', np.abs(f(x) - f_star)) 39 | 40 | results['No accel'] = fs 41 | 42 | RELAXATION = 1.0 43 | 44 | for mem in mems: 45 | print('Type-I acceleration, mem:', mem) 46 | fs = [] 47 | x = x_0.copy() 48 | aa_wrk = aa.AndersonAccelerator(dim, mem, True, regularization=1e-8, 49 | relaxation=RELAXATION, verbosity=1, 50 | max_weight_norm=1e6) 51 | for i in range(N): 52 | if i > 0: aa_wrk.apply(x, x_prev) 53 | x_prev = np.copy(x) 54 | x -= step * (Q.dot(x) - q) 55 | aa_wrk.safeguard(x, x_prev) 56 | fs.append(f(x) - f_star) 57 | if i % 1000 == 0: 58 | print('i: ', i,' f - f^*: ', np.abs(f(x) - f_star)) 59 | 60 | results[f'AA-I {mem}'] = fs 61 | 62 | print('Type-II acceleration, mem:', mem) 63 | fs = [] 64 | x = x_0.copy() 65 | aa_wrk = aa.AndersonAccelerator(dim, mem, False, regularization=1e-12, 66 | relaxation=RELAXATION, verbosity=1, 67 | max_weight_norm=1e6) 68 | for i in range(N): 69 | if i > 0: aa_wrk.apply(x, x_prev) 70 | x_prev = np.copy(x) 71 | x -= step * (Q.dot(x) - q) 72 | aa_wrk.safeguard(x, x_prev) 73 | fs.append(f(x) - f_star) 74 | if i % 1000 == 0: 75 | print('i: ', i,' f - f^*: ', np.abs(f(x) - f_star)) 76 | 77 | results[f'AA-II {mem}'] = fs 78 | 79 | for k,v in results.items(): 80 | plt.semilogy(v, label=k) 81 | 82 | plt.legend() 83 | plt.show() 84 | -------------------------------------------------------------------------------- /include/aa.h: -------------------------------------------------------------------------------- 1 | #ifndef AA_H_GUARD 2 | #define AA_H_GUARD 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | typedef double aa_float; 13 | typedef int aa_int; 14 | 15 | typedef struct ACCEL_WORK AaWork; 16 | 17 | /** 18 | * Initialize Anderson Acceleration, allocates memory. 19 | * 20 | * @param dim the dimension of the variable for AA 21 | * @param mem the memory (number of past iterations used) for AA 22 | * @param type1 if True use type 1 AA, otherwise use type 2 23 | * @param regularization type-I and type-II different, for type-I: 1e-8 works 24 | * well, type-II: more stable can use 1e-12 often 25 | * @param relaxation float \in [0,2], mixing parameter (1.0 is vanilla) 26 | * @param safeguard_factor factor that controls safeguarding checks 27 | * larger is more aggressive but less stable 28 | * @param max_weight_norm float, maximum norm of AA weights 29 | * @param verbosity if greater than 0 prints out various info 30 | * 31 | * @return pointer to AA workspace 32 | * 33 | */ 34 | AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization, 35 | aa_float relaxation, aa_float safeguard_factor, 36 | aa_float max_weight_norm, aa_int verbosity); 37 | 38 | /** 39 | * Apply Anderson Acceleration. The usage pattern should be as follows: 40 | * 41 | * - for i = 0 .. N: 42 | * - if (i > 0): aa_apply(x, x_prev, a) 43 | * - x_prev = x.copy() 44 | * - x = F(x) 45 | * - aa_safeguard(x, x_prev, a) // optional but helps stability 46 | * 47 | * Here F is the map we are trying to find the fixed point for. We put the AA 48 | * before the map so that any properties of the map are maintained at the end. 49 | * Eg if the map contains a projection onto a set then the output is guaranteed 50 | * to be in the set. 51 | * 52 | * 53 | * @param f output of map at current iteration, overwritten with AA output 54 | * @param x input to map at current iteration 55 | * @param a workspace from aa_init 56 | * 57 | * @return (+ or -) norm of AA weights vector. If positive then update 58 | * was accepted and f contains new point, if negative then update was 59 | * rejected and f is unchanged 60 | * 61 | */ 62 | aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a); 63 | 64 | /** 65 | * Apply safeguarding. 66 | * 67 | * This step is optional but can improve stability. 68 | * 69 | * @param f_new output of map after AA step 70 | * @param x_new AA output that is input to the map 71 | * @param a workspace from aa_init 72 | * 73 | * @returns 0 if AA step is accepted otherwise -1, if AA step is rejected then 74 | * this overwrites f_new and x_new with previous values 75 | * 76 | */ 77 | aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a); 78 | 79 | /** 80 | * Finish Anderson Acceleration, clears memory. 81 | * 82 | * @param a AA workspace from aa_init 83 | * 84 | */ 85 | void aa_finish(AaWork *a); 86 | 87 | /** 88 | * Reset Anderson Acceleration. 89 | * 90 | * Resets AA as if at the first iteration, reuses original memory allocations. 91 | * 92 | * @param a AA workspace from aa_init 93 | * 94 | */ 95 | void aa_reset(AaWork *a); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | #endif 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | AA 2 | === 3 | 4 | [![Build Status](https://github.com/cvxgrp/aa/actions/workflows/build.yml/badge.svg)](https://github.com/cvxgrp/aa/actions/workflows/build.yml) 5 | 6 | AA (`Anderson Acceleration`) 7 | 8 | C (with python interface) implementation of the Anderson Acceleration algorithm as described in our paper [Globally Convergent Type-I Anderson Acceleration for Non-Smooth Fixed-Point Iterations](https://web.stanford.edu/~boyd/papers/nonexp_global_aa1.html) 9 | 10 | NOTE: This implementation is a simple proof-of-concept and does not include all 11 | the necessary stabilizations required to guarantee convergence. However, it 12 | works well in many cases. 13 | 14 | MATLAB code (and the experiments presented in the paper) available [here](https://github.com/cvxgrp/nonexp_global_aa1/): 15 | 16 | ---- 17 | 18 | Python 19 | ---- 20 | 21 | To install the package use: 22 | ```bash 23 | cd python 24 | python setup.py install 25 | ``` 26 | To test, run in the same directory: 27 | ```bash 28 | python example.py 29 | ``` 30 | 31 | The Python API is as follows. To initialize the accelerator: 32 | ```python 33 | import aa 34 | aa_wrk = aa.AndersonAccelerator(dim, mem, type1, eta) 35 | ``` 36 | where: 37 | * `dim` is the integer problem dimension. 38 | * `mem` is the integer amount of memory (or lookback) you want the algorithm to use, around 10 is a good number for this. 39 | * `type1` is a boolean, if `True` uses type-1 AA, otherwise uses type-2 AA. 40 | * `regularization`: float, regularization param, type-I: 1e-8 works well, type-II: more stable can use 1e-10 often 41 | * `relaxation`: float in [0,2], mixing parameter (1.0 is vanilla AA) 42 | * `verbosity`: verbosity level, if greater than 0 prints out various info 43 | 44 | To use the accelerator: 45 | ```python 46 | aa_wrk.apply(x, x_prev) 47 | ``` 48 | where: 49 | * `x` is the numpy array consisting of the current iterate and it will be overwritten with the accelerated iterate. 50 | * `x_prev` is the numpy array consisting of the previous iterate (the input to the update function). 51 | 52 | 53 | C 54 | ---- 55 | 56 | At the command prompt type `make` to compile the library and the example. The 57 | example can be run by `out/gd`. 58 | 59 | The C API is as follows: 60 | 61 | ```C 62 | /* Initialize Anderson Acceleration, allocates memory. 63 | * 64 | * Args: 65 | * dim: the dimension of the variable for aa 66 | * mem: the memory (number of past iterations used) for aa 67 | * type1: bool, if True use type 1 aa, otherwise use type 2 68 | * regularization: float, regularization param, type-I and type-II different 69 | * for type-I: 1e-8 works well, type-II: more stable can use 1e-10 often 70 | * relaxation: float \in [0,2], mixing parameter (1.0 is vanilla AA) 71 | * verbosity: if greater than 0 prints out various info 72 | 73 | * Reurns: 74 | * Pointer to aa workspace 75 | */ 76 | AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization, 77 | aa_float relaxation, aa_int verbosity); 78 | 79 | /* Apply Anderson Acceleration. 80 | * 81 | * Args: 82 | * f: output of map at current iteration, overwritten with aa output at end. 83 | * x: input to map at current iteration 84 | * a: aa workspace from aa_init 85 | * 86 | * Returns: 87 | * (float) (+ or -) norm of AA weights vector: 88 | * if positive then update was accepted and f contains new point 89 | * if negative then update was rejected and f is unchanged 90 | */ 91 | aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a); 92 | 93 | /* Finish Anderson Acceleration, clears memory. 94 | * 95 | * Args: 96 | * a: aa workspace from aa_init. 97 | */ 98 | void aa_finish(AaWork *a); 99 | ``` 100 | 101 | -------------------------------------------------------------------------------- /examples/gd.c: -------------------------------------------------------------------------------- 1 | /* Gradient descent (GD) on convex quadratic */ 2 | #include "aa.h" 3 | #include "aa_blas.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* default parameters */ 10 | #define SEED (1234) 11 | #define TYPE1 (1) 12 | #define DIM (1000) 13 | #define MEM (5) 14 | #define REGULARIZATION (0) 15 | #define SAFEGUARD_TOLERANCE (2.0) 16 | #define MAX_AA_NORM (1e10) 17 | #define RELAXATION (1.0) 18 | #define ITERS (30000) 19 | #define STEPSIZE (0.001) 20 | #define PRINT_INTERVAL (500) 21 | #define VERBOSITY (1) 22 | 23 | 24 | /* duplicate these with underscore prefix */ 25 | typedef struct _timer { 26 | struct timespec tic; 27 | struct timespec toc; 28 | } _timer; 29 | 30 | void _tic(_timer *t) { 31 | clock_gettime(CLOCK_MONOTONIC, &t->tic); 32 | } 33 | 34 | aa_float _tocq(_timer *t) { 35 | struct timespec temp; 36 | 37 | clock_gettime(CLOCK_MONOTONIC, &t->toc); 38 | 39 | if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) { 40 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1; 41 | temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec; 42 | } else { 43 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec; 44 | temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec; 45 | } 46 | return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6; 47 | } 48 | 49 | /* uniform random number in [-1,1] */ 50 | static aa_float rand_float(void) { 51 | return 2 * (((aa_float)rand()) / RAND_MAX) - 1; 52 | } 53 | 54 | /* 55 | * out/gd memory dimension step_size type1 seed iters regularization 56 | * 57 | */ 58 | int main(int argc, char **argv) { 59 | aa_int type1 = TYPE1, n = DIM, iters = ITERS, memory = MEM, seed = SEED; 60 | aa_int i, one = 1; 61 | aa_int verbosity = VERBOSITY; 62 | aa_float neg_step_size = -STEPSIZE; 63 | aa_float regularization = REGULARIZATION; 64 | aa_float relaxation = RELAXATION; 65 | aa_float safeguard_tolerance = SAFEGUARD_TOLERANCE; 66 | aa_float max_aa_norm = MAX_AA_NORM; 67 | aa_float err = 0; 68 | aa_float *x, *xprev, *Qhalf, *Q, zerof = 0.0, onef = 1.0; 69 | _timer aa_timer; 70 | aa_float aa_time = 0; 71 | 72 | printf("Usage: 'out/gd memory type1 dimension step_size seed iters " 73 | "regularization relaxation safeguard_tolerance max_aa_norm'\n"); 74 | 75 | switch (argc - 1) { 76 | case 10: 77 | max_aa_norm = atof(argv[10]); 78 | case 9: 79 | safeguard_tolerance = atof(argv[9]); 80 | case 8: 81 | relaxation = atof(argv[8]); 82 | case 7: 83 | regularization = atof(argv[7]); 84 | case 6: 85 | iters = atoi(argv[6]); 86 | case 5: 87 | seed = atoi(argv[5]); 88 | case 4: 89 | neg_step_size = -atof(argv[4]); 90 | case 3: 91 | n = atoi(argv[3]); 92 | case 2: 93 | type1 = atoi(argv[2]); 94 | case 1: 95 | memory = atoi(argv[1]); 96 | break; 97 | default: 98 | printf("Running default parameters.\n"); 99 | } 100 | 101 | x = (aa_float *)malloc(sizeof(aa_float) * n); 102 | xprev = (aa_float *)malloc(sizeof(aa_float) * n); 103 | Qhalf = (aa_float *)malloc(sizeof(aa_float) * n * n); 104 | Q = (aa_float *)malloc(sizeof(aa_float) * n * n); 105 | 106 | srand(seed); 107 | 108 | /* generate random data */ 109 | for (i = 0; i < n; i++) { 110 | x[i] = rand_float(); 111 | } 112 | for (i = 0; i < n * n; i++) { 113 | Qhalf[i] = rand_float(); 114 | } 115 | 116 | BLAS(gemm) 117 | ("Trans", "No", &n, &n, &n, &onef, Qhalf, &n, Qhalf, &n, &zerof, Q, &n); 118 | 119 | /* add small amount regularization */ 120 | for (i = 0; i < n; i++) { 121 | Q[i + i * n] += 1e-6; 122 | } 123 | 124 | AaWork *a = aa_init(n, memory, type1, regularization, relaxation, 125 | safeguard_tolerance, max_aa_norm, verbosity); 126 | for (i = 0; i < iters; i++) { 127 | if (i > 0) { 128 | _tic(&aa_timer); 129 | aa_apply(x, xprev, a); 130 | aa_time += _tocq(&aa_timer); 131 | } 132 | 133 | memcpy(xprev, x, sizeof(aa_float) * n); 134 | /* x = x - step_size * Q * xprev */ 135 | BLAS(gemv) 136 | ("No", &n, &n, &neg_step_size, Q, &n, xprev, &one, &onef, x, &one); 137 | 138 | _tic(&aa_timer); 139 | aa_safeguard(x, xprev, a); 140 | aa_time += _tocq(&aa_timer); 141 | 142 | err = BLAS(nrm2)(&n, x, &one); 143 | if (i % PRINT_INTERVAL == 0) { 144 | printf("Iter: %i, Err %.4e\n", i, err); 145 | } 146 | } 147 | printf("Iter: %i, Err %.4e\n", i, err); 148 | printf("AA time: %.4f seconds\n", aa_time / 1e3); 149 | aa_finish(a); 150 | free(Q); 151 | free(Qhalf); 152 | free(x); 153 | free(xprev); 154 | return 0; 155 | } 156 | -------------------------------------------------------------------------------- /test/run_tests.c: -------------------------------------------------------------------------------- 1 | /* Gradient descent (GD) on convex quadratic */ 2 | #include "aa.h" 3 | #include "aa_blas.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "minunit.h" 10 | 11 | /* default parameters */ 12 | #define SEED (1234) 13 | #define DIM (100) 14 | #define MEM (5) 15 | #define TYPE1_REGULARIZATION (1e-3) 16 | #define TYPE2_REGULARIZATION (0) 17 | #define SAFEGUARD_TOLERANCE (2.0) 18 | #define MAX_AA_NORM (1e10) 19 | #define ITERS (10000) 20 | #define STEPSIZE (0.01) 21 | #define PRINT_INTERVAL (500) 22 | #define VERBOSITY (1) 23 | 24 | int tests_run = 0; 25 | 26 | /* duplicate these with underscore prefix */ 27 | typedef struct _timer { 28 | struct timespec tic; 29 | struct timespec toc; 30 | } _timer; 31 | 32 | void _tic(_timer *t) { 33 | clock_gettime(CLOCK_MONOTONIC, &t->tic); 34 | } 35 | 36 | aa_float _tocq(_timer *t) { 37 | struct timespec temp; 38 | 39 | clock_gettime(CLOCK_MONOTONIC, &t->toc); 40 | 41 | if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) { 42 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1; 43 | temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec; 44 | } else { 45 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec; 46 | temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec; 47 | } 48 | return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6; 49 | } 50 | 51 | /* uniform random number in [-1,1] */ 52 | static aa_float rand_float(void) { 53 | return 2 * (((aa_float)rand()) / RAND_MAX) - 1; 54 | } 55 | 56 | static const char *gd(aa_int type1, aa_float relaxation) { 57 | aa_int n = DIM, iters = ITERS, memory = MEM, seed = SEED; 58 | aa_int i, one = 1; 59 | aa_int verbosity = VERBOSITY; 60 | aa_float neg_step_size = -STEPSIZE; 61 | aa_float safeguard_tolerance = SAFEGUARD_TOLERANCE; 62 | aa_float max_aa_norm = MAX_AA_NORM; 63 | aa_float err = 0; 64 | aa_float regularization; 65 | aa_float *x, *xprev, *Qhalf, *Q, zerof = 0.0, onef = 1.0; 66 | _timer aa_timer; 67 | aa_float aa_time = 0; 68 | x = (aa_float *)malloc(sizeof(aa_float) * n); 69 | xprev = (aa_float *)malloc(sizeof(aa_float) * n); 70 | Qhalf = (aa_float *)malloc(sizeof(aa_float) * n * n); 71 | Q = (aa_float *)malloc(sizeof(aa_float) * n * n); 72 | 73 | srand(seed); 74 | 75 | if (type1) { 76 | regularization = TYPE1_REGULARIZATION; 77 | } else { 78 | regularization = TYPE2_REGULARIZATION; 79 | } 80 | 81 | /* generate random data */ 82 | for (i = 0; i < n; i++) { 83 | x[i] = rand_float(); 84 | } 85 | for (i = 0; i < n * n; i++) { 86 | Qhalf[i] = rand_float(); 87 | } 88 | 89 | BLAS(gemm) 90 | ("Trans", "No", &n, &n, &n, &onef, Qhalf, &n, Qhalf, &n, &zerof, Q, &n); 91 | 92 | /* add small amount regularization */ 93 | for (i = 0; i < n; i++) { 94 | Q[i + i * n] += 1e-2; 95 | } 96 | 97 | AaWork *a = aa_init(n, memory, type1, regularization, relaxation, 98 | safeguard_tolerance, max_aa_norm, verbosity); 99 | for (i = 0; i < iters; i++) { 100 | if (i > 0) { 101 | _tic(&aa_timer); 102 | aa_apply(x, xprev, a); 103 | aa_time += _tocq(&aa_timer); 104 | } 105 | 106 | memcpy(xprev, x, sizeof(aa_float) * n); 107 | /* x = x - step_size * Q * xprev */ 108 | BLAS(gemv) 109 | ("No", &n, &n, &neg_step_size, Q, &n, xprev, &one, &onef, x, &one); 110 | 111 | _tic(&aa_timer); 112 | aa_safeguard(x, xprev, a); 113 | aa_time += _tocq(&aa_timer); 114 | 115 | err = BLAS(nrm2)(&n, x, &one); 116 | if (i % PRINT_INTERVAL == 0) { 117 | printf("Iter: %i, Err %.4e\n", i, err); 118 | } 119 | } 120 | printf("Iter: %i, Err %.4e\n", i, err); 121 | printf("AA time: %.4f seconds\n", aa_time / 1e3); 122 | aa_finish(a); 123 | free(Q); 124 | free(Qhalf); 125 | free(x); 126 | free(xprev); 127 | 128 | mu_assert_less("Failed to produce small error", err, 1e-6); 129 | 130 | return 0; 131 | } 132 | 133 | static const char *gd_type1_relax1(void) { 134 | return gd(1, 1.0); 135 | } 136 | 137 | static const char *gd_type1_relaxl1(void) { 138 | return gd(1, 0.98); 139 | } 140 | 141 | static const char *gd_type2_relax1(void) { 142 | return gd(0, 1.0); 143 | } 144 | 145 | static const char *gd_type2_relaxl1(void) { 146 | return gd(0, 0.98); 147 | } 148 | 149 | static const char *all_tests(void) { 150 | printf("type 1, relaxation 1.0\n"); 151 | mu_run_test(gd_type1_relax1); 152 | printf("type 1, relaxation < 1.0\n"); 153 | mu_run_test(gd_type1_relaxl1); 154 | printf("type 2, relaxation 1.0\n"); 155 | mu_run_test(gd_type2_relax1); 156 | printf("type 2, relaxation < 1.0\n"); 157 | mu_run_test(gd_type2_relaxl1); 158 | return 0; 159 | } 160 | 161 | int main(void) { 162 | const char *result = all_tests(); 163 | if (result != 0) { 164 | printf("%s\n", result); 165 | } else { 166 | printf("ALL TESTS PASSED\n"); 167 | } 168 | printf("Tests run: %d\n", tests_run); 169 | 170 | return result != 0; 171 | } 172 | -------------------------------------------------------------------------------- /src/aa.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Anderson acceleration. 3 | * 4 | * x: input iterate 5 | * x_prev: previous input iterate 6 | * f: f(x) output of map f applied to x 7 | * g: x - f (error) 8 | * g_prev: previous error 9 | * s: x - x_prev 10 | * y: g - g_prev 11 | * d: s - y = f - f_prev 12 | * 13 | * capital letters are the variables stacked columnwise 14 | * idx tracks current index where latest quantities written 15 | * idx cycles from left to right columns in matrix 16 | * 17 | * Type-I: 18 | * return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g ) 19 | * 20 | * Type-II: 21 | * return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g ) 22 | * 23 | */ 24 | 25 | #include "aa.h" 26 | #include "aa_blas.h" 27 | 28 | #define MAX(a, b) (((a) > (b)) ? (a) : (b)) 29 | #define MIN(a, b) (((a) < (b)) ? (a) : (b)) 30 | #define FILL_MEMORY_BEFORE_SOLVE (1) 31 | 32 | #if PROFILING > 0 33 | 34 | #define TIME_TIC \ 35 | timer __t; \ 36 | tic(&__t); 37 | #define TIME_TOC toc(__func__, &__t); 38 | 39 | #include 40 | typedef struct timer { 41 | struct timespec tic; 42 | struct timespec toc; 43 | } timer; 44 | 45 | void tic(timer *t) { 46 | clock_gettime(CLOCK_MONOTONIC, &t->tic); 47 | } 48 | 49 | aa_float tocq(timer *t) { 50 | struct timespec temp; 51 | 52 | clock_gettime(CLOCK_MONOTONIC, &t->toc); 53 | 54 | if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) { 55 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1; 56 | temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec; 57 | } else { 58 | temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec; 59 | temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec; 60 | } 61 | return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6; 62 | } 63 | 64 | aa_float toc(const char *str, timer *t) { 65 | aa_float time = tocq(t); 66 | printf("%s - time: %8.4f milli-seconds.\n", str, time); 67 | return time; 68 | } 69 | 70 | #else 71 | 72 | #define TIME_TIC 73 | #define TIME_TOC 74 | 75 | #endif 76 | 77 | /* This file uses Anderson acceleration to improve the convergence of 78 | * a fixed point mapping. 79 | * At each iteration we need to solve a (small) linear system, we 80 | * do this using LAPACK ?gesv. 81 | */ 82 | 83 | /* contains the necessary parameters to perform aa at each step */ 84 | struct ACCEL_WORK { 85 | aa_int type1; /* bool, if true type 1 aa otherwise type 2 */ 86 | aa_int mem; /* aa memory */ 87 | aa_int dim; /* variable dimension */ 88 | aa_int iter; /* current iteration */ 89 | aa_int verbosity; /* verbosity level, 0 is no printing */ 90 | aa_int success; /* was the last AA step successful or not */ 91 | 92 | aa_float relaxation; /* relaxation x and f, beta in some papers */ 93 | aa_float regularization; /* regularization */ 94 | aa_float safeguard_factor; /* safeguard tolerance factor */ 95 | aa_float max_weight_norm; /* maximum norm of AA weights */ 96 | 97 | aa_float *x; /* x input to map*/ 98 | aa_float *f; /* f(x) output of map */ 99 | aa_float *g; /* x - f(x) */ 100 | aa_float norm_g; /* ||x - f(x)|| */ 101 | 102 | /* from previous iteration */ 103 | aa_float *g_prev; /* x_prev - f(x_prev) */ 104 | 105 | aa_float *y; /* g - g_prev */ 106 | aa_float *s; /* x - x_prev */ 107 | aa_float *d; /* f - f_prev */ 108 | 109 | aa_float *Y; /* matrix of stacked y values */ 110 | aa_float *S; /* matrix of stacked s values */ 111 | aa_float *D; /* matrix of stacked d values = (S-Y) */ 112 | aa_float *M; /* S'Y or Y'Y depending on type of aa */ 113 | 114 | /* workspace variables */ 115 | aa_float *work; /* scratch space */ 116 | blas_int *ipiv; /* permutation variable, not used after solve */ 117 | 118 | aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */ 119 | }; 120 | 121 | /* add regularization dependent on Y and S matrices */ 122 | static aa_float compute_regularization(AaWork *a, aa_int len) { 123 | /* typically type-I does better with higher regularization than type-II */ 124 | TIME_TIC 125 | aa_float r, nrm_m; 126 | blas_int btotal = (blas_int)(len * len), one = 1; 127 | nrm_m = BLAS(nrm2)(&btotal, a->M, &one); 128 | r = a->regularization * nrm_m; 129 | if (a->verbosity > 2) { 130 | printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r); 131 | } 132 | TIME_TOC 133 | return r; 134 | } 135 | 136 | /* sets a->M to S'Y or Y'Y depending on type of aa used */ 137 | /* M is len x len after this */ 138 | static void set_m(AaWork *a, aa_int len) { 139 | TIME_TIC 140 | aa_int i; 141 | blas_int bdim = (blas_int)(a->dim); 142 | blas_int blen = (blas_int)len; 143 | aa_float onef = 1.0, zerof = 0.0, r; 144 | /* if len < mem this only uses len cols */ 145 | BLAS(gemm) 146 | ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim, 147 | a->Y, &bdim, &zerof, a->M, &blen); 148 | if (a->regularization > 0) { 149 | r = compute_regularization(a, len); 150 | for (i = 0; i < len; ++i) { 151 | a->M[i + len * i] += r; 152 | } 153 | } 154 | TIME_TOC 155 | } 156 | 157 | /* initialize accel params, in particular x_prev, f_prev, g_prev */ 158 | static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) { 159 | TIME_TIC 160 | blas_int bdim = (blas_int)a->dim; 161 | aa_float neg_onef = -1.0; 162 | blas_int one = 1; 163 | /* x_prev = x */ 164 | memcpy(a->x, x, sizeof(aa_float) * a->dim); 165 | /* f_prev = f */ 166 | memcpy(a->f, f, sizeof(aa_float) * a->dim); 167 | /* g_prev = x */ 168 | memcpy(a->g_prev, x, sizeof(aa_float) * a->dim); 169 | /* g_prev = x_prev - f_prev */ 170 | BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one); 171 | TIME_TOC 172 | } 173 | 174 | /* updates the workspace parameters for aa for this iteration */ 175 | static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a, 176 | aa_int len) { 177 | /* at the start a->x = x_prev and a->f = f_prev */ 178 | TIME_TIC 179 | aa_int idx = (a->iter - 1) % a->mem; 180 | blas_int one = 1; 181 | blas_int bdim = (blas_int)a->dim; 182 | aa_float neg_onef = -1.0; 183 | 184 | /* g = x */ 185 | memcpy(a->g, x, sizeof(aa_float) * a->dim); 186 | /* s = x */ 187 | memcpy(a->s, x, sizeof(aa_float) * a->dim); 188 | /* d = f */ 189 | memcpy(a->d, f, sizeof(aa_float) * a->dim); 190 | /* g = x - f */ 191 | BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one); 192 | /* s = x - x_prev */ 193 | BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one); 194 | /* d = f - f_prev */ 195 | BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one); 196 | 197 | /* g, s, d correct here */ 198 | 199 | /* y = g */ 200 | memcpy(a->y, a->g, sizeof(aa_float) * a->dim); 201 | /* y = g - g_prev */ 202 | BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one); 203 | 204 | /* y correct here */ 205 | 206 | /* copy y into idx col of Y */ 207 | memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim); 208 | /* copy s into idx col of S */ 209 | memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim); 210 | /* copy d into idx col of D */ 211 | memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim); 212 | 213 | /* Y, S, D correct here */ 214 | 215 | /* set a->f and a->x for next iter (x_prev and f_prev) */ 216 | memcpy(a->f, f, sizeof(aa_float) * a->dim); 217 | memcpy(a->x, x, sizeof(aa_float) * a->dim); 218 | 219 | /* workspace for when relaxation != 1.0 */ 220 | if (a->x_work) { 221 | memcpy(a->x_work, x, sizeof(aa_float) * a->dim); 222 | } 223 | 224 | /* x, f correct here */ 225 | 226 | memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim); 227 | /* g_prev set for next iter here */ 228 | 229 | /* compute ||g|| = ||f - x|| */ 230 | a->norm_g = BLAS(nrm2)(&bdim, a->g, &one); 231 | 232 | TIME_TOC 233 | } 234 | 235 | /* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */ 236 | static void relax(aa_float *f, AaWork *a, aa_int len) { 237 | TIME_TIC 238 | /* x_work = x initially */ 239 | blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len; 240 | aa_float onef = 1.0, neg_onef = -1.0; 241 | aa_float one_m_relaxation = 1. - a->relaxation; 242 | /* x_work = x - S * work */ 243 | BLAS(gemv) 244 | ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef, 245 | a->x_work, &one); 246 | /* f = relaxation * f */ 247 | BLAS(scal)(&bdim, &a->relaxation, f, &one); 248 | /* f += (1 - relaxation) * x_work */ 249 | BLAS(axpy)(&bdim, &one_m_relaxation, a->x_work, &one, f, &one); 250 | TIME_TOC 251 | } 252 | 253 | /* solves the system of equations to perform the AA update 254 | * at the end f contains the next iterate to be returned 255 | */ 256 | static aa_float solve(aa_float *f, AaWork *a, aa_int len) { 257 | TIME_TIC 258 | blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len; 259 | aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm; 260 | 261 | /* work = S'g or Y'g */ 262 | BLAS(gemv) 263 | ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one, 264 | &zerof, a->work, &one); 265 | 266 | /* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */ 267 | BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info); 268 | aa_norm = BLAS(nrm2)(&blen, a->work, &one); 269 | if (a->verbosity > 1) { 270 | printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n", 271 | a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm); 272 | } 273 | 274 | /* info < 0 input error, input > 0 matrix is singular */ 275 | if (info != 0 || aa_norm >= a->max_weight_norm) { 276 | if (a->verbosity > 0) { 277 | printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n", 278 | a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm); 279 | } 280 | a->success = 0; 281 | /* reset aa for stability */ 282 | aa_reset(a); 283 | TIME_TOC 284 | return -aa_norm; 285 | } 286 | 287 | /* here work = gamma, ie, the correct AA shifted weights */ 288 | /* if solve was successful compute new point */ 289 | 290 | /* first set f -= D * work */ 291 | BLAS(gemv) 292 | ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f, 293 | &one); 294 | 295 | /* if relaxation is not 1 then need to incorporate */ 296 | if (a->relaxation != 1.0) { 297 | relax(f, a, len); 298 | } 299 | 300 | a->success = 1; /* this should be the only place we set success = 1 */ 301 | TIME_TOC 302 | return aa_norm; 303 | } 304 | 305 | /* 306 | * API functions below this line, see aa.h for descriptions. 307 | */ 308 | AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization, 309 | aa_float relaxation, aa_float safeguard_factor, 310 | aa_float max_weight_norm, aa_int verbosity) { 311 | TIME_TIC 312 | AaWork *a = (AaWork *)calloc(1, sizeof(AaWork)); 313 | if (!a) { 314 | printf("Failed to allocate memory for AA.\n"); 315 | return (AaWork *)0; 316 | } 317 | a->type1 = type1; 318 | a->iter = 0; 319 | a->dim = dim; 320 | a->mem = MIN(mem, dim); /* for rank stability */ 321 | a->regularization = regularization; 322 | a->relaxation = relaxation; 323 | a->safeguard_factor = safeguard_factor; 324 | a->max_weight_norm = max_weight_norm; 325 | a->success = 0; 326 | a->verbosity = verbosity; 327 | if (a->mem <= 0) { 328 | return a; 329 | } 330 | 331 | a->x = (aa_float *)calloc(a->dim, sizeof(aa_float)); 332 | a->f = (aa_float *)calloc(a->dim, sizeof(aa_float)); 333 | a->g = (aa_float *)calloc(a->dim, sizeof(aa_float)); 334 | 335 | a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float)); 336 | 337 | a->y = (aa_float *)calloc(a->dim, sizeof(aa_float)); 338 | a->s = (aa_float *)calloc(a->dim, sizeof(aa_float)); 339 | a->d = (aa_float *)calloc(a->dim, sizeof(aa_float)); 340 | 341 | a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); 342 | a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); 343 | a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); 344 | 345 | a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float)); 346 | a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float)); 347 | a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int)); 348 | 349 | if (relaxation != 1.0) { 350 | a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float)); 351 | } else { 352 | a->x_work = 0; 353 | } 354 | TIME_TOC 355 | return a; 356 | } 357 | 358 | aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) { 359 | TIME_TIC 360 | aa_float aa_norm = 0; 361 | aa_int len = MIN(a->iter, a->mem); 362 | a->success = 0; /* if we make an AA step we set this to 1 later */ 363 | if (a->mem <= 0) { 364 | TIME_TOC 365 | return aa_norm; /* 0 */ 366 | } 367 | if (a->iter == 0) { 368 | /* if first iteration then seed params for next iter */ 369 | init_accel_params(x, f, a); 370 | a->iter++; 371 | TIME_TOC 372 | return aa_norm; /* 0 */ 373 | } 374 | /* set various accel quantities */ 375 | update_accel_params(x, f, a, len); 376 | 377 | /* only perform solve steps when the memory is full */ 378 | if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) { 379 | /* set M = S'Y or Y'Y depending on type of aa used */ 380 | set_m(a, len); 381 | /* solve linear system, new point overwrites f if successful */ 382 | aa_norm = solve(f, a, len); 383 | } 384 | a->iter++; 385 | TIME_TOC 386 | return aa_norm; 387 | } 388 | 389 | aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) { 390 | TIME_TIC 391 | blas_int bdim = (blas_int)a->dim; 392 | blas_int one = 1; 393 | aa_float neg_onef = -1.0; 394 | aa_float norm_diff; 395 | if (!a->success) { 396 | /* last AA update was not successful, no need for safeguarding */ 397 | TIME_TOC 398 | return 0; 399 | } 400 | 401 | /* reset success indicator in case safeguarding called multiple times */ 402 | a->success = 0; 403 | 404 | /* work = x_new */ 405 | memcpy(a->work, x_new, a->dim * sizeof(aa_float)); 406 | /* work = x_new - f_new */ 407 | BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one); 408 | /* norm_diff = || f_new - x_new || */ 409 | norm_diff = BLAS(nrm2)(&bdim, a->work, &one); 410 | /* g = f - x */ 411 | if (norm_diff > a->safeguard_factor * a->norm_g) { 412 | /* in this case we reject the AA step and reset */ 413 | memcpy(f_new, a->f, a->dim * sizeof(aa_float)); 414 | memcpy(x_new, a->x, a->dim * sizeof(aa_float)); 415 | if (a->verbosity > 0) { 416 | printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n", 417 | (int)a->iter, norm_diff, a->norm_g); 418 | } 419 | aa_reset(a); 420 | TIME_TOC 421 | return -1; 422 | } 423 | TIME_TOC 424 | return 0; 425 | } 426 | 427 | void aa_finish(AaWork *a) { 428 | if (a) { 429 | free(a->x); 430 | free(a->f); 431 | free(a->g); 432 | free(a->g_prev); 433 | free(a->y); 434 | free(a->s); 435 | free(a->d); 436 | free(a->Y); 437 | free(a->S); 438 | free(a->D); 439 | free(a->M); 440 | free(a->work); 441 | free(a->ipiv); 442 | if (a->x_work) { 443 | free(a->x_work); 444 | } 445 | free(a); 446 | } 447 | } 448 | 449 | void aa_reset(AaWork *a) { 450 | /* to reset we simply set a->iter = 0 */ 451 | if (a->verbosity > 0) { 452 | printf("AA reset.\n"); 453 | } 454 | a->iter = 0; 455 | } 456 | -------------------------------------------------------------------------------- /python/aa.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import aa as aa\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import scipy.linalg as la\n", 13 | "plt.style.use('classic')" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [ 21 | { 22 | "name": "stderr", 23 | "output_type": "stream", 24 | "text": [ 25 | "/Users/bodonoghue/miniconda2/envs/python37/lib/python3.7/site-packages/ipykernel_launcher.py:9: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.\n", 26 | "To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.\n", 27 | " if __name__ == '__main__':\n" 28 | ] 29 | }, 30 | { 31 | "data": { 32 | "image/png": "\n", 33 | "text/plain": [ 34 | "
" 35 | ] 36 | }, 37 | "metadata": {}, 38 | "output_type": "display_data" 39 | } 40 | ], 41 | "source": [ 42 | "# Solving least squarse min_x ||Ax - b||^2\n", 43 | "\n", 44 | "np.random.seed(123)\n", 45 | "\n", 46 | "n = 100\n", 47 | "m = 300\n", 48 | "A = np.random.randn(m,n)\n", 49 | "b = np.random.randn(m,1)\n", 50 | "x_star = np.linalg.lstsq(A, b)[0]\n", 51 | "f_star = 0.5 * (np.linalg.norm(A.dot(x_star) - b) ** 2)\n", 52 | "N = 300\n", 53 | "t = 0.002\n", 54 | "\n", 55 | "gs_vanilla = []\n", 56 | "gs_aa_1 = []\n", 57 | "gs_aa_2 = []\n", 58 | "\n", 59 | "x = np.zeros((n,1))\n", 60 | "for i in range(N):\n", 61 | " x -= t * A.T.dot(A.dot(x) - b)\n", 62 | " gs_vanilla.append(np.linalg.norm( A.T.dot(A.dot(x) - b)))\n", 63 | " \n", 64 | "\n", 65 | "aa_mem = 5\n", 66 | "\n", 67 | "aa_wrk = aa.AndersonAccelerator(n, aa_mem, True, regularization=0, max_weight_norm=1e12)\n", 68 | "x = np.zeros((n,1))\n", 69 | "for i in range(N):\n", 70 | " if i > 0: aa_wrk.apply(x, x_prev)\n", 71 | " x_prev = np.copy(x)\n", 72 | " x -= t * A.T.dot(A.dot(x) - b)\n", 73 | " aa_wrk.safeguard(x, x_prev)\n", 74 | " gs_aa_1.append(np.linalg.norm( A.T.dot(A.dot(x) - b)))\n", 75 | "\n", 76 | "aa_wrk = aa.AndersonAccelerator(n, aa_mem, False, regularization=0, max_weight_norm=1e12)\n", 77 | "x = np.zeros((n,1))\n", 78 | "for i in range(N):\n", 79 | " if i > 0: aa_wrk.apply(x, x_prev)\n", 80 | " x_prev = np.copy(x)\n", 81 | " x -= t * A.T.dot(A.dot(x) - b)\n", 82 | " aa_wrk.safeguard(x, x_prev)\n", 83 | " gs_aa_2.append(np.linalg.norm( A.T.dot(A.dot(x) - b)))\n", 84 | "\n", 85 | " \n", 86 | "plt.semilogy(gs_vanilla, label='grad desc')\n", 87 | "plt.semilogy(gs_aa_1, label='AA-I')\n", 88 | "plt.semilogy(gs_aa_2, label='AA-II')\n", 89 | "\n", 90 | "plt.legend()\n", 91 | "plt.show()\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "# l1 regularized least squares\n", 101 | "\n", 102 | "np.random.seed(123)\n", 103 | "\n", 104 | "n = 300\n", 105 | "m = 100\n", 106 | "mu = 0.1\n", 107 | "rho = 0.1\n", 108 | "N = 10_000\n", 109 | "\n", 110 | "A = np.random.randn(m,n)\n", 111 | "b = np.random.randn(m,1)\n", 112 | "\n", 113 | "L = la.cho_factor(A.T.dot(A) + rho * np.identity(n))\n", 114 | "\n", 115 | "def soft_thresh(y, t):\n", 116 | " return np.sign(y) * np.maximum(abs(y) - t, 0)\n", 117 | "\n", 118 | " \n", 119 | "z0 = np.linalg.solve(A.T.dot(A), A.T.dot(b))\n", 120 | "\n", 121 | "z = z0\n", 122 | "lam = np.zeros((n,1))\n", 123 | "res_vanilla = []\n", 124 | "ds_vanilla = []\n", 125 | "for i in range(N):\n", 126 | " z_old = z\n", 127 | " x = la.cho_solve(L, rho * (z + lam) + A.T.dot(b))\n", 128 | " z = soft_thresh(x - lam, mu / rho)\n", 129 | " lam = lam - x + z\n", 130 | " res_vanilla.append(np.linalg.norm(x-z))\n", 131 | " ds_vanilla.append(np.linalg.norm(z - z_old))\n", 132 | "\n", 133 | " \n", 134 | "aa_mem = 5\n", 135 | "\n", 136 | "print(\"====================== Type - I ======================\")\n", 137 | "z = z0\n", 138 | "lam = np.zeros((n,1))\n", 139 | "u = np.vstack((z,lam))\n", 140 | "aa_wrk = aa.AndersonAccelerator(2 * n, aa_mem, True, regularization=1e-6, safeguard_factor=1, verbosity=1, max_weight_norm=1e12)\n", 141 | "res_aa_1 = []\n", 142 | "ds_aa_1 = []\n", 143 | "for i in range(N):\n", 144 | " if i > 0: aa_wrk.apply(u, u_old)\n", 145 | " u_old = np.copy(u)\n", 146 | " x = la.cho_solve(L, rho * (z + lam) + A.T.dot(b))\n", 147 | " z = soft_thresh(x - lam, mu / rho)\n", 148 | " lam = lam - x + z\n", 149 | " u = np.vstack((z, lam))\n", 150 | " aa_wrk.safeguard(u, u_old)\n", 151 | " z = u[:n]\n", 152 | " z_old = u_old[:n]\n", 153 | " lam = u[n:]\n", 154 | " \n", 155 | " res_aa_1.append(np.linalg.norm(la.cho_solve(L, rho * (z + lam) + A.T.dot(b)) - z))\n", 156 | " ds_aa_1.append(np.linalg.norm(z - z_old))\n", 157 | "\n", 158 | "print(\"====================== Type - II ======================\")\n", 159 | "z = z0\n", 160 | "lam = np.zeros((n,1))\n", 161 | "u = np.vstack((z,lam))\n", 162 | "aa_wrk = aa.AndersonAccelerator(2 * n, aa_mem, False, regularization=1e-12, safeguard_factor=1, verbosity=1, max_weight_norm=1e12)\n", 163 | "res_aa_2 = []\n", 164 | "ds_aa_2 = []\n", 165 | "for i in range(N):\n", 166 | " if i > 0: aa_wrk.apply(u, u_old)\n", 167 | " u_old = np.copy(u)\n", 168 | " x = la.cho_solve(L, rho * (z + lam) + A.T.dot(b))\n", 169 | " z = soft_thresh(x - lam, mu / rho)\n", 170 | " lam = lam - x + z\n", 171 | " u = np.vstack((z, lam))\n", 172 | " aa_wrk.safeguard(u, u_old)\n", 173 | " z = u[:n]\n", 174 | " z_old = u_old[:n]\n", 175 | " lam = u[n:]\n", 176 | " \n", 177 | " res_aa_2.append(np.linalg.norm(la.cho_solve(L, rho * (z + lam) + A.T.dot(b)) - z))\n", 178 | " ds_aa_2.append(np.linalg.norm(z - z_old))\n", 179 | "\n", 180 | "\n", 181 | "plt.semilogy(res_vanilla, label='admm - res')\n", 182 | "plt.semilogy(ds_vanilla, label='admm - dual')\n", 183 | "plt.semilogy(res_aa_1, label='AA-I - res')\n", 184 | "plt.semilogy(ds_aa_1, label='AA-I - dual')\n", 185 | "plt.semilogy(res_aa_2, label='AA-II - res')\n", 186 | "plt.semilogy(ds_aa_2, label='AA-II -dual')\n", 187 | "#plt.semilogy(res_aa_1_p, label='AA-I-P - res')\n", 188 | "#plt.semilogy(ds_aa_1_p, label='AA-I-P - dual')\n", 189 | "\n", 190 | "plt.legend()\n", 191 | "plt.show()" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3 (ipykernel)", 198 | "language": "python", 199 | "name": "python3" 200 | }, 201 | "language_info": { 202 | "codemirror_mode": { 203 | "name": "ipython", 204 | "version": 3 205 | }, 206 | "file_extension": ".py", 207 | "mimetype": "text/x-python", 208 | "name": "python", 209 | "nbconvert_exporter": "python", 210 | "pygments_lexer": "ipython3", 211 | "version": "3.7.3" 212 | } 213 | }, 214 | "nbformat": 4, 215 | "nbformat_minor": 2 216 | } 217 | --------------------------------------------------------------------------------