├── src ├── ch20.py ├── figures │ ├── ch06_figures.py │ ├── ch03_figures.py │ ├── ch10_figures.py │ ├── ch05_figures.py │ ├── ch02_figures.py │ ├── ch04_figures.py │ ├── ch07_figures.py │ ├── ch08_figures.py │ └── ch09_figures.py ├── examples │ ├── ch20_examples.py │ ├── ch10_examples.py │ ├── ch11_examples.py │ ├── ch05_examples.py │ ├── ch06_examples.py │ ├── ch09_examples.py │ ├── ch03_examples.py │ ├── ch04_examples.py │ ├── ch01_examples.py │ ├── ch07_examples.py │ ├── ch02_examples.py │ ├── ch21_examples.py │ └── ch08_examples.py ├── exercises │ ├── ch10_exercises.py │ ├── ch21_exercises.py │ ├── ch03_exercises.py │ ├── ch05_exercises.py │ └── ch06_exercises.py ├── tests │ ├── ch10 │ │ └── test_constrained_optimization_methods.py │ ├── ch09 │ │ ├── test_genetic_methods.py │ │ └── test_population_methods.py │ ├── ch11 │ │ └── test_linear_program.py │ ├── ch02 │ │ └── test_finite_difference_methods.py │ ├── ch21 │ │ └── test_gauss_seidel.py │ ├── ch07 │ │ ├── test_direct.py │ │ └── test_direct_methods.py │ ├── ch04 │ │ └── test_local_descent_methods.py │ ├── test_testfunctions.py │ ├── ch06 │ │ └── test_second_order_methods.py │ ├── ch08 │ │ └── test_stochastic_methods.py │ ├── ch03 │ │ └── test_bracketing_methods.py │ └── ch05 │ │ └── test_first_order_methods.py ├── ch21.py ├── ch02.py ├── ch10.py ├── ch15.py ├── convenience.py ├── ch18.py ├── ch06.py ├── ch12.py ├── ch16.py ├── ch11.py ├── ch04.py ├── ch03.py ├── ch13.py ├── ch05.py ├── ch14.py ├── ch19.py ├── ch07.py └── ch09.py ├── requirements.txt ├── LICENSE ├── .github └── workflows │ └── python-package.yml └── README.md /src/ch20.py: -------------------------------------------------------------------------------- 1 | """Chapter 20: Expression Optimization""" -------------------------------------------------------------------------------- /src/figures/ch06_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 6.4 2 | -------------------------------------------------------------------------------- /src/examples/ch20_examples.py: -------------------------------------------------------------------------------- 1 | # from pyparsing import * 2 | 3 | # TODO 4 | -------------------------------------------------------------------------------- /src/exercises/ch10_exercises.py: -------------------------------------------------------------------------------- 1 | # TODO - Exercise 10.1 2 | # TODO - Exercise 10.9 3 | # TODO - Exercise 10.13 -------------------------------------------------------------------------------- /src/examples/ch10_examples.py: -------------------------------------------------------------------------------- 1 | # TODO - Example 10.1 2 | # TODO - Example 10.3 3 | # TODO - Example 10.5 4 | # TODO - Example 10.6 -------------------------------------------------------------------------------- /src/examples/ch11_examples.py: -------------------------------------------------------------------------------- 1 | # TODO - Example 11.3 2 | # TODO - Example 11.4 3 | # TODO - Example 11.6 4 | # TODO - Example 11.7 5 | # TODO - Example 11.8 6 | # TODO - Example 11.9 -------------------------------------------------------------------------------- /src/exercises/ch21_exercises.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from copy import deepcopy 5 | 6 | # TODO - Exercise 21.4 - Need help from Mykel to understand it properly. 
-------------------------------------------------------------------------------- /src/figures/ch03_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 3.2 2 | # TODO - Figure 3.8 3 | # TODO - Figure 3.9 4 | # TODO - Figure 3.11 5 | # TODO - Figure 3.12 6 | # TODO - Figure 3.13 7 | # TODO - Figure 3.14 8 | # TODO - Figure 3.16 9 | -------------------------------------------------------------------------------- /src/figures/ch10_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 10.2 2 | # TODO - Figure 10.4 3 | # TODO - Figure 10.7 4 | # TODO - Figure 10.8 5 | # TODO - Figure 10.9 6 | # TODO - Figure 10.10 7 | # TODO - Figure 10.11 8 | # TODO - Figure 10.12 9 | # TODO - Figure 10.13 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy==1.4.1 2 | matplotlib==3.8.2 3 | networkx==3.2.1 4 | numdifftools==0.9.41 5 | numpy==1.26.1 6 | primePy==1.3 7 | # pyparsing==3.1.1 # TODO: Testing to see if it works for ExprRules.jl 8 | scipy==1.12.0 9 | sympy==1.12 10 | # tensorflow==2.15.0 # NOTE: Only needed for an example right now; not necessary for tests -------------------------------------------------------------------------------- /src/tests/ch10/test_constrained_optimization_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch10 import * 6 | 7 | 8 | class TestConstrainedOptimizationMethods(): 9 | def test_penalty_method(self): 10 | pass 11 | 12 | def test_augmented_lagrange_method(self): 13 | pass 14 | 15 | def test_interior_point_method(self): 16 | pass -------------------------------------------------------------------------------- /src/tests/ch09/test_genetic_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import * 6 | 7 | 8 | class TestGeneticMethods(): 9 | def test_genetic_algorithm(self): 10 | pass 11 | 12 | def test_selection_methods(self): 13 | pass 14 | 15 | def test_crossover_methods(self): 16 | pass 17 | 18 | def test_mutation_methods(self): 19 | pass -------------------------------------------------------------------------------- /src/tests/ch09/test_population_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import * 6 | 7 | 8 | class TestPopulationMethods(): 9 | def test_differential_evolution(self): 10 | pass 11 | 12 | def test_particle_swarm_optimization(self): 13 | pass 14 | 15 | def test_firefly(self): 16 | pass 17 | 18 | def test_cuckoo_search(self): 19 | pass 20 | -------------------------------------------------------------------------------- /src/examples/ch05_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | 6 | def example_5_1(): 7 | """Example 5.1: Computing the gradient descent direction.""" 8 | def f(x): return x[0]*(x[1]**2) 9 | def grad_f(x): return np.array([x[1]**2, 2*x[0]*x[1]]) 10 | x = np.array([1.0, 2.0]) 11 | d = -grad_f(x) 12 
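    # At x = [1, 2] the gradient is grad_f(x) = [2**2, 2*1*2] = [4, 4], so the
    # unnormalized descent direction is d = [-4, -4]; dividing by its norm,
    # 4*sqrt(2), gives the normalized direction [-0.7071, -0.7071].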
| 13 | print("Unnormalized descent direction: d = ", d) 14 | print("Normalized descent direction: d = ", d/np.linalg.norm(d)) 15 | -------------------------------------------------------------------------------- /src/examples/ch06_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | from ch06 import newtons_method 6 | from TestFunctions import booth 7 | 8 | 9 | def example_6_1(): 10 | """Example 6.1: Newton's method used to minimize Booth's function""" 11 | x = np.array([9.0, 8.0]) 12 | x_prime = newtons_method(booth.grad, booth.hess, x, eps=1e-5, k_max=1) 13 | 14 | print("After 1 iteration of Newton's Method, x = ", x_prime) 15 | print("Gradient at x: ", booth.grad(x)) 16 | -------------------------------------------------------------------------------- /src/tests/ch11/test_linear_program.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch11 import LinearProgram 6 | 7 | 8 | class TestLinearProgram(): 9 | def test_get_vertex(self): 10 | pass 11 | 12 | def test_edge_transition(self): 13 | pass 14 | 15 | def test_step(self): 16 | pass 17 | 18 | def test_minimize_given_vertex_partition(self): 19 | pass 20 | 21 | def test_minimize(self): 22 | pass 23 | 24 | def test_dual_certificate(self): 25 | pass 26 | 27 | def test_minimize_lp_and_y(self): 28 | pass -------------------------------------------------------------------------------- /src/exercises/ch03_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | 6 | def exercise_3_4(): 7 | """Exercise 3.4: Applying Bisection to f(x) = x^2/2 - x, starting with [0, 1000]""" 8 | def f_prime(x): return x - 1 9 | a, b = 0.0, 1000.0 10 | y_a, y_b = f_prime(a), f_prime(b) 11 | 12 | for i in range(3): # Execute 3 steps of the algorithm 13 | x = (a + b) / 2 14 | y = f_prime(x) 15 | if y == 0: 16 | a, b = x, x 17 | elif np.sign(y) == np.sign(y_a): 18 | a = x 19 | else: 20 | b = x 21 | print("Iteration " + str(i + 1) + ": ", (a, b)) 22 | -------------------------------------------------------------------------------- /src/tests/ch02/test_finite_difference_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch02 import diff_forward, diff_central, diff_backward, diff_complex 6 | 7 | 8 | class TestFiniteDifferenceMethods(): 9 | tol = 1e-7 10 | 11 | def test_diff_forward(self): 12 | self.run_test_finite_difference_method(diff_forward) 13 | 14 | def test_diff_central(self): 15 | self.run_test_finite_difference_method(diff_central) 16 | 17 | def test_diff_backward(self): 18 | self.run_test_finite_difference_method(diff_backward) 19 | 20 | def test_diff_complex(self): 21 | self.run_test_finite_difference_method(diff_complex) 22 | 23 | def run_test_finite_difference_method(self, diff): 24 | x = np.linspace(-100, 100, 1000) 25 | assert np.all(np.abs(np.cos(x) - diff(np.sin, x)) < self.tol) 26 | -------------------------------------------------------------------------------- /src/tests/ch21/test_gauss_seidel.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("./src"); 
sys.path.append("../../") 2 | 3 | import numpy as np 4 | 5 | from ch21 import gauss_seidel 6 | 7 | 8 | class TestGaussSeidel(): 9 | def F1(A): 10 | A["y1"] = A["y2"] - A["x"] 11 | return A 12 | 13 | def F2(A): 14 | A["y2"] = np.sin(A["y1"] + A["y3"]) 15 | return A 16 | 17 | def F3(A): 18 | A["y3"] = np.cos(A["x"] + A["y2"] + A["y1"]) 19 | 20 | def test(self): 21 | A = {"x": 1.0, "y1": 1.0, "y2": 1.0, "y3": 1.0} 22 | A, converged = gauss_seidel([TestGaussSeidel.F1, TestGaussSeidel.F2, TestGaussSeidel.F3], A, k_max=100) 23 | assert converged 24 | assert np.isclose(A["y1"], -1.8795201143545137, atol=1e-8) 25 | assert np.isclose(A["y2"], -0.8795468970115342, atol=1e-8) 26 | assert np.isclose(A["y3"], -0.1871604183537351, atol=1e-8) 27 | -------------------------------------------------------------------------------- /src/examples/ch09_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import rand_population_uniform, genetic_algorithm,\ 6 | TruncationSelection, SinglePointCrossover, GaussianMutation 7 | 8 | 9 | def example_9_1(): 10 | """ 11 | Example 9.1: Demonstration of using a genetic algorithm for optimizing a 12 | simple function. 13 | """ 14 | np.random.seed(0) 15 | def f(x): return np.linalg.norm(x) 16 | m = 100 # population size 17 | k_max = 10 # number of iterations 18 | population = rand_population_uniform(m, a=np.array([-3.0, -3.0]), b=np.array([3.0, 3.0])) 19 | S = TruncationSelection(10) # select top 10 20 | C = SinglePointCrossover() 21 | M = GaussianMutation(0.5) # small mutation rate 22 | x = genetic_algorithm(f, population, k_max, S, C, M) 23 | print("x = ", x) 24 | 25 | # TODO - Example 9.2 (Maybe eventually: need to construct the algorithm for Lamarckian and Baldwinian learning) -------------------------------------------------------------------------------- /src/figures/ch05_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch05 import GradientDescent 7 | 8 | from TestFunctions import rosenbrock 9 | from convenience import plot_contour 10 | 11 | 12 | def figure_5_1(): # TODO - To duplicate the effect, I need to see the parameters 13 | """ 14 | Figure 5.1: Gradient descent can result in zig-zagging in narrow canyons. 15 | Here we see the effect on the Rosenbrock function. 
16 | """ 17 | x = np.array([-1.1, -1.1]) # Starting point 18 | M = GradientDescent(alpha = 0.025) 19 | 20 | fig = plt.figure() 21 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 2.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 22 | for i in range(10): 23 | x_next = M.step(rosenbrock, rosenbrock.grad, x) 24 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 25 | x = x_next 26 | plt.title("Figure 5.1") 27 | plt.show() 28 | 29 | # TODO - Figure 5.2 30 | # TODO - Figure 5.3 31 | # TODO - Figure 5.5 32 | # TODO - Figure 5.6 33 | # TODO - Figure 5.7 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 griffinbholt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/ch21.py: -------------------------------------------------------------------------------- 1 | """Chapter 21: Multidisciplinary Optimization""" 2 | 3 | import numpy as np 4 | 5 | from copy import deepcopy 6 | from typing import Callable 7 | 8 | 9 | def gauss_seidel(Fs: list[Callable[[dict[str, float | np.ndarray]], dict[str, float | np.ndarray]]], 10 | A: dict[str, float | np.ndarray], 11 | k_max: int = 100, 12 | eps: float = 1e-4) -> tuple[dict[str, float | np.ndarray], bool]: 13 | """ 14 | The Gauss-Seidel algorithm for conducting a multidiciplinary analysis. 15 | Here, `Fs` is a list of disciplinary analysis functions that take and modify 16 | an assignment `A`. There are two optional arguments: the maximum number of 17 | iterations `k_max` and the relative error tolerance `eps`. The method 18 | returns the modified assignment and whether it converged. 
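
    A minimal usage sketch (hypothetical coupled disciplines; any analysis
    functions with this signature will work):

        def F1(A): A["y1"] = A["x"] + A["y2"]; return A
        def F2(A): A["y2"] = 0.5 * A["y1"]; return A
        A, converged = gauss_seidel([F1, F2], {"x": 1.0, "y1": 0.0, "y2": 0.0})
        # converges to the fixed point y1 = 2, y2 = 1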
19 | """ 20 | k, converged = 0, False 21 | while (not converged) and (k <= k_max): 22 | k += 1 23 | A_old = deepcopy(A) 24 | for F in Fs: 25 | F(A) 26 | converged = np.all([np.isclose(A[v], A_old[v], rtol=eps) for v in A]) 27 | return (A, converged) 28 | -------------------------------------------------------------------------------- /src/examples/ch03_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../'); 2 | 3 | import numpy as np 4 | 5 | from ch03 import PHI 6 | 7 | 8 | def example_3_1(): 9 | """ 10 | Example 3.1: Using Fibonacci search with five function evaluations 11 | to optimize a univariate function 12 | """ 13 | def f(x): return np.exp(x - 2) - x 14 | a, b = -2, 6 15 | n = 5 16 | eps = 1e-2 17 | 18 | # Fibonacci search (taken directly from ch03.py) 19 | print("Original Interval: ", (a, b), "\n") 20 | s = (1 - np.sqrt(5)) / (1 + np.sqrt(5)) 21 | p = 1 / ((PHI*(1 - (s**(n + 1)))) / (1 - (s**n))) 22 | d = p*b + (1 - p)*a 23 | y_d = f(d) 24 | print("f(" + str(round(d, 2)) + ") = ", y_d) 25 | for i in range(1, n): 26 | if i == n - 1: 27 | c = eps*a + (1 - eps)*d 28 | else: 29 | c = p*a + (1 - p)*b 30 | y_c = f(c) 31 | print("f(" + str(round(c, 2)) + ") = ", y_c) 32 | if y_c < y_d: 33 | b, d, y_d = d, c, y_c 34 | print("Interval Update: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2)), "\n") 35 | else: 36 | a, b = b, c 37 | print("Interval Update: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2)), "\n") 38 | p = 1 / ((PHI*(1 - (s**(n - i + 1)))) / (1 - (s**(n - i)))) 39 | print("Final Interval: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2))) 40 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest 41 | -------------------------------------------------------------------------------- /src/figures/ch02_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch02 import diff_forward, diff_central, diff_complex 7 | 8 | 9 | def figure_2_4(): 10 | """ 11 | Figure 2.4: A comparison of the error in derivative estimate for the 12 | function sin(x) at x = 1/2 as the step size is varied. The linear error 13 | of the forward difference method and the quadratic error of the central 14 | difference and complex methods can be seen by the constant slops on the 15 | right hand side. The complex step method avoids the subtractive cancellation 16 | error that occurs when differencing two function evaluations that are close 17 | together. 18 | """ 19 | def abs_rel_error(v, v_approx): return np.abs((v - v_approx) / v) 20 | x = 0.5 21 | dfdx_true = np.cos(x) 22 | 23 | # Compute absolute relative errors for finite difference gradient approximations 24 | h = np.logspace(-18, 1, 100) 25 | error_complex = abs_rel_error(dfdx_true, diff_complex(np.sin, x, h)) 26 | error_forward = abs_rel_error(dfdx_true, diff_forward(np.sin, x, h)) 27 | error_central = abs_rel_error(dfdx_true, diff_central(np.sin, x, h)) 28 | 29 | # Plot results 30 | plt.plot(h, error_complex, c="tab:green", label="complex") 31 | plt.plot(h, error_forward, c="tab:blue", label="forward") 32 | plt.plot(h, error_central, c="tab:red", label="central") 33 | plt.xlabel("step size h") 34 | plt.ylabel("absolute relative error") 35 | plt.xscale("log") 36 | plt.yscale("log") 37 | plt.legend() 38 | plt.title("Figure 2.4") 39 | plt.show() 40 | -------------------------------------------------------------------------------- /src/examples/ch04_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch04 import line_search, backtracking_line_search 7 | 8 | 9 | def example_4_1(): 10 | """Example 4.1: Line search used to minimize a function along a descent direction""" 11 | def f(x): return np.sin(x[0]*x[1]) + np.exp(x[1] + x[2]) - x[2] 12 | x = np.array([1.0, 2.0, 3.0]) 13 | d = np.array([0.0, -1.0, -1.0]) 14 | x_next = line_search(f, x, d) 15 | alpha_opt = (x_next[1] - x[1])/d[1] 16 | 17 | # Print results 18 | print("α* = ", alpha_opt) 19 | print("x' = ", x_next) 20 | 21 | # Plot line search objective 22 | alpha = np.arange(0.0, 5.0, 0.01) 23 | def ls_obj(alpha): return np.sin(2 - alpha) + np.exp(5 - 2*alpha) + alpha - 3 24 | plt.plot(alpha, ls_obj(alpha)) 25 | plt.scatter([alpha_opt], [ls_obj(alpha_opt)], label='α*') 26 | plt.xlabel("") 27 | plt.ylabel("line search objective") 28 | plt.legend() 29 | plt.show() 30 | 31 | 32 | def example_4_2(): 33 | """Example 4.2: An example of backtracking line search, an approximate line search method""" 34 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 35 | def grad_f(x): return np.array([[2, 1], [1, 2]]) @ x 36 | x = np.array([1.0, 2.0]) 37 | d = np.array([-1.0, -1.0]) 38 | sigma = 0.9 39 | 40 | alpha_opt = backtracking_line_search(f, grad_f, x, d, alpha=10.0, p=0.5, beta=1e-4) 41 | candidate_x = x + alpha_opt*d 42 | cand_x_deriv_d = np.dot(grad_f(candidate_x), d) 43 
| adj_x_deriv_d = sigma * np.dot(grad_f(x), d) 44 | 45 | print("α* = ", alpha_opt) 46 | print("x' = ", candidate_x) 47 | print("2nd Wolfe Condition: ", cand_x_deriv_d >= adj_x_deriv_d, " ({:.1f} >= {:.1f})".format(cand_x_deriv_d, adj_x_deriv_d)) 48 | -------------------------------------------------------------------------------- /src/examples/ch01_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from TestFunctions import rosenbrock 7 | from convenience import plot_surface, plot_contour 8 | 9 | def example_1_1(display_contour_plot=False): 10 | """Example 1.1: Checking the first- and second-order necessary conditions 11 | of a point on the Rosenbrock function. The minimizer is indicated by the 12 | dot in the figure (when `display_contour_plot=True`).""" 13 | x = np.array([1.0, 1.0]) 14 | 15 | print("Gradient at [1, 1]:") 16 | print(rosenbrock.grad(x)) 17 | print() 18 | print("Hessian at [1, 1]:") 19 | print(rosenbrock.hess(x)) 20 | 21 | if display_contour_plot: 22 | fig = plt.figure() 23 | plot_contour(fig, 24 | rosenbrock, 25 | xlim=(-2.1, 2.1), 26 | ylim=(-2.1, 2.1), 27 | xstride=0.01, 28 | ystride=0.01, 29 | levels=[1, 2, 3, 5, 9, 25, 50, 100]) 30 | plt.scatter([1], [1], c='black') 31 | plt.show() 32 | 33 | def example_1_2(): 34 | """Example 1.2: An example three-dimensional visualization and the associated contour plot""" 35 | def f(x): return x[0]**2 - x[1]**2 36 | 37 | fig = plt.figure(figsize=(10, 5)) 38 | plot_surface(fig, 39 | f, 40 | xlim=(-2.1, 2.1), 41 | ylim=(-2.1, 2.1), 42 | zlim=(-5.1, 5.1), 43 | xstride=0.05, 44 | ystride=0.05, 45 | subplot_coords=(1,2,1)) 46 | plot_contour(fig, 47 | f, 48 | xlim=(-2.1, 2.1), 49 | ylim=(-2.1, 2.1), 50 | xstride=0.05, 51 | ystride=0.05, 52 | levels=[-4, -2, 0, 2, 4], 53 | clabel=True, 54 | subplot_coords=(1,2,2)) 55 | plt.subplots_adjust(wspace=0.5) 56 | plt.show() 57 | -------------------------------------------------------------------------------- /src/ch02.py: -------------------------------------------------------------------------------- 1 | """Chapter 2: Derivatives and Gradients""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | 8 | def diff_forward(f: Callable[[float | np.ndarray], float | np.ndarray], 9 | x: float | np.ndarray, 10 | h: float | np.ndarray = np.sqrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 11 | """Forward difference method for estimating the derivative of a 12 | function `f` at `x` with finite difference `h`. The default step size is 13 | the square root of the machine precision for floating point values. This 14 | step size balances machine round-off error with step size error. 15 | 16 | `np.finfo(np.float64).eps` provides the step size between 1.0 and the next 17 | larger representable floating-point value. 18 | """ 19 | return (f(x + h) - f(x)) / h 20 | 21 | 22 | def diff_central(f: Callable[[float | np.ndarray], float | np.ndarray], 23 | x: float | np.ndarray, 24 | h: float | np.ndarray = np.cbrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 25 | """Central difference method for estimating the derivative of a 26 | function `f` at `x` with finite difference `h`. The default step size is 27 | the cube root of the machine precision for floating point values. 
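    (The cube root arises, under the usual error model, from balancing the
    O(h^2) truncation error of the central difference against the O(eps/h)
    floating-point round-off error.)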
28 | """ 29 | return (f(x + (h/2)) - f(x - (h/2))) / h 30 | 31 | 32 | def diff_backward(f: Callable[[float | np.ndarray], float | np.ndarray], 33 | x: float | np.ndarray, 34 | h: float | np.ndarray = np.sqrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 35 | """Backward difference method for estimating the derivative of a 36 | function `f` at `x` with finite difference `h`. The default step size is 37 | the square root of the machine precision for floating point values. 38 | """ 39 | return (f(x) - f(x - h)) / h 40 | 41 | 42 | def diff_complex(f: Callable[[float | np.ndarray], float | np.ndarray], 43 | x: float | np.ndarray, 44 | h: float | np.ndarray = 1e-20) -> float | np.ndarray: 45 | """The complex step method for estimating the derivative of a function `f` 46 | at `x` with finite difference `h`.""" 47 | return np.imag(f(x + h*1j)) / h 48 | -------------------------------------------------------------------------------- /src/tests/ch07/test_direct.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch07 import direct 6 | from TestFunctions import ackley, booth, branin, flower, michalewicz, rosenbrock, wheeler 7 | 8 | class TestDIRECT(): 9 | def test(self, eps=1e-5): 10 | # Ackley's Function 11 | f_min, x_min = ackley.global_min() 12 | x = direct(ackley, a=np.array([-30.0, -30.0]), b=np.array([30.0, 30.0]), eps=eps, k_max=50) 13 | assert np.abs(ackley(x) - f_min) < eps 14 | assert np.all(np.abs(x - x_min) < eps) 15 | 16 | # Booth's Function 17 | f_min, x_min = booth.global_min() 18 | x = direct(booth, a=np.array([-10.0, -10.0]), b=np.array([10.0, 10.0]), eps=eps, k_max=40) 19 | assert np.abs(booth(x) - f_min) < eps 20 | assert np.all(np.abs(x - x_min) < eps) 21 | 22 | # Branin's Function 23 | f_min, x_min = branin.global_min() 24 | x = direct(branin, a=np.array([-5.0, -5.0]), b=np.array([20.0, 20.0]), eps=eps, k_max=50) 25 | assert np.abs(branin(x) - f_min[0]) < eps 26 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 27 | 28 | # Michalewicz Function 29 | f_min, x_min = michalewicz.global_min() 30 | x = direct(michalewicz, a=np.array([0.0, 0.0]), b=np.array([4.0, 4.0]), eps=eps, k_max=50) 31 | assert np.abs(michalewicz(x) - f_min) < eps 32 | assert np.all(np.abs(x - x_min) < eps) 33 | 34 | # Flower Function 35 | x = direct(flower, a=np.array([-3.0, -3.0]), b=np.array([3.0, 3.0]), eps=eps, k_max=50) 36 | assert np.all(np.abs(x - np.zeros(2)) < eps) 37 | 38 | # Rosenbrock's Banana Function 39 | f_min, x_min = rosenbrock.global_min() 40 | x = direct(rosenbrock, a=np.array([-2.0, -2.0]), b=np.array([2.0, 2.0]), eps=eps, k_max=50) 41 | assert np.abs(rosenbrock(x) - f_min) < eps 42 | assert np.all(np.abs(x - x_min) < eps) 43 | 44 | # Wheeler's Ridge 45 | f_min, x_min = wheeler.global_min() 46 | x = direct(wheeler, a=np.array([-5.0, -2.0]), b=np.array([25.0, 6.0]), eps=eps, k_max=50) 47 | assert np.abs(wheeler(x) - f_min) < eps 48 | assert np.all(np.abs(x - x_min) < eps) 49 | -------------------------------------------------------------------------------- /src/tests/ch04/test_local_descent_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | import warnings 5 | 6 | from ch04 import line_search, backtracking_line_search, strong_backtracking, trust_region_descent 7 | from TestFunctions 
import rosenbrock 8 | 9 | 10 | class TestLocalDescentMethods(): 11 | def test_line_search(self): 12 | def f(x): return np.sin(x[0]*x[1]) + np.exp(x[1] + x[2]) - x[2] 13 | x = np.array([1.0, 2.0, 3.0]) 14 | d = np.array([0.0, -1.0, -1.0]) 15 | x_prime = line_search(f, x, d) 16 | exp_x_prime = np.array([1.0, -1.127, -0.127]) 17 | assert np.all(np.abs(x_prime - exp_x_prime) < 1e-3) 18 | 19 | def test_backtracking_line_search(self): 20 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 21 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 22 | x = np.array([1.0, 2.0]) 23 | d = np.array([-1.0, -1.0]) 24 | alpha = backtracking_line_search(f, grad_f, x, d, alpha=10) 25 | x_prime = x + alpha * d 26 | exp_x_prime = np.array([-1.5, -0.5]) 27 | assert np.all(np.abs(x_prime - exp_x_prime) < 1e-10) 28 | 29 | def test_strong_backtracking(self): 30 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 31 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 32 | x = np.array([1.0, 2.0]) 33 | d = np.array([-1.0, -1.0]) 34 | alpha = strong_backtracking(f, grad_f, x, d) 35 | x_prime = x + alpha * d 36 | assert f(x_prime) < f(x) 37 | assert f(x_prime) < 3.25 38 | 39 | def test_trust_region_descent(self, eps: float = 1e-8): 40 | warnings.simplefilter(action='ignore', category=FutureWarning) 41 | 42 | # Rosenbrock 43 | x = np.array([-5.0, -3.0]) 44 | x_prime = trust_region_descent(f=rosenbrock, 45 | grad_f=rosenbrock.grad, 46 | H=rosenbrock.hess, 47 | x=x, 48 | k_max=15) 49 | f_min, x_min = rosenbrock.global_min() 50 | assert np.abs(rosenbrock(x_prime) - f_min) < eps 51 | assert np.all(np.abs(x_prime - x_min) < eps) 52 | -------------------------------------------------------------------------------- /src/examples/ch07_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch07 import Interval, Intervals, reparametrize_to_unit_hypercube, rev_unit_hypercube_parametrization 7 | 8 | # TODO - Example 7.1 9 | # TODO - Example 7.2 10 | 11 | def example_7_1(eps: float = 1e-5, k_max: int = 5): # TODO - Hitting some snags 12 | """Example 7.1: The DIRECT method applied to a univariate function.""" 13 | def f(x): return np.sin(x) + np.sin(2*x) + np.sin(4*x) + np.sin(8*x) 14 | a = np.array([-2.0]) 15 | b = np.array([2.0]) 16 | 17 | g = reparametrize_to_unit_hypercube(f, a, b) 18 | intervals = Intervals() 19 | n = len(a) 20 | c = np.full(n, 0.0) 21 | interval = Interval(c, g(c), np.zeros(n)) 22 | intervals.add_interval(interval) 23 | c_best, y_best = np.copy(interval.c), interval.y 24 | 25 | fig, ax = plt.subplots(k_max + 1, 2, sharey=True, figsize=(7, 9)) 26 | t = np.linspace(-2.0, 2.0, 1000) 27 | f_t = f(t) 28 | ax[0, 0].plot(t, f_t, c="black") 29 | ax[0, 0].hlines([f(c)], xmin=-2.0, xmax=2.0, color="tab:blue") 30 | ax[0, 0].scatter([interval.c], [f(c)], color="tab:blue") 31 | ax[0, 0].set_xlim(-2.0, 2.0) 32 | ax[0, 1].scatter([2.0], [f(c)], color="tab:blue") 33 | ax[0, 1].set_xlim(0.0, 2.0) 34 | 35 | for i in range(1, k_max + 1): 36 | ax[i, 0].plot(t, f_t, color="black") 37 | ax[i, 0].set_xlim(-2.0, 2.0) 38 | ax[i, 1].set_xlim(0.0, 2.0) 39 | S = intervals.get_opt_intervals(eps, y_best) 40 | to_add = [] 41 | for interval in S: 42 | new_intervals = interval.divide(g) 43 | to_add.extend(new_intervals) 44 | intervals[interval.vertex_dist()].get() 45 | for interval in to_add: 46 | c = rev_unit_hypercube_parametrization(interval.c, a, b) 47 | u = 
rev_unit_hypercube_parametrization(interval.c + (3.0**(-i)), a, b) 48 | l = rev_unit_hypercube_parametrization(interval.c - (3.0**(-i)), a, b) 49 | ax[i, 0].hlines([f(c)], xmin=l, xmax=u, color="gray") 50 | ax[i, 0].scatter([c], [f(c)], color="black") 51 | intervals.add_interval(interval) 52 | if interval.y < y_best: 53 | c_best, y_best = np.copy(interval.c), interval.y 54 | 55 | x = rev_unit_hypercube_parametrization(c_best, a, b) 56 | 57 | plt.ylim(-2.5, 2.5) 58 | plt.show() 59 | 60 | example_7_1() -------------------------------------------------------------------------------- /src/examples/ch02_examples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sympy as sp 3 | 4 | from typing import Union 5 | 6 | 7 | def example_2_1(): 8 | """Example 2.1: Symbolic differentiation provides analytical derivatives.""" 9 | x = sp.Symbol('x') 10 | f = x**2 + x/2 - sp.sin(x)/x 11 | print(sp.diff(f, x)) 12 | 13 | 14 | def example_2_4(): 15 | """Example 2.4: The complex step method for estimating derivatives""" 16 | def f(x): return np.sin(x**2) 17 | v = f(np.pi/2 + 0.001j) 18 | print("f(x) = real(v) = ", np.real(v)) 19 | print("f'(x) = imag(v)/0.001 = ", np.imag(v)/0.001) 20 | 21 | 22 | def example_2_5(): 23 | """Example 2.5: An implementation of dual numbers allows for automatic forward accumulation""" 24 | class Dual(): 25 | def __init__(self, v: float, d: float): 26 | self.v = v 27 | self.d = d 28 | 29 | def __repr__(self) -> str: 30 | return 'Dual(' + str(self.v) + ',' + str(self.d) + ')' 31 | 32 | def __add__(self, other: 'Dual') -> 'Dual': 33 | return Dual(self.v + other.v, self.d + other.d) 34 | 35 | def __mul__(self, other: 'Dual') -> 'Dual': 36 | return Dual(self.v * other.v, self.v * other.d + other.v * self.d) 37 | 38 | @staticmethod 39 | def log(a: 'Dual') -> 'Dual': 40 | return Dual(np.log(a.v), a.d / a.v) 41 | 42 | @staticmethod 43 | def max(a: 'Dual', b: Union['Dual', int]) -> 'Dual': 44 | if isinstance(b, Dual): 45 | v = np.maximum(a.v, b.v) 46 | d = a.d if a.v > b.v else (b.d if a.v < b.v else np.nan) 47 | else: # isinstance(b, int) 48 | v = np.maximum(a.v, b) 49 | d = a.d if a.v > b else (0 if a.v < b else np.nan) 50 | return Dual(v, d) 51 | 52 | a = Dual(3, 1) 53 | b = Dual(2, 0) 54 | print(Dual.log(a*b + Dual.max(a, 2))) 55 | 56 | 57 | def example_2_6(): 58 | """ 59 | Example 2.6: Automatic differentiation using the Tensorflow package. 
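    Here f(a, b) = log(a*b + max(a, 2)), so at (a, b) = (3, 2) the argument of
    the log is 3*2 + 3 = 9, and the partial derivatives are (b + 1)/9 = 1/3
    with respect to a and a/9 = 1/3 with respect to b.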
60 | We find that the gradient at [3, 2] is [1/3, 1/3] 61 | """ 62 | import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # Disables Tensorflow CPU warning 63 | import tensorflow as tf 64 | 65 | @tf.function 66 | def f(a, b): return tf.math.log(a*b + tf.math.maximum(a, 2)) 67 | 68 | x = tf.Variable(3.0) 69 | y = tf.Variable(2.0) 70 | with tf.GradientTape() as tape: 71 | z = f(x, y) 72 | print([deriv.numpy() for deriv in tape.gradient(z, [x, y])]) 73 | -------------------------------------------------------------------------------- /src/tests/ch07/test_direct_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch07 import cyclic_coordinate_descent, powell, hooke_jeeves, generalized_pattern_search, nelder_mead 6 | from TestFunctions import booth, wheeler 7 | 8 | class TestDirectMethods(): 9 | eps = 1e-5 10 | 11 | def test_cyclic_coord_descent(self): 12 | f_min, x_min = booth.global_min() 13 | x = np.array([10.0, -5.0]) 14 | x = cyclic_coordinate_descent(booth, x, self.eps) 15 | assert np.abs(booth(x) - f_min) < self.eps 16 | assert np.all(np.abs(x - x_min) < self.eps) 17 | 18 | def test_cyclic_coord_descent_with_accel(self): 19 | f_min, x_min = booth.global_min() 20 | x = np.array([10.0, -5.0]) 21 | x = cyclic_coordinate_descent(booth, x, self.eps, with_acceleration=True) 22 | assert np.abs(booth(x) - f_min) < self.eps 23 | assert np.all(np.abs(x - x_min) < self.eps) 24 | 25 | def test_powell(self): 26 | f_min, x_min = booth.global_min() 27 | x = np.array([10.0, -5.0]) 28 | x = powell(booth, x, self.eps) 29 | assert np.abs(booth(x) - f_min) < self.eps 30 | assert np.all(np.abs(x - x_min) < self.eps) 31 | 32 | def test_hooke_jeeves(self): 33 | f_min, x_min = wheeler.global_min() 34 | x = np.array([0.7, 0.9]) 35 | x = hooke_jeeves(wheeler, x, alpha=0.5, eps=self.eps, gamma=0.5) 36 | assert np.abs(wheeler(x) - f_min) < self.eps 37 | assert np.all(np.abs(x - x_min) < self.eps) 38 | 39 | def test_generalized_pattern_search(self): 40 | possible_Ds = [ 41 | np.array([[1.0, 0], [0, 1], [-1, 0], [0, -1]]), # Equivalent to Hooke-Jeeves 42 | np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]]), # Diagonal Directions 43 | np.array([[1, 0], [0, 1], [-1, -1]]) # 3 Directions: Up, Right, Down-Left 44 | ] 45 | for D in possible_Ds: 46 | f_min, x_min = wheeler.global_min() 47 | x = np.array([0.7, 0.9]) 48 | x = generalized_pattern_search(wheeler, x, alpha=0.5, D=D, eps=self.eps, gamma=0.5) 49 | assert np.abs(wheeler(x) - f_min) < 10*self.eps 50 | assert np.all(np.abs(x - x_min) < 10*self.eps) 51 | 52 | def test_nelder_mead(self): 53 | f_min, x_min = wheeler.global_min() 54 | S = np.array([[0.7, 1.4], [0.7, 0.9], [0.4, 0.7]]) 55 | x = nelder_mead(wheeler, S, eps=self.eps) 56 | assert np.abs(wheeler(x) - f_min) < 1e-4 57 | assert np.all(np.abs(x - x_min) < 1e-2) 58 | -------------------------------------------------------------------------------- /src/tests/test_testfunctions.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../'); sys.path.append('../../') 2 | 3 | import numdifftools as nd 4 | import numpy as np 5 | 6 | from TestFunctions import ScalarValuedTestFunction, MichalewiczFunction, VectorValuedTestFunction,\ 7 | ackley, booth, branin, flower, michalewicz, rosenbrock,\ 8 | wheeler, circle 9 | 10 | 11 | class TestTestFunctions(): 12 | scalar_functions = [ackley, 
booth, branin, flower, 13 | michalewicz, rosenbrock, wheeler] 14 | vector_functions = [circle] 15 | 16 | def test_gradients(self): 17 | np.random.seed(42) 18 | for test_function in self.scalar_functions: 19 | self.run_gradient_test(test_function) 20 | 21 | def run_gradient_test(self, test_function: ScalarValuedTestFunction, eps: float = 1e-9, n_trials: int = 100): 22 | for _ in range(n_trials): 23 | x = np.random.rand(test_function.d if test_function.d is not None else 10) 24 | num_grad = nd.Gradient(test_function)(x) 25 | test_grad = test_function.grad(x) 26 | assert np.all(np.abs(num_grad - test_grad) < eps), test_function.__class__.__name__ + " Gradient failed" 27 | 28 | def test_hessians(self): 29 | np.random.seed(42) 30 | for test_function in self.scalar_functions: 31 | if isinstance(test_function, MichalewiczFunction): 32 | self.run_hessian_test(test_function, eps=0.5) 33 | else: 34 | self.run_hessian_test(test_function) 35 | 36 | def run_hessian_test(self, test_function: ScalarValuedTestFunction, eps: float = 1e-3, n_trials: int = 100): 37 | for _ in range(n_trials): 38 | x = np.random.rand(2) 39 | num_hess = nd.Hessian(test_function)(x) 40 | test_hess = test_function.hess(x) 41 | assert np.all(np.abs(num_hess - test_hess) < eps), test_function.__class__.__name__ + " Hessian failed" 42 | 43 | def test_jacobians(self): 44 | np.random.seed(42) 45 | for test_function in self.vector_functions: 46 | self.run_jacobian_test(test_function) 47 | 48 | def run_jacobian_test(self, test_function: VectorValuedTestFunction, eps: float = 1e-9, n_trials: int = 100): 49 | for _ in range(n_trials): 50 | x = np.random.rand(2) 51 | num_jac = nd.Jacobian(test_function)(x) 52 | test_jac = test_function.jac(x) 53 | assert np.all(np.abs(num_jac - test_jac) < eps), test_function.__class__.__name__ + " Jacobian failed" 54 | -------------------------------------------------------------------------------- /src/examples/ch21_examples.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from copy import deepcopy 5 | 6 | 7 | def example_21_1(f1, f2): 8 | """ 9 | Example 21.1: Basic code syntax for the assignment-based representation of 10 | multidisciplinary design optimization problems. 11 | """ 12 | def F1(A): 13 | A["y1"] = f1(A["x"], A["y2"]) 14 | return A 15 | 16 | def F2(A): 17 | A["y2"] = f2(A["x"], A["y1"]) 18 | return A 19 | 20 | A = {"x": 1, "y1": 2, "y2": 3} 21 | 22 | 23 | def example_21_2(): 24 | """ 25 | Example 21.2: An example that illustrates the importance of choosing an 26 | appropriate ordering when running a multidisciplinary analysis. 
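    Two orderings of the same three disciplinary analyses, [F1, F2, F3] and
    [F1, F3, F2], are run with Gauss-Seidel from the same initial assignment,
    and their convergence histories are plotted side by side.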
27 | """ 28 | def F1(A): 29 | A["y1"] = A["y2"] - A["x"] 30 | return A 31 | 32 | def F2(A): 33 | A["y2"] = np.sin(A["y1"] + A["y3"]) 34 | return A 35 | 36 | def F3(A): 37 | A["y3"] = np.cos(A["x"] + A["y2"] + A["y1"]) 38 | 39 | def gauss_seidel(Fs, A, k_max, eps=1e-4): 40 | """Gauss-Seidel Algorithm (from Chapter 21), altered for plotting convergence""" 41 | k, converged = 0, False 42 | history = {var: [val] for (var, val) in A.items() if var != "x"} 43 | while (not converged) and (k < k_max): 44 | k += 1 45 | A_old = deepcopy(A) 46 | for F in Fs: 47 | F(A) 48 | converged = np.all([np.isclose(A[v], A_old[v], rtol=eps) for v in A]) 49 | for (var, val) in A.items(): 50 | if var != "x": 51 | history[var].append(val) 52 | return (A, history, converged) 53 | 54 | # Run two orderings for 20 iterations each and plot 55 | k_max = 20 56 | k = np.arange(0, k_max + 1) 57 | orderings = [[F1, F2, F3], [F1, F3, F2]] 58 | _, axs = plt.subplots(nrows=2, figsize=(10, 6), sharex=True) 59 | for i, Fs in enumerate(orderings): 60 | A = {"x": 1.0, "y1": 1.0, "y2": 1.0, "y3": 1.0} 61 | A, history, _ = gauss_seidel(Fs, A, k_max) 62 | print(A) 63 | axs[i].plot(k, history["y1"], label="y1", c="tab:purple") 64 | axs[i].plot(k, history["y2"], label="y2", c="tab:blue") 65 | axs[i].plot(k, history["y3"], label="y3", c="tab:green") 66 | axs[i].scatter(k, history["y1"], c="tab:purple") 67 | axs[i].scatter(k, history["y2"], c="tab:blue") 68 | axs[i].scatter(k, history["y3"], c="tab:green") 69 | axs[i].set_yticks([-2, -1, 0, 1]) 70 | axs[0].legend(bbox_to_anchor=(1.01, 1), loc='upper left') 71 | axs[1].set_xticks([0, 5, 10, 15, 20]) 72 | axs[1].set_xlabel("iteration") 73 | plt.tight_layout() 74 | plt.show() 75 | -------------------------------------------------------------------------------- /src/exercises/ch05_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import numpy as np 4 | 5 | from ch04 import line_search 6 | from ch05 import GradientDescent 7 | 8 | 9 | def exercise_5_2(x0: float): 10 | """Exercise 5.2: Gradient Descent for f(x) = x^4""" 11 | def f(x): return x**4 12 | def deriv(x): return 4*(x**3) 13 | 14 | M = GradientDescent(alpha=1.0) 15 | M.initialize(f, deriv, x0) 16 | print("Initial point: x0 = ", x0) 17 | print("Derivative at x0: ", deriv(x0)) 18 | x = M.step(f, deriv, x0) 19 | print("After 1 iteration of Gradient Descent, x = ", x) 20 | print("Derivative at x: ", deriv(x)) 21 | x = M.step(f, deriv, x) 22 | print("After 2 iterations of Gradient Descent, x = ", x) 23 | print("Derivative at x: ", deriv(x)) 24 | 25 | 26 | def exercise_5_3(): 27 | """Exercise 5.3: Gradient Descent: Unit Step vs. 
Exact Line Search""" 28 | def f(x): return np.exp(x) + np.exp(-x) 29 | def deriv(x): return np.exp(x) - np.exp(-x) 30 | x0 = 10.0 31 | 32 | # Unit Step 33 | M = GradientDescent(alpha=1.0) 34 | M.initialize(f, deriv, x0) 35 | x = M.step(f, deriv, x0) 36 | print("With Unit Step:") 37 | print("After 1 iteration of Gradient Descent, x = ", x) 38 | with np.errstate(over='ignore'): 39 | print("Derivative at x: ", deriv(x), "\n") 40 | print("=> Gradient Descent diverges.") 41 | 42 | # Exact Line Search 43 | with np.errstate(over='ignore'): 44 | x = line_search(f, x0, deriv(x0)) 45 | print("With Exact Line Search:") 46 | print("After 1 iteration of Gradient Descent, x = ", x) 47 | print("Derivative at x: ", deriv(x)) 48 | print("=> Gradient Descent converges to the minimum.") 49 | 50 | 51 | def exercise_5_7(): 52 | """Exercise 5.7: Conjugate Gradient Descent""" 53 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 + 5 54 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 55 | x0 = np.ones(2) 56 | 57 | # Conjugate Gradient Descent (taken directly from ch05.py) 58 | g = grad_f(x0) 59 | d = -g 60 | 61 | # First Step 62 | g_prime = grad_f(x0) 63 | beta = np.maximum(0, np.dot(g_prime, g_prime - g) / np.dot(g, g)) 64 | d = -g_prime + beta*d 65 | x = line_search(f, x0, d) 66 | g = g_prime.copy() 67 | print("After 1 iteration of CG, the normalized descent direction is d = ", d/np.linalg.norm(d)) 68 | 69 | # Second Step 70 | g_prime = grad_f(x) 71 | beta = np.maximum(0, np.dot(g_prime, g_prime - g) / np.dot(g, g)) 72 | d = -g_prime + beta*d 73 | x = line_search(f, x, d) 74 | print("After 2 iterations of CG, x = ", x) 75 | print("Gradient at x: ", grad_f(x)) 76 | print("=> Conjugate Gradient Descent converges after 2 iterations.") 77 | -------------------------------------------------------------------------------- /src/ch10.py: -------------------------------------------------------------------------------- 1 | """Chapter 10: Constraints""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | def penalty_method(f: Callable[[np.ndarray], float], 8 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 9 | p: Callable[[np.ndarray], float], 10 | x: np.ndarray, 11 | k_max: int, 12 | rho: float = 1.0, 13 | gamma: float = 2.0) -> np.ndarray: 14 | """ 15 | The penalty method for objective function `f`, penalty function `p`, initial 16 | point `x`, number of iterations `k_max`, initial penalty `rho` > 0, and 17 | penalty multiplier `gamma` > 1. The method `minimize` should be replaced 18 | with a suitable unconstrained minimization method. 19 | """ 20 | for _ in range(k_max): 21 | x = minimize(lambda x: f(x) + rho * p(x), x) 22 | p *= gamma 23 | if p(x) == 0: 24 | return x 25 | return x 26 | 27 | 28 | def augmented_lagrange_method(f: Callable[[np.ndarray], float], 29 | h: Callable[[np.ndarray], np.ndarray], 30 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 31 | x: np.ndarray, 32 | k_max: int, 33 | rho: float = 1.0, 34 | gamma: float = 2.0) -> np.ndarray: 35 | """ 36 | The augmented Lagrange method for objective function `f`, equality constraint 37 | function `h`, initial point `x`, number of iterations `k_max`, initial penalty 38 | `rho` > 0, and penalty multiplier `gamma` > 1. The function `minimize` 39 | should be replaced with the minimization method of your choice. 
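
    A minimal usage sketch (hypothetical problem; `minimize` must be supplied
    by the caller, e.g. a wrapper around an unconstrained optimizer):

        def f(x): return x[0]**2 + x[1]**2
        def h(x): return np.array([x[0] + x[1] - 1.0])  # feasible when h(x) = 0
        x = augmented_lagrange_method(f, h, minimize, np.zeros(2), k_max=10)
        # the constrained optimum is x = [0.5, 0.5]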
40 | """ 41 | lam = np.zeros(len(h(x))) 42 | for _ in range(k_max): 43 | def p(x): return ((rho/2) * np.sum(h(x)**2)) - np.dot(lam, h(x)) 44 | x = minimize(lambda x: f(x) + p(x), x) 45 | lam -= rho * h(x) 46 | rho *= gamma 47 | return x 48 | 49 | 50 | def interior_point_method(f: Callable[[np.ndarray], float], 51 | p: Callable[[np.ndarray], float], 52 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 53 | x: np.ndarray, 54 | rho: float = 1.0, 55 | gamma: float = 2.0, 56 | eps: float = 0.001) -> np.ndarray: 57 | """ 58 | The interior point method for objective function `f`, barrier function `p`, 59 | initial point `x`, initial penalty `rho` > 0, penalty multiplier `gamma` > 1, 60 | and stopping tolerance `eps` > 0. 61 | """ 62 | delta = np.inf 63 | while delta > eps: 64 | x_prime = minimize(lambda x: f(x) + (p(x) / rho), x) 65 | delta = np.linalg.norm(x_prime - x) 66 | x = x_prime 67 | rho *= gamma 68 | return x 69 | -------------------------------------------------------------------------------- /src/figures/ch04_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch04 import backtracking_line_search, solve_trust_region_subproblem 7 | from TestFunctions import rosenbrock 8 | from convenience import plot_contour 9 | 10 | 11 | def figure_4_2(): 12 | """ 13 | Figure 4.2: Backtracking line search used on the Rosenbrock function. 14 | The black lines show the eight iterations taken by the descent method and 15 | the red lines show the points considered during each line search. 16 | """ 17 | x = np.array([-1.75, -1.6]) # Starting point 18 | 19 | fig = plt.figure() 20 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 2.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 21 | plt.scatter([x[0]], [x[1]], c="black", s=10) 22 | plt.annotate("1", x, xytext=[-5, -13], textcoords='offset points') 23 | for i in range(7): 24 | d = -rosenbrock.grad(x) # use negative gradient as the descent direction 25 | alpha_opt = backtracking_line_search(rosenbrock, rosenbrock.grad, x, d, alpha=100.0) 26 | x_next = x + alpha_opt*d 27 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 28 | plt.scatter([x_next[0]], [x_next[1]], c="black", s=10) 29 | plt.annotate(str(i + 2), x_next, xytext=[-5, -13], textcoords='offset points') 30 | x = x_next 31 | plt.title("Figure 4.2") 32 | plt.show() 33 | 34 | 35 | def figure_4_9(): # TODO - Needs some more work - something isn't working quite right 36 | """Figure 4.9: Trust region optimization used on the Rosenbrock function""" 37 | x = np.array([-1.75, -1.75]) # Starting point 38 | k_max, eta_1, eta_2, gamma_1, gamma_2, delta = 9, 0.25, 2.0, 0.5, 2.0, 1.0 39 | 40 | fig = plt.figure() 41 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 3.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 42 | plt.scatter([x[0]], [x[1]], c="black", s=10) 43 | plt.annotate("1", x, xytext=[-5, -13], textcoords='offset points') 44 | 45 | # Trust Region Descent (taking from ch04.py) 46 | y = rosenbrock(x) 47 | for i in range(k_max): 48 | circle = plt.Circle((x[0], x[1]), delta, color='black', fill=False, alpha=0.1*(i + 1)) 49 | plt.gca().add_patch(circle) 50 | x_prime, y_prime = solve_trust_region_subproblem(rosenbrock.grad, rosenbrock.hess, x, delta) 51 | r = (y - rosenbrock(x_prime)) / (y - y_prime) 52 | if r < eta_1: 53 | delta *= gamma_1 54 | else: 55 | x, y = 
x_prime, y_prime 56 | if r > eta_2: 57 | delta *= gamma_2 58 | plt.scatter([x[0]], [x[1]], c="black", s=10) 59 | plt.annotate(str(i + 2), x, xytext=[-5, -13], textcoords='offset points') 60 | plt.xlim((-2.1, 2.1)) 61 | plt.ylim((-2.1, 3.1)) 62 | plt.gca().set_aspect('equal') 63 | plt.show() 64 | -------------------------------------------------------------------------------- /src/ch15.py: -------------------------------------------------------------------------------- 1 | """Chapter 15: Probabilistic Surrogate Models""" 2 | 3 | import numpy as np 4 | 5 | from scipy.stats import multivariate_normal 6 | from typing import Callable 7 | 8 | 9 | 10 | def mu(X: np.ndarray, m: Callable[[np.ndarray], float]) -> np.ndarray: 11 | """ 12 | A method for constructing a mean vector given a list of design points `X` 13 | and a mean function `m`. 14 | """ 15 | return np.apply_along_axis(m, 1, X) 16 | 17 | 18 | def Sigma(X: np.ndarray, k: Callable[[np.ndarray, np.ndarray], float]) -> np.ndarray: 19 | """ 20 | A method for constructing a covariance matrix given one list of design 21 | points `X` and a covariance function `k`. 22 | """ 23 | return np.ndarray([[k(x, x_prime) for x_prime in X] for x in X]) 24 | 25 | 26 | def K(X: np.ndarray, X_prime: np.ndarray, k: Callable[[np.ndarray, np.ndarray], float]) -> np.ndarray: 27 | """ 28 | A method for constructing a covariance matrix given two lists of design 29 | points `X` and `X_prime`, and a covariance function `k`. 30 | """ 31 | return np.ndarray([[k(x, x_prime) for x_prime in X_prime] for x in X]) 32 | 33 | 34 | def mvnrand(mu: np.ndarray, Sigma: np.ndarray, inflation: float = 1e-6) -> np.ndarray: 35 | """TODO""" 36 | N = multivariate_normal(mu, Sigma + inflation*np.eye(len(mu))) 37 | return N.rvs() 38 | 39 | class GaussianProcess(): 40 | """TODO""" 41 | def __init__(self, 42 | m: Callable[[np.ndarray], float], 43 | k: Callable[[np.ndarray, np.ndarray], float], 44 | X: np.ndarray, 45 | y: np.ndarray, 46 | v: float): 47 | self.m = m # mean 48 | self.k = k # covariance function 49 | self.X = X # design points 50 | self.y = y # objective values 51 | self.v = v # noise variance 52 | 53 | def rand(self, X: np.ndarray) -> np.ndarray: 54 | """TODO""" 55 | return mvnrand(mu(X, self.m), Sigma(X, self.k)) 56 | 57 | def predict(self, X_pred: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 58 | """TODO""" 59 | m, k, v = self.m, self.k, self.v 60 | tmp = np.linalg.solve(K(X_pred, self.X, k), K(self.X, self.X, k) + v * np.eye(len(self.X))) 61 | mu_p = mu(X_pred, m) + tmp @ (self.y - mu(self.X, m)) 62 | S = K(X_pred, X_pred, k) - tmp @ K(self.X, X_pred, k) 63 | v_p = np.diag(S) + np.finfo(np.float64).eps # eps prevents numerical issues 64 | return (mu_p, v_p) 65 | 66 | def append(self, x: np.ndarray, y: float): 67 | if len(self.X) == 0: 68 | self.X = np.array([x]) 69 | self.y = np.array([y]) 70 | else: 71 | self.X = np.append(self.X, x) 72 | self.y = np.append(self.y, y) 73 | 74 | def pop(self) -> tuple[np.ndarray, float]: 75 | popped_x = self.X[-1] 76 | popped_y = self.y[-1] 77 | self.X = np.delete(self.X, -1) 78 | self.y = np.delete(self.y, -1) 79 | return (popped_x, popped_y) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # optimization-code-py 2 | 3 | [![Python 
package](https://github.com/griffinbholt/optimization-code-py/actions/workflows/python-package.yml/badge.svg)](https://github.com/griffinbholt/optimization-code-py/actions/workflows/python-package.yml) 4 | 5 | *Original Julia Code by: Mykel Kochenderfer and Tim Wheeler* 6 | 7 | *Python Versions by: Griffin Holt* 8 | 9 | Python versions of all typeset code blocks from the book, [Algorithms for Optimization](https://algorithmsbook.com/optimization/). 10 | 11 | I share this content in the hopes that it helps you and makes the decision making algorithms more approachable and accessible (especially to those not as familiar with Julia). Thank you for reading! 12 | 13 | If you encounter any issues or have pressing comments, please [file an issue](https://github.com/griffinbholt/optimization-code-py/issues/new/choose). (There are likely to still be bugs as I have not finished testing all of the classes and functions.) 14 | 15 | ## Progress Update: (19 Mar 2024) 16 | 17 | | Chapter(s) | Written | Tested | Notes | 18 | |--:|:--|:--|:--| 19 | | 1 | N/A | N/A | No code blocks in this chapter | 20 | | 2 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 21 | | 3 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 22 | | 4 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 23 | | 5 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 24 | | 6 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 25 | | 7 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 26 | | 8 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌ 70% | `adaptive_simulated_annealing`, `natural_evolution_strategies`, and `covariance_matrix_adaptation` need to be tested | 27 | | 9 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 28 | | 10 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 29 | | 11 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 30 | | 12 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 31 | | 13 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 32 | | 14 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 33 | | 15 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 34 | | 16 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 35 | | 17 | N/A | N/A | No code blocks in this chapter | 36 | | 18 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 37 | | 19 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 38 | | 20 | 0% | 0% | Need to figure out replacement library for `ExprRules.jl` | 39 | | 21 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 40 | 41 | I have also written code for pertinent figures, examples, exercises through Chapter 9. 42 | 43 | I have also written code for test functions (`TestFunctions.py`) and convenience functions (`convenience.py`). 
44 | 45 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/tests/ch06/test_second_order_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch06 import newtons_method, secant_method, DFP, BFGS, LimitedMemoryBFGS 6 | from TestFunctions import booth, branin, rosenbrock, wheeler 7 | 8 | class TestSecondOrderMethods(): 9 | def test_newtons_method(self, eps=1e-8): 10 | f_min, x_min = booth.global_min() 11 | x = np.array([9.0, 8.0]) 12 | x_prime = newtons_method(booth.grad, booth.hess, x, eps=1e-5, k_max=1) 13 | assert np.abs(booth(x_prime) - f_min) < eps 14 | assert np.all(np.abs(x_prime - x_min) < eps) 15 | 16 | def test_secant_method(self, eps=1e-8): 17 | def f(x): return np.exp(x) + np.exp(-x) - 3*x + 2 18 | def f_prime(x): return np.exp(x) - np.exp(-x) - 3 19 | x_min = np.log((3 + np.sqrt(13))/2) 20 | f_min = f(x_min) 21 | 22 | x0, x1 = -4, -3 23 | x = secant_method(f_prime, x0, x1, eps) 24 | assert np.abs(x - x_min) < eps 25 | assert np.abs(f(x) - f_min) < eps 26 | 27 | def test_DFP(self, eps=1e-8): 28 | M = DFP() 29 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 30 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 31 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 32 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 33 | 34 | def test_BFGS(self, eps=1e-8): 35 | M = BFGS() 36 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 37 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 38 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 39 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 40 | 41 | def test_limited_memory_BFGS(self, eps=1e-4): 42 | for m in range(1, 4): 43 | M = LimitedMemoryBFGS(m) 44 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 45 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 46 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 47 | with np.errstate(over="ignore", invalid="ignore"): 48 | self.run_on(wheeler, max_steps=6, x=np.ones(2)*5, M=M, eps=eps) 49 | 50 | def run_on(self, f, max_steps, x, M, eps): 51 | f_min, x_min = f.global_min() 52 | M.initialize(f, f.grad, x) 53 | for _ in range(max_steps): 54 | x = M.step(f, f.grad, x) 55 | assert np.abs(f(x) - f_min) < eps 56 | assert np.all(np.abs(x - x_min) < eps) 57 | 58 | def run_on_branin(self, max_steps, x, M, eps): 59 | f_min, x_min = branin.global_min() 60 | M.initialize(branin, branin.grad, x) 61 | for _ in range(max_steps): 62 | x = M.step(branin, branin.grad, x) 63 | assert np.abs(branin(x) - f_min[0]) < eps 64 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 65 | -------------------------------------------------------------------------------- /src/examples/ch08_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from scipy.stats import norm, multivariate_normal 7 | 8 | from ch08 import cross_entropy_method 9 | from TestFunctions import ackley 10 | 11 | 12 | def example_8_2(): 13 | """ 14 | Example 8.2: Exploring the effect of distribution variance and temperature 15 | on the performance of simulated annealing. 
The blue regions indicate the 16 | 5% to 95% and 25% to 75% empirical Gaussian quantiles of the objective 17 | function value. 18 | """ 19 | f = ackley 20 | x0 = np.array([15.0, 15.0]) 21 | n_trials = 500 22 | k_max = 100 23 | iterations = np.arange(k_max + 1) 24 | 25 | _, ax = plt.subplots(3, 3, figsize=(10, 10), sharex=True, sharey=True) 26 | for p, sigma in enumerate([1.0, 5.0, 25.0]): 27 | T = multivariate_normal(np.zeros(2), sigma * np.eye(2)) 28 | for q, t1 in enumerate([1.0, 10.0, 25.0]): 29 | def t(k, t1=t1): return t1 / k 30 | traj = np.zeros((n_trials, k_max + 1)) 31 | 32 | # Run Trials 33 | for j in range(n_trials): 34 | # Simulated Annealing 35 | x = x0.copy() 36 | y = f(x) 37 | traj[j, 0] = y 38 | 39 | x_best, y_best = x, y 40 | for k in range(1, k_max + 1): 41 | x_prime = x + T.rvs() 42 | y_prime = f(x_prime) 43 | delta_y = y_prime - y 44 | if (delta_y <= 0) or (np.random.rand() < np.exp(-delta_y / t(k))): 45 | x, y = x_prime, y_prime 46 | if y_prime < y_best: 47 | x_best, y_best = x_prime, y_prime 48 | traj[j, k] = y 49 | 50 | # Plot the results 51 | traj_means = np.mean(traj, axis=0) 52 | traj_stds = np.std(traj, axis=0) 53 | quantiles = np.zeros((4, k_max + 1)) 54 | for j in range(k_max + 1): 55 | quantiles[:, j] = norm(traj_means[j], traj_stds[j]).ppf(q=[0.05, 0.25, 0.75, 0.95]) 56 | 57 | ax[p, q].fill_between( 58 | iterations, 59 | quantiles[3, :], 60 | quantiles[0, :], 61 | color="tab:blue", 62 | alpha=0.15 63 | ) 64 | ax[p, q].fill_between( 65 | iterations, 66 | quantiles[2, :], 67 | quantiles[1, :], 68 | color="tab:blue", 69 | alpha=0.50 70 | ) 71 | ax[p, q].plot(iterations, traj_means, color="tab:blue") 72 | ax[p, q].set_ylim((-5, 30)) 73 | ax[p, q].set_xlim((0, k_max)) 74 | ax[p, q].set_title("$\sigma = $" + str(int(sigma)) + ", $t^{(1)} = $" + str(int(t1))) 75 | for j in range(3): 76 | ax[2, j].set_xlabel("iteration") 77 | ax[j, 0].set_ylabel("$y$") 78 | plt.suptitle("Example 8.2") 79 | plt.show() 80 | 81 | 82 | def example_8_3(): 83 | """ 84 | Example 8.3: An example of using the cross-entropy method. 85 | 86 | We can use `scipy.stats` classes to represent, sample from, and fit proposal 87 | distributions. The parameter vector `theta` is replaced by a distribution `P`. 88 | Calling `P.rvs(m)` will produce a m x n matrix corresponding to m samples of 89 | n-dimensional samples from `P`, and calling `fit` will fit a new distribution 90 | of the given input type. 
91 | """ 92 | np.random.seed(0) 93 | def f(x): return np.linalg.norm(x) 94 | mu = np.array([0.5, 1.5]) 95 | Sigma = np.array([[1.0, 0.2], [0.2, 2.0]]) 96 | P = multivariate_normal(mu, Sigma) 97 | k_max = 10 98 | P = cross_entropy_method(f, P, k_max) 99 | print("P.mu =", P.mean) 100 | -------------------------------------------------------------------------------- /src/convenience.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import cm, ticker 5 | from matplotlib.patches import Ellipse 6 | import matplotlib.transforms as transforms 7 | 8 | 9 | VIRIDIS_REV = cm.viridis.reversed() 10 | 11 | 12 | def normalize(x: np.ndarray, 13 | ord: int | float | str = 2, 14 | axis: int | tuple[int, int] = None, 15 | keepdims: bool = False) -> np.ndarray: 16 | nmlzd_x = np.divide(x, np.linalg.norm(x, ord, axis, keepdims)) 17 | nmlzd_x = np.where(np.abs(nmlzd_x) < 1e-16, 0, nmlzd_x) 18 | return nmlzd_x 19 | 20 | 21 | def plot_surface(fig, f, xlim, ylim, zlim, xstride, ystride, subplot_coords=None): 22 | X, Y, Z = _make_3d_data(f, xlim, ylim, xstride, ystride) 23 | if subplot_coords is not None: 24 | ax = fig.add_subplot(*subplot_coords, projection='3d') 25 | else: 26 | ax = fig.add_subplot(projection='3d') 27 | ax.plot_surface(X, Y, Z, cmap=VIRIDIS_REV) 28 | ax.set_zlim(*zlim) # Customize the z-axis 29 | ax.set_xlabel('$x_1$') 30 | ax.set_ylabel('$x_2$') 31 | return ax 32 | 33 | 34 | def plot_contour(fig, f, xlim, ylim, xstride, ystride, levels=None, filled=False, clabel=False, subplot_coords=None): 35 | X, Y, Z = _make_3d_data(f, xlim, ylim, xstride, ystride) 36 | if subplot_coords is not None: 37 | ax = fig.add_subplot(*subplot_coords) 38 | else: 39 | ax = fig.add_subplot() 40 | if filled: 41 | if levels is not None: 42 | CS = ax.contourf(X, Y, Z, levels=levels, cmap=VIRIDIS_REV, zorder=1) 43 | else: 44 | CS = ax.contourf(X, Y, Z, locator=ticker.LogLocator(), cmap=VIRIDIS_REV, zorder=1) 45 | else: 46 | if levels is not None: 47 | CS = ax.contour(X, Y, Z, levels=levels, cmap=VIRIDIS_REV, zorder=1) 48 | else: 49 | CS = ax.contour(X, Y, Z, locator=ticker.LogLocator(), cmap=VIRIDIS_REV, zorder=1) 50 | if clabel: 51 | ax.clabel(CS, inline=True, fontsize=10) 52 | ax.set_aspect('equal') 53 | ax.set_xlabel('$x_1$') 54 | ax.set_ylabel('$x_2$') 55 | return ax 56 | 57 | 58 | def _make_3d_data(f, xlim, ylim, xstride, ystride): 59 | X = np.arange(xlim[0], xlim[1], xstride) 60 | Y = np.arange(ylim[0], ylim[1], ystride) 61 | X, Y = np.meshgrid(X, Y) 62 | Z = f(np.array([X, Y])) 63 | return X, Y, Z 64 | 65 | 66 | def confidence_ellipse(mean, cov, ax, n_std=3.0, facecolor='none', **kwargs): 67 | """ 68 | Create a plot of the covariance confidence ellipse of *x* and *y*. 69 | 70 | Parameters 71 | ---------- 72 | mean: array-like, shape (2, ) 73 | Mean 74 | 75 | cov : array-like, shape (2, 2) 76 | Covariance matrix 77 | 78 | ax : matplotlib.axes.Axes 79 | The axes object to draw the ellipse into. 80 | 81 | n_std : float 82 | The number of standard deviations to determine the ellipse's radiuses. 83 | 84 | **kwargs 85 | Forwarded to `~matplotlib.patches.Ellipse` 86 | 87 | Returns 88 | ------- 89 | matplotlib.patches.Ellipse 90 | """ 91 | pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) 92 | # Using a special case to obtain the eigenvalues of this 93 | # two-dimensional dataset. 
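    # (For the correlation matrix [[1, p], [p, 1]] the eigenvalues are 1 + p
    # and 1 - p, so the unit ellipse below has radii sqrt(1 + p) and
    # sqrt(1 - p) along axes rotated 45 degrees; the affine transform further
    # down rescales those axes by the x and y standard deviations and
    # re-centers the ellipse at the mean.)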
94 | ell_radius_x = np.sqrt(1 + pearson) 95 | ell_radius_y = np.sqrt(1 - pearson) 96 | ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, 97 | facecolor=facecolor, **kwargs) 98 | 99 | # Calculating the standard deviation of x from 100 | # the squareroot of the variance and multiplying 101 | # with the given number of standard deviations. 102 | scale_x = np.sqrt(cov[0, 0]) * n_std 103 | mean_x = mean[0] 104 | 105 | # calculating the standard deviation of y ... 106 | scale_y = np.sqrt(cov[1, 1]) * n_std 107 | mean_y = mean[1] 108 | 109 | transf = transforms.Affine2D() \ 110 | .rotate_deg(45) \ 111 | .scale(scale_x, scale_y) \ 112 | .translate(mean_x, mean_y) 113 | 114 | ellipse.set_transform(transf + ax.transData) 115 | return ax.add_patch(ellipse) 116 | -------------------------------------------------------------------------------- /src/ch18.py: -------------------------------------------------------------------------------- 1 | """Chapter 18: Uncertainty Propagation""" 2 | 3 | import numdifftools as nd 4 | import numpy as np 5 | 6 | from itertools import product 7 | from numpy.polynomial import Polynomial 8 | from scipy import integrate 9 | from scipy.special import factorial 10 | from typing import Callable 11 | 12 | from ch15 import K, GaussianProcess 13 | 14 | 15 | def taylor_approx(f: Callable[[np.ndarray], float], 16 | mu: np.ndarray, 17 | v: np.ndarray, 18 | secondorder: bool = False) -> tuple[float, float]: 19 | """ 20 | A method for automatically computing the Taylor approximation of the mean 21 | and variance of objective function `f` at design point `x` with noise mean 22 | vector `mu` and variance vector `v`. The Boolean parameter `secondorder` 23 | controls whether the first- or second-order approximation is compared.s 24 | """ 25 | mu_hat = f(mu) 26 | grad = nd.Gradient(f)(mu) 27 | v_hat = np.do(grad**2, v) 28 | if secondorder: 29 | H = nd.Hessian(f)(mu) 30 | mu_hat += np.dot(np.diag(H), v) / 2 31 | v_hat += np.dot(v, (H**2) @ v) / 2 32 | return (mu_hat, v_hat) 33 | 34 | 35 | def legendre(i: int) -> Polynomial: 36 | """ 37 | Method for constructing Legendre polynomial orthogonal basis functions, 38 | where `i` indicates the construction of b_i. 39 | """ # TODO - Test to make sure constructs correct polynomial 40 | n = i - 1 41 | p = Polynomial([-1, 0, 1])**n 42 | p = p.deriv(n) 43 | return p / ((2**n)*factorial(n)) 44 | 45 | 46 | def laguerre(i: int) -> Polynomial: 47 | """ 48 | Method for constructing Laguerre polynomial orthogonal basis functions, 49 | where `i` indicates the construction of b_i. 50 | """ # TODO - Test to make sure constructs correct polynomial 51 | p = Polynomial([1]) 52 | for _ in range(i - 1): 53 | p = (p.deriv() - p).integ() + 1 54 | return p 55 | 56 | 57 | def hermite(i: int) -> Polynomial: 58 | """ 59 | Method for constructing Hermite polynomial orthogonal basis functions, 60 | where `i` indicates the construction of b_i. 
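    For reference, this recurrence produces the probabilists' Hermite
    polynomials: e.g., hermite(3) evaluates to x**2 - 1 and hermite(4) to
    x**3 - 3*x.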
61 | """ # TODO - Test to make sure constructs correct polynomial 62 | p = Polynomial([1]) 63 | x = Polynomial([0, 1]) 64 | for _ in range(i - 1): 65 | p = x*p - p.deriv() 66 | return p 67 | 68 | 69 | def orthogonal_recurrence(bs: list[Polynomial], 70 | p: Callable[[float], float], 71 | dom: tuple[float, float], 72 | eps: float = 1e-6) -> Polynomial: 73 | """ 74 | The Stieltjes algorithm for constructing the next polynomial basis function 75 | b_{i + 1} according to the orthogonal recurrence relation, where `bs` contains 76 | {b_1, ..., b_i}, `p` is the probability distribution, and `dom` is a tuple 77 | containing a lower and upper bound for z. The optional parameter `eps` 78 | controls the absolute tolerance of the numerical integration. We make use of 79 | the `numpy.polynomials.Polynomial` class. 80 | """ 81 | i = len(bs) 82 | c1 = integrate.quad(lambda z: z*(bs[i](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 83 | c2 = integrate.quad(lambda z: (bs[i](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 84 | alpha = c1 / c2 85 | if i > 1: 86 | c3 = integrate.quad(lambda z: (bs[i - 1](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 87 | beta = c2 / c3 88 | return Polynomial([-alpha, 1])*bs[i] - beta*bs[i - 1] 89 | return Polynomial([-alpha, 1])*bs[i] 90 | 91 | 92 | def polynomial_chaos_bases(bases1d: list[Callable[[float], float]]) -> list[Callable[[float], float]]: 93 | """ 94 | A method for constructing multivariate basis functions where `bases1d` contains 95 | lists of univariate orthogonal basis functions for each random variable. 96 | """ 97 | bases = [] 98 | for a in product(*bases1d): 99 | bases.append(lambda z: np.prod([b(z[i]) for (i, b) in enumerate(a)])) 100 | return bases 101 | 102 | 103 | def bayesian_monte_carlo(GP: GaussianProcess, 104 | w: np.ndarray, 105 | mu_z: np.ndarray, 106 | Sigma_z: np.ndarray) -> tuple[float, float]: 107 | """ 108 | A method for obtaining the Bayesian Monte Carlo estimate for the expected 109 | value of a function under a Gaussian process `GP` with a Gaussian kernel 110 | with weights `w`, where the variables are drawn from a normal distribution 111 | with mean `mu_z` and covariance `Sigma_z`. 112 | """ 113 | W = np.diag(w**2) 114 | invK = np.linalg.inv(K(GP.X, GP.X, GP.k)) 115 | q = np.exp(-(np.dot(GP.X - mu_z, np.linalg.inv(W + Sigma_z @ (GP.X - mu_z)))) / 2) # TODO - Need to check/test dimensions 116 | q *= np.linalg.det((1/W) @ Sigma_z + np.eye(len(w)))**(-0.5) 117 | mu = np.dot(q, invK @ GP.y) 118 | v = np.linalg.det(2 * (1/W) @ Sigma_z + np.eye(len(w)))**(-0.5) - np.dot(q, invK @ q)[0] 119 | return (mu, v) 120 | -------------------------------------------------------------------------------- /src/ch06.py: -------------------------------------------------------------------------------- 1 | """Chapter 6: Second-Order Methods""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | from ch04 import line_search 8 | from ch05 import DescentMethod 9 | 10 | 11 | def newtons_method(grad_f: Callable[[np.ndarray], np.ndarray], 12 | H: Callable[[np.ndarray], np.ndarray], 13 | x: np.ndarray, 14 | eps: float, 15 | k_max: int) -> np.ndarray: 16 | """ 17 | Newton's method, which takes the gradient of the function `grad_f`, 18 | the Hessian of the objective function `H`, an initial point `x`, a step size 19 | tolerance `eps`, and a maximum number of iterations `k_max`. 
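    A minimal usage sketch (the quadratic objective and starting point are
    illustrative, not from the book):

        grad_f = lambda x: np.array([2*x[0], 8*x[1]])
        H = lambda x: np.array([[2.0, 0.0], [0.0, 8.0]])
        x_min = newtons_method(grad_f, H, np.array([3.0, -2.0]), eps=1e-6, k_max=10)
        # converges to the minimizer [0., 0.] after one step on this quadratic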
20 | """ 21 | k, Delta = 0, np.full(len(x), np.inf) 22 | while (np.linalg.norm(Delta) > eps) and (k < k_max): 23 | Delta = np.linalg.solve(H(x), grad_f(x)) 24 | x -= Delta 25 | k += 1 26 | return x 27 | 28 | 29 | def secant_method(f_prime: Callable[[float], float], x0: float, x1: float, eps: float): 30 | """ 31 | The secant method for univariate function minimization. The inputs are the 32 | first derivative `f_prime` of the target function, two initial points `x0` 33 | and `x1`, and the desired tolerance `eps`. The final x-coordinate is 34 | returned. 35 | """ 36 | g0 = f_prime(x0) 37 | delta = np.inf 38 | while np.abs(delta) > eps: 39 | g1 = f_prime(x1) 40 | delta = ((x1 - x0) / (g1 - g0)) * g1 41 | x0, x1, g0 = x1, x1 - delta, g1 42 | return x1 43 | 44 | 45 | class DFP(DescentMethod): 46 | """The Davidon-Fletcher-Powell descent method""" 47 | def __init__(self, Q: np.ndarray = None): 48 | self.Q = Q # approximate inverse Hessian 49 | 50 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 51 | m = len(x) 52 | self.Q = np.eye(m) 53 | 54 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 55 | g = grad_f(x) 56 | x_prime = line_search(f, x, -self.Q @ g) 57 | g_prime = grad_f(x_prime) 58 | delta = x_prime - x 59 | gamma = g_prime - g 60 | self.Q -= self.Q_update(delta, gamma, self.Q @ gamma) 61 | return x_prime 62 | 63 | def Q_update(self, delta: np.ndarray, gamma: np.ndarray, Q_gamma: np.ndarray) -> np.ndarray: 64 | return (np.outer(Q_gamma, Q_gamma) / np.dot(Q_gamma, gamma)) - (np.outer(delta, delta) / np.dot(delta, gamma)) 65 | 66 | 67 | class BFGS(DFP): 68 | """ 69 | The Broyden-Fletcher-Goldfarb-Shanno descent method 70 | 71 | NOTE: BFGS is the same as DFP, except for the `Q` update rule. 72 | """ 73 | def __init__(self, Q: np.ndarray = None): 74 | super().__init__(Q) 75 | 76 | def Q_update(self, delta: np.ndarray, gamma: np.ndarray, Q_gamma: np.ndarray) -> np.ndarray: 77 | outer_dQg = np.outer(delta, Q_gamma) 78 | dot_dg = np.dot(delta, gamma) 79 | return ((outer_dQg + outer_dQg.T) / dot_dg)\ 80 | - ((1 + (np.dot(Q_gamma, gamma) / dot_dg)) * (np.outer(delta, delta) / dot_dg)) 81 | 82 | 83 | class LimitedMemoryBFGS(DescentMethod): 84 | """ 85 | The Limited-memory BFGS descent method, which avoids storing the approximate 86 | inverse Hessian. The parameter `m` determines the history size. It also 87 | stores the step differences `deltas`, the gradient changes `gammas`, and 88 | storage vectors `qs`. 
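    A usage sketch mirroring the pattern in the test suite (`f`, `grad_f`, `x`,
    and `k_max` are placeholders for the user's problem):

        M = LimitedMemoryBFGS(m=3)
        M.initialize(f, grad_f, x)
        for _ in range(k_max):
            x = M.step(f, grad_f, x)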
89 | """ 90 | def __init__(self, m: int, deltas: list[np.ndarray] = None, gammas: list[np.ndarray] = None, qs: np.ndarray = None): 91 | self.m = m # history size 92 | self.deltas = deltas # step differences 93 | self.gammas = gammas # gradient changes 94 | self.qs = qs # storage vectors 95 | 96 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 97 | self.deltas = [] 98 | self.gammas = [] 99 | self.qs = [] 100 | 101 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 102 | g = grad_f(x) 103 | m = len(self.deltas) 104 | if m > 0: 105 | q = g.copy() 106 | for i in range(m - 1, -1, -1): 107 | self.qs[i] = q.copy() 108 | q -= (np.dot(self.deltas[i], q) / np.dot(self.gammas[i], self.deltas[i])) * self.gammas[i] 109 | z = (self.gammas[-1] * self.deltas[-1] * q) / np.dot(self.gammas[-1], self.gammas[-1]) 110 | for i in range(m): 111 | z += self.deltas[i] * ((np.dot(self.deltas[i], self.qs[i]) - np.dot(self.gammas[i], z)) / np.dot(self.gammas[i], self.deltas[i])) 112 | x_prime = line_search(f, x, -z) 113 | else: 114 | x_prime = line_search(f, x, -g) 115 | g_prime = grad_f(x_prime) 116 | self.deltas.append(x_prime - x); self.gammas.append(g_prime - g); self.qs.append(np.zeros(len(x))) 117 | while len(self.deltas) > self.m: 118 | self.deltas.pop(0); self.gammas.pop(0); self.qs.pop(0) 119 | return x_prime 120 | -------------------------------------------------------------------------------- /src/tests/ch08/test_stochastic_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import cvxpy as cp 4 | import numpy as np 5 | import warnings 6 | 7 | from scipy.stats import norm, multivariate_normal 8 | 9 | from ch05 import GradientDescent, Adam, HyperNesterovMomentum 10 | from ch08 import * 11 | from TestFunctions import ackley, booth, branin, rosenbrock, wheeler 12 | 13 | 14 | class TestStochasticMethods(): 15 | def test_noisy_descent(self, eps=1e-8): 16 | np.random.seed(42) 17 | def sigma(k): return 1/(k**3) 18 | 19 | M = NoisyDescent(GradientDescent(alpha=0.001), sigma) 20 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 21 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 22 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 23 | 24 | M = NoisyDescent(Adam(alpha=0.001, gamma_v=0.9, gamma_s=0.999, eps=1e-8), sigma) 25 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 26 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-4) 27 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-5) 28 | 29 | M = NoisyDescent(HyperNesterovMomentum(alpha_0=0.01, mu=0.000001, beta=0.9), sigma) 30 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 31 | 32 | def run_on(self, f, max_steps, x, M, eps): 33 | f_min, x_min = f.global_min() 34 | M.initialize(f, f.grad, x) 35 | for _ in range(max_steps): 36 | x = M.step(f, f.grad, x) 37 | assert np.abs(f(x) - f_min) < eps 38 | assert np.all(np.abs(x - x_min) < eps) 39 | 40 | def run_on_branin(self, max_steps, x, M, eps): 41 | f_min, x_min = branin.global_min() 42 | M.initialize(branin, branin.grad, x) 43 | for _ in range(max_steps): 44 | x = M.step(branin, branin.grad, x) 45 | assert np.abs(branin(x) - f_min[0]) < eps 46 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in 
x_min.T]) 47 | 48 | def test_rand_positive_spanning_set(self): 49 | warnings.simplefilter(action='ignore', category=FutureWarning) 50 | for alpha in [1.0, 0.25, 0.25/4, 0.25/16]: 51 | for n in [2, 3, 5, 10, 100]: 52 | D = rand_positive_spanning_set(alpha, n).T 53 | assert np.linalg.matrix_rank(D) == n # full row rank 54 | 55 | x = cp.Variable(n + 1) 56 | constraints = [D @ x == -D @ np.ones(n + 1), x >= 0] 57 | problem = cp.Problem(cp.Minimize(0), constraints) 58 | problem.solve() 59 | assert problem.status == "optimal" # Dx = -D1, x >= 0 is feasible 60 | 61 | def test_mesh_adaptive_direct_search(self): 62 | f_min, x_min = wheeler.global_min() 63 | x = np.array([0.7, 0.9]) 64 | x = mesh_adaptive_direct_search(wheeler, x, eps=1e-8) 65 | assert np.abs(wheeler(x) - f_min) < 1e-6 66 | assert np.all(np.abs(x - x_min) < 1e-3) 67 | 68 | def test_simulated_annealing(self): 69 | np.random.seed(42) 70 | x0 = 0.5 71 | def f(x): return np.sin(5*(x + np.pi/3 + np.pi/10)) + 2*np.sin(x + np.pi/4 + np.pi/10) 72 | def t(k, gamma=0.5, t1=1.0): return (gamma**(k - 1)) * t1 73 | x_best, y_best = x0, f(x0) 74 | for _ in range(100): 75 | x = simulated_annealing(f, x=x0, T=norm(0, 1.5), t=t, k_max=20) 76 | if f(x) < y_best: 77 | x_best, y_best = x, f(x) 78 | assert np.abs(y_best - (-2.937)) < 1e-2 79 | 80 | x0 = np.array([10.0, 10.0]) 81 | def t(k, gamma=0.75, t1=10.0): return (gamma**(k - 1)) * t1 82 | T = multivariate_normal(np.zeros(2), 25*np.eye(2)) 83 | x_best, y_best = x0, ackley(x0) 84 | for _ in range(1000): 85 | x = simulated_annealing(ackley, x=x0, T=T, t=t, k_max=100) 86 | if ackley(x) < y_best: 87 | x_best, y_best = x, ackley(x) 88 | assert y_best < 0.15 89 | 90 | def test_adaptive_simulated_annealing(self): 91 | pass 92 | 93 | def test_cross_entropy_method(self, eps=1e-5): 94 | f_min, x_min = branin.global_min() 95 | P = multivariate_normal(np.array([3.0, 7.5]), 5*np.eye(2)) 96 | try_again = True 97 | while try_again: 98 | try: 99 | P = cross_entropy_method(branin, P, k_max=100) 100 | try_again = False 101 | except Exception as e: 102 | print(e) 103 | x = P.mean 104 | assert np.abs(branin(x) - f_min[0]) < eps 105 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 106 | 107 | f_min, x_min = booth.global_min() 108 | P = multivariate_normal(np.array([-0.0, -0.0]), 10*np.eye(2)) 109 | try_again = True 110 | while try_again: 111 | try: 112 | P = cross_entropy_method(booth, P, k_max=10) 113 | try_again = False 114 | except Exception as e: 115 | print(e) 116 | x = P.mean 117 | assert np.abs(booth(x) - f_min) < eps 118 | assert np.all(np.abs(x - x_min) < eps) 119 | 120 | def test_natural_evolution_strategies(self): 121 | pass 122 | 123 | def test_covariance_matrix_adaptation(self): 124 | pass 125 | 126 | -------------------------------------------------------------------------------- /src/exercises/ch06_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch05 import GradientDescent, ConjugateGradientDescent 7 | from ch06 import newtons_method 8 | 9 | 10 | def exercise_6_3(x0: float): 11 | """Exercise 6.3: Applying Newton's Method to f(x) = x^2""" 12 | def f(x): return x**2 13 | def grad_f(x): return 2*x 14 | def H(x): return 2 15 | x = x0 16 | 17 | # Single Iteration of Newton's Method (for univariate function) 18 | Delta = grad_f(x) / H(x) 19 | x -= Delta 20 | 21 | print("After 1 iteration, x = ", x) 22 | print("Gradient at x: 
", grad_f(x)) 23 | print("=> Only 1 step of Newton's Method is needed to minimize f(x) = x^2.") 24 | 25 | 26 | def exercise_6_4(): 27 | """ 28 | Exercise 6.4: Applying Newton's Method, Gradient Descent, and the 29 | Conjugate Gradient Method to f(x) = (1/2)x'Hx. 30 | """ 31 | def H(x): return np.array([[1.0, 0.0], [0.0, 1000.0]]) 32 | def f(x): return 0.5 * np.dot(x, H(x) @ x) 33 | def grad_f(x): return H(x) @ x 34 | x0 = np.array([1.0, 1.0]) 35 | 36 | # Newton's Method 37 | print("Newton's Method:") 38 | x_nm = newtons_method(grad_f, H, x0.copy(), eps=1e-5, k_max=1) 39 | print("After 1 iteration of Newton's Method, x = ", x_nm) 40 | print("Gradient at x: ", grad_f(x_nm)) 41 | print("=> Newton's Method converges to the minimum after only 1 iteration.\n") 42 | 43 | # Gradient Descent 44 | print("Gradient Descent: (w/ unnormalized gradient)") 45 | M = GradientDescent(alpha=1) 46 | M.initialize(f, grad_f, x0.copy()) 47 | x_gd = M.step(f, grad_f, x0.copy()) 48 | print("After 1 iteration of Gradient Descent, x = ", x_gd) 49 | print("Gradient at x: ", grad_f(x_gd)) 50 | x_gd = M.step(f, grad_f, x_gd) 51 | print("After 2 iterations of Gradient Descent, x = ", x_gd) 52 | print("Gradient at x: ", x_gd) 53 | print("=> Gradient Descent does not converge after 2 iterations.\n") 54 | 55 | # Conjugate Gradient Method 56 | print("Conjugate Gradient Method") 57 | M = ConjugateGradientDescent() 58 | M.initialize(f, grad_f, x0.copy()) 59 | x_cg = M.step(f, grad_f, x0.copy()) 60 | print("After 1 iteration of Conjugate Gradient, x = ", x_cg) 61 | print("Gradient at x: ", grad_f(x_cg)) 62 | x_cg = M.step(f, grad_f, x_cg) 63 | print("After 2 iterations of Conjugate Gradient, x = ", x_cg) 64 | print("Gradient at x: ", x_cg) 65 | print("=> Gradient Descent converges after 2 iterations.\n") 66 | 67 | 68 | def exercise_6_5(): 69 | """Exercise 6.5: Comparison of Newton's Method vs. Secant Method""" 70 | def f(x): return x**2 + x**4 71 | def deriv(x): return 2*x + 4*(x**3) 72 | def deriv2(x): return 2 + 12*(x**2) 73 | n_iter = 10 74 | 75 | # Initialize Newton's Method 76 | x = -3 77 | newton_x = [x] 78 | newton_f = [f(x)] 79 | newton_deriv = [deriv(x)] 80 | 81 | # Initialize Secant Method 82 | x0, x1 = -4, -3 83 | g0 = deriv(x0) 84 | secant_x = [x1] 85 | secant_f = [f(x1)] 86 | secant_deriv = [deriv(x1)] 87 | 88 | for _ in range(n_iter): 89 | # Newton's Method (for univariate function) 90 | Delta_nm = deriv(x) / deriv2(x) 91 | x -= Delta_nm 92 | newton_x.append(x) 93 | newton_deriv.append(0) 94 | newton_f.append(f(x)) 95 | newton_x.append(x) 96 | newton_deriv.append(deriv(x)) 97 | 98 | # Secant Method 99 | g1 = deriv(x1) 100 | Delta_sm = ((x1 - x0) / (g1 - g0)) * g1 101 | x0, x1, g0 = x1, x1 - Delta_sm, g1 102 | secant_x.append(x1) 103 | secant_deriv.append(0) 104 | secant_f.append(f(x1)) 105 | secant_x.append(x1) 106 | secant_deriv.append(deriv(x1)) 107 | 108 | # Plots the results 109 | fig, ax = plt.subplots(1, 2, figsize=(11, 4)) 110 | 111 | # f(x_k) vs iterations, k 112 | iters = np.arange(len(newton_f)) 113 | ax[0].plot(iters, newton_f, color="tab:blue") 114 | ax[0].plot(iters, secant_f, color="tab:red") 115 | ax[0].set_yscale("log") 116 | ax[0].set_xlabel("iterations, $k$") 117 | ax[0].set_ylabel("$f(x_k)$") 118 | 119 | # f_prime vs. 
x 120 | t = np.linspace(newton_x[0] - 0.5, newton_x[-1] + 0.5, 1000) 121 | f_prime = [deriv(t_i) for t_i in t] 122 | ax[1].plot(newton_x, newton_deriv, color="tab:blue", label="Newton") 123 | ax[1].plot(secant_x, secant_deriv, color="tab:red", label="secant") 124 | ax[1].hlines([0], xmin=newton_x[0] - 0.5, xmax=newton_x[-1] + 0.5, colors=['black'], linewidth=0.5) 125 | ax[1].plot(t, f_prime, color="black", linewidth=0.5) 126 | ax[1].set_xlabel("$x_k$") 127 | ax[1].set_ylabel("$f'(x_k)$") 128 | ax[1].legend(loc='upper left', bbox_to_anchor=(1.05, 1.0)) 129 | 130 | fig.tight_layout() 131 | plt.show() 132 | 133 | 134 | def exercise_6_9(): 135 | """Exercise 6.9: Newton's Method for f(x) = (x1 + 1)^2 + (x2 + 3)^2 + 4""" 136 | def f(x): return (x[0] + 1)**2 + (x[1] + 3)**2 + 4 137 | def grad_f(x): return np.array([2*(x[0] + 1), 2*(x[1] + 3)]) 138 | def H(x): return np.array([[2, 0], [0, 2]]) 139 | 140 | x = np.zeros(2) 141 | x_prime = newtons_method(grad_f, H, x, eps=1e-5, k_max=1) 142 | print("After 1 step of Newton's Method, x = ", x_prime) 143 | print("Gradient at x: ", grad_f(x_prime)) 144 | print("=> Newton's Method converges to the minimum after only 1 iteration.") 145 | -------------------------------------------------------------------------------- /src/ch12.py: -------------------------------------------------------------------------------- 1 | """Chapter 12: Multiobjective Optimization""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | from ch09 import SelectionMethod, CrossoverMethod, MutationMethod 8 | 9 | 10 | def dominates(y: np.ndarray, y_prime: np.ndarray) -> bool: 11 | """ 12 | A method for checking whether x dominates x_prime, where `y` is the vector 13 | of objective values for f(x) and `y_prime` is the vector of objective values 14 | for f(x_prime). 15 | """ 16 | return np.all(y <= y_prime) and np.any(y < y_prime) 17 | 18 | 19 | def naive_pareto(xs: np.ndarray, ys: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 20 | """ 21 | A method for generating a Pareto frontier using randomly sampled design 22 | ponts `xs` and their multiobjective values `ys`. Both the Pareto-optimal 23 | design points and their objective values are returned. 24 | """ 25 | pareto_xs, pareto_ys = [], [] 26 | for (x, y) in zip(xs, ys): 27 | if not np.any([dominates(y_prime, y) for y_prime in ys]): 28 | pareto_xs.append(x) 29 | pareto_ys.append(y) 30 | return (np.array(pareto_xs), np.array(pareto_ys)) 31 | 32 | 33 | def weight_pareto(f1: Callable[[np.ndarray], float], 34 | f2: Callable[[np.ndarray], float], 35 | optimize: Callable[[Callable[[np.ndarray], float]], np.ndarray], 36 | npts: int) -> np.ndarray: 37 | """ 38 | The weighted sum method for generating a Pareto frontier, which takes 39 | objective functions `f1` and `f2` and number of Pareto points `npts`. 40 | """ 41 | return np.array([optimize(lambda x: w1 * f1(x) + (1 - w1) * f2(x)) for w1 in np.linspace(0, stop=1, num=npts)]) 42 | 43 | 44 | def vector_evaluated_genetic_algorithm(f: Callable[[np.ndarray], np.ndarray], 45 | population: np.ndarray, 46 | k_max: int, 47 | S: SelectionMethod, 48 | C: CrossoverMethod, 49 | M: MutationMethod) -> np.ndarray: 50 | """ 51 | The vector-evaluated genetic algorithm which takes a vector-valued objective 52 | function `f`, an initial population, number of iterations `k_max`, a 53 | `SelectionMethod` `S`, a `CrossoverMethod` `C`, and a `MutationMethod` `M`. 54 | The resulting population is returned. 
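    Each of the m objectives selects its own subpopulation of m_pop // m
    parents; the subpopulations are then shuffled together before crossover
    and mutation, so every objective influences the next generation.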
55 | """ 56 | m = len(f(population[0])) 57 | m_pop = len(population) 58 | m_subpop = m_pop // m 59 | for _ in range(k_max): 60 | ys = np.apply_along_axis(f, 1, population) 61 | parents = np.apply_along_axis(lambda y: S.select(y)[:m_subpop], 0, ys) 62 | 63 | p = np.random.permutation(2*m_pop) 64 | def p_ind(i): return parents[(p[i] - 1) % m_pop][(p[i] - 1) // m_pop] 65 | parents = np.array([[p_ind(i), p_ind(i + 1)] for i in range(0, 2*m_pop, 2)]) 66 | children = np.array([C.crossover(population[p[0]], population[p[1]]) for p in parents]) 67 | population = np.array([M.mutate(c) for c in children]) 68 | return population 69 | 70 | 71 | def get_non_domination_levels(ys: np.ndarray) -> np.ndarray: 72 | """ 73 | A function for getting the nondomination levels of an array of 74 | multiobjective function evaluations `ys`. 75 | """ 76 | L, m = 0, len(ys) 77 | levels = np.zeros(m).astype(int) 78 | while np.min(levels) == 0: 79 | L += 1 80 | for (i, y) in enumerate(ys): 81 | if (levels[i] == 0) and\ 82 | not np.any([(levels[i] == 0 or levels[i] == L) & dominates(ys[i], y) for i in range(m)]): 83 | levels[i] = L 84 | return levels 85 | 86 | 87 | def discard_closest_pair(xs: np.ndarray, ys: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 88 | """ 89 | This method is used to remove one individual from a filter that is above 90 | capacity. The method takes the filter's list of design points `xs` and 91 | associated objective function values `ys`. 92 | """ 93 | index, min_dist = 0, np.inf 94 | for (i, y) in enumerate(ys): 95 | for (j, y_prime) in enumerate(ys[i:]): 96 | dist = np.linalg.norm(y - y_prime) 97 | if dist < min_dist: 98 | index, min_dist = np.random.choices([i, j]), dist 99 | xs = np.delete(xs, index, axis=0) 100 | ys = np.delete(ys, index, axis=0) 101 | return (xs, ys) 102 | 103 | 104 | def update_pareto_filter(filter_xs: np.ndarray, 105 | filter_ys: np.ndarray, 106 | xs: np.ndarray, 107 | ys: np.ndarray, 108 | capacity: int = None) -> tuple[np.ndarray, np.ndarray]: 109 | """ 110 | A method for updating a Pareto filter with design points `filter_xs`, 111 | corresponding objective function values `filter_ys`, a population with 112 | design points `xs` and objective values `ys`, and filter capacity `capactity` 113 | which defaults to the population size. 
114 | """ 115 | capacity = len(xs) if capacity is None else capacity 116 | for (x, y) in zip(xs, ys): 117 | if not np.any([dominates(y_prime, y) for y_prime in filter_ys]): 118 | filter_xs = np.append(filter_xs, x) 119 | filter_ys = np.append(filter_ys, y) 120 | filter_xs, filter_ys = naive_pareto(filter_xs, filter_ys) 121 | while len(filter_xs) > capacity: 122 | filter_xs, filter_ys = discard_closest_pair(filter_xs, filter_ys) 123 | return (filter_xs, filter_ys) 124 | -------------------------------------------------------------------------------- /src/tests/ch03/test_bracketing_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import pytest 4 | 5 | from ch03 import * 6 | 7 | 8 | class TestBracketingMethods(): 9 | @pytest.fixture(autouse=True) 10 | def run_before(self): 11 | self.f = lambda x: 2*(x**4) + 5*(x**3) - 3*x 12 | self.f_prime = lambda x: 8*(x**3) + 15*(x**2) - 3 13 | self.x_local_min = 0.40550 14 | self.x_global_min = -1.75296 15 | 16 | def test_bracket_minimum(self): 17 | a, b = bracket_minimum(self.f, x=0.0) 18 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 19 | 20 | a, b = bracket_minimum(self.f, x=-1.0) 21 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 22 | 23 | def test_fibonacci_search(self): 24 | a, b = fibonacci_search(self.f, a=-5, b=5, n=10) 25 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 26 | 27 | a, b = fibonacci_search(self.f, a=-5, b=0, n=10) 28 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 29 | 30 | a, b = fibonacci_search(self.f, a=0, b=5, n=10) 31 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 32 | 33 | def test_golden_section_search(self, eps=1e-5): 34 | a, b = golden_section_search(self.f, a=-5, b=5, n=10) 35 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 36 | 37 | a, b = -5, 5 38 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 39 | a, b = golden_section_search(self.f, a, b, n) 40 | assert np.abs(self.x_global_min - a) < eps 41 | assert np.abs(self.x_global_min - b) < eps 42 | 43 | a, b = golden_section_search(self.f, a=-5, b=0, n=10) 44 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 45 | 46 | a, b = -5, 0 47 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 48 | a, b = golden_section_search(self.f, a, b, n) 49 | assert np.abs(self.x_global_min - a) < eps 50 | assert np.abs(self.x_global_min - b) < eps 51 | 52 | a, b = golden_section_search(self.f, a=0, b=5, n=10) 53 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 54 | 55 | a, b = 0, 5 56 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 57 | a, b = golden_section_search(self.f, a, b, n) 58 | assert np.abs(self.x_local_min - a) < eps 59 | assert np.abs(self.x_local_min - b) < eps 60 | 61 | def test_quadratic_fit_search(self, eps=1e-5): 62 | a, b, c = quadratic_fit_search(self.f, a=-5, b=0, c=5, n=1000) 63 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= c + eps) 64 | assert np.abs(self.x_global_min - b) <= eps 65 | 66 | a, b, c = quadratic_fit_search(self.f, a=-10, b=-5, c=0, n=1000) 67 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= c + eps) 68 | assert np.abs(self.x_global_min - b) <= eps 69 | 70 | a, b, c = quadratic_fit_search(self.f, a=0.1, b=5, c=10, n=1000) 71 | assert (a - eps <= self.x_local_min) and (self.x_local_min <= c + eps) 72 | assert np.abs(self.x_local_min - b) <= eps 73 | 74 | def 
test_shubert_piyavskii(self, eps=1e-5): 75 | P_min, intervals = shubert_piyavskii(self.f, a=-2, b=1, l=20, eps=eps) 76 | assert np.abs(P_min[0] - self.x_global_min) <= eps 77 | assert len(intervals) > 0 78 | assert self.in_an_interval(self.x_global_min, intervals) 79 | 80 | P_min, intervals = shubert_piyavskii(self.f, a=0, b=1, l=20, eps=eps) 81 | assert np.abs(P_min[0] - self.x_local_min) <= eps 82 | assert len(intervals) > 0 83 | assert self.in_an_interval(self.x_local_min, intervals) 84 | 85 | P_min, intervals = shubert_piyavskii(lambda x: np.sin(x) - 0.5*x, a=-5, b=7, l=1.5, eps=eps) 86 | assert np.abs(P_min[0] - (5*np.pi/3)) <= eps 87 | assert len(intervals) > 0 88 | assert self.in_an_interval(5*np.pi/3, intervals) 89 | 90 | def in_an_interval(self, x_min: float, intervals: list[tuple[float, float]]) -> bool: 91 | in_interval = False 92 | for interval in intervals: 93 | in_interval = in_interval or (interval[0] <= x_min and x_min <= interval[1]) 94 | return in_interval 95 | 96 | def test_bisection(self, eps=1e-5): 97 | a, b = bisection(self.f_prime, a=-5, b=5, eps=eps/10) 98 | assert (np.abs(self.x_global_min - a) <= eps) or (np.abs(self.x_local_min - a) <= eps) 99 | assert (np.abs(self.x_global_min - b) <= eps) or (np.abs(self.x_local_min - b) <= eps) 100 | 101 | a, b = bisection(self.f_prime, a=-5, b=-0.5, eps=eps/10) 102 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= b + eps) 103 | assert np.abs(self.x_global_min - a) <= eps 104 | assert np.abs(self.x_global_min - b) <= eps 105 | 106 | a, b = bisection(self.f_prime, a=0, b=5, eps=eps/10) 107 | assert (a - eps <= self.x_local_min) and (self.x_local_min <= b + eps) 108 | assert np.abs(self.x_local_min - a) <= eps 109 | assert np.abs(self.x_local_min - b) <= eps 110 | 111 | def test_bracket_sign_change(self): 112 | a, b = bracket_sign_change(self.f_prime, a=-5, b=0) 113 | assert self.f_prime(a) * self.f_prime(b) <= 0 114 | 115 | a, b = bracket_sign_change(self.f_prime, a=-5, b=5) 116 | assert self.f_prime(a) * self.f_prime(b) <= 0 117 | 118 | a, b = bracket_sign_change(self.f_prime, a=0, b=5) 119 | assert self.f_prime(a) * self.f_prime(b) <= 0 120 | -------------------------------------------------------------------------------- /src/tests/ch05/test_first_order_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch05 import * 6 | from TestFunctions import booth, branin, michalewicz, rosenbrock, wheeler 7 | 8 | class TestFirstOrderMethods(): 9 | def test_gradient_descent(self, eps: float = 1e-8): 10 | M = GradientDescent(alpha=0.001) 11 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 12 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 13 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 14 | 15 | def test_conjugate_gradient(self, eps: float = 1e-6): 16 | M = ConjugateGradientDescent() 17 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 18 | self.run_on_branin(max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 19 | self.run_on(michalewicz, max_steps=5, x=np.ones(2), M=M, eps=1e-4) 20 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 21 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 22 | 23 | def test_momentum(self, eps: float = 1e-8): 24 | M = Momentum(alpha=0.001, beta=0.9) 25 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), 
M=M, eps=eps) 26 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 27 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 28 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=eps) 29 | 30 | def test_nesterov_momentum(self, eps: float = 1e-8): 31 | M = NesterovMomentum(alpha=0.001, beta=0.9) 32 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 33 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 34 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 35 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=eps) 36 | 37 | def test_adagrad(self, eps: float = 1e-8): 38 | M = Adagrad(alpha=0.1, eps=1e-3) 39 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 40 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 41 | M = Adagrad(alpha=1.0, eps=1e-3) 42 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 43 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 44 | 45 | def test_rmsprop(self, eps: float = 1e-3): 46 | M = RMSProp(alpha=0.001, gamma=0.9, eps=1e-3) 47 | self.run_on(booth, max_steps=10000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 48 | self.run_on_branin(max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 49 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 50 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=1e-2) 51 | 52 | def test_adadelta(self, eps: float = 1e-8): 53 | M = Adadelta(gamma_s=0.95, gamma_x=0.95, eps=1e-3) 54 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=1e-3) 55 | 56 | def test_adam(self, eps: float = 1e-8): 57 | M = Adam(alpha=0.001, gamma_v=0.9, gamma_s=0.999, eps=1e-8) 58 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 59 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-4) 60 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 61 | self.run_on(wheeler, max_steps=100000, x=np.zeros(2), M=M, eps=eps) 62 | self.run_on(michalewicz, max_steps=100000, x=np.ones(2), M=M, eps=1e-4) 63 | 64 | def test_hypergradient_descent(self, eps: float = 1e-8): 65 | M = HyperGradientDescent(alpha_0=0.00001, mu=0.00001) 66 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 67 | M = HyperGradientDescent(alpha_0=0.000001, mu=0.000001) 68 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 69 | M = HyperGradientDescent(alpha_0=0.0001, mu=0.0000000001) 70 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 71 | M = HyperGradientDescent(alpha_0=0.0001, mu=0.00001) 72 | self.run_on(wheeler, max_steps=100000, x=np.zeros(2), M=M, eps=eps) 73 | 74 | def test_hypernesterov_momentum(self, eps: float = 1e-8): 75 | M = HyperNesterovMomentum(alpha_0=0.000001, mu=0.000001, beta=0.9) 76 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 77 | M = HyperNesterovMomentum(alpha_0=0.0000001, mu=0.0000001, beta=0.9) 78 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 79 | M = HyperNesterovMomentum(alpha_0=0.0001, mu=0.0000000001, beta=0.9) 80 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 81 | M = HyperNesterovMomentum(alpha_0=0.01, mu=0.000001, beta=0.9) 82 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 83 | 84 | def run_on(self, f, max_steps, x, M, eps): 85 | f_min, x_min = f.global_min() 86 | M.initialize(f, f.grad, x) 87 | for 
_ in range(max_steps): 88 | x = M.step(f, f.grad, x) 89 | assert np.abs(f(x) - f_min) < eps 90 | assert np.all(np.abs(x - x_min) < eps) 91 | 92 | def run_on_branin(self, max_steps, x, M, eps): 93 | f_min, x_min = branin.global_min() 94 | M.initialize(branin, branin.grad, x) 95 | for _ in range(max_steps): 96 | x = M.step(branin, branin.grad, x) 97 | assert np.abs(branin(x) - f_min[0]) < eps 98 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 99 | -------------------------------------------------------------------------------- /src/ch16.py: -------------------------------------------------------------------------------- 1 | """Chapter 16: Surrogate Optimization""" 2 | 3 | import numpy as np 4 | 5 | from scipy.stats import norm 6 | from typing import Callable 7 | 8 | from ch15 import GaussianProcess 9 | 10 | 11 | def prob_of_improvement(y_min: float, mu: float, sigma: float) -> float: 12 | """ 13 | Computing the probability of improvement for a given best y value `y_min`, 14 | mean `mu`, and standard deviation `sigma`. 15 | """ 16 | return norm(mu, sigma).cdf(y_min) 17 | 18 | 19 | def expected_improvement(y_min: float, mu: float, sigma: float) -> float: 20 | """ 21 | Computing the expected improvment for a given best y value `y_min`, 22 | mean `mu`, and standard deviation `sigma`. 23 | """ 24 | p_imp = prob_of_improvement(y_min, mu, sigma) 25 | p_ymin = norm(mu, sigma).pdf(y_min) 26 | return (y_min - mu)*p_imp + (sigma**2)*p_ymin 27 | 28 | 29 | class SafeOpt(): 30 | """ 31 | The SafeOpt algorithm applied to an empty Gaussian process `GP`, a finite 32 | design space `X`, index of initial safe point `i`, objective function `f`. 33 | and safety threshold `y_max`. The optional parameters are the confidence 34 | scalar `beta` and the number of iterations `k_max`. A tuple containing the 35 | best safe upper bound and its index in `X` is returned. 36 | """ 37 | def __call__(self, 38 | GP: GaussianProcess, 39 | X: np.ndarray, 40 | i: int, 41 | f: Callable[[np.ndarray], float], 42 | y_max: float, 43 | beta: float = 3.0, 44 | k_max: int = 10) -> tuple[np.ndarray, int]: 45 | GP.append(X[i], f(X[i])) 46 | 47 | m = len(X) 48 | u, l = np.full(m, np.inf), np.full(m, -np.inf) 49 | S, M, E = np.full(m, False), np.full(m, False), np.full(m, False) 50 | 51 | for _ in range(k_max): 52 | u, l = self.update_confidence_intervals(GP, X, u, l, beta) 53 | S, M, E = self.compute_sets(GP, S, M, E, X, u, l, y_max, beta) 54 | i = self.get_new_query_point(M, E, u, l) 55 | if i == 0: 56 | break 57 | GP.push(X[i], f(X[i])) 58 | 59 | # return the best point 60 | u, l = self.update_confidence_intervals(GP, X, u, l, beta) 61 | S = (u <= y_max) 62 | if np.any(S): 63 | i_best = np.argmin(u[S]) 64 | u_best = u[S][i_best] 65 | i_best = np.where(i_best == np.cumsum(S))[0][0] 66 | return (u_best, i_best) 67 | return (None, 0) 68 | 69 | def update_confidence_intervals(self, 70 | GP: GaussianProcess, 71 | X: np.ndarray, 72 | u: np.ndarray, 73 | l: np.ndarray, 74 | beta: float) -> tuple[np.ndarray, np.ndarray]: 75 | """ 76 | A method for updating the lower and upper bounds used in SafeOpt, which 77 | takes the Gaussian process `GP`, the finite search space `X`, the upper- 78 | and lower-bound vectors `u` and `l`, and the confidence scalar `beta`. 
79 | """ 80 | mu_p, v_p = GP.predict(X) 81 | u = mu_p + np.sqrt(beta * v_p) 82 | l = mu_p - np.sqrt(beta * v_p) 83 | return (u, l) 84 | 85 | def compute_sets(self, 86 | GP: GaussianProcess, 87 | S: np.ndarray, 88 | M: np.ndarray, 89 | E: np.ndarray, 90 | X: np.ndarray, 91 | u: np.ndarray, 92 | l: np.ndarray, 93 | y_max: float, 94 | beta: float) -> tuple[np.ndarray, np.ndarray, np.ndarray]: 95 | """ 96 | A method for updating the safe `S`, minimizer `M`, and expander `E` sets 97 | used in SafeOpt. The sets are all Boolean vectors indicating whether the 98 | corresponding design point in `X` is in the set. The method also takes 99 | the Gaussian process `GP`, the upper and lower bounds `u` and `l`, 100 | respectively, the safety threshold `y_max`, and the confidence scalar 101 | `beta`. 102 | """ 103 | M.fill(False) 104 | E.fill(False) 105 | 106 | # safe set 107 | S = (u <= y_max) 108 | 109 | if np.any(S): 110 | # potential minimizers 111 | M[S] = (l[S] < np.min(u[S])) 112 | 113 | # maximum width (in M) 114 | w_max = np.max(u[M] - l[M]) 115 | 116 | # expanders - skip values in M or those with w <= w_max 117 | E = S & ~M # skip points in M 118 | if np.any(E): 119 | E[E] = (np.max(u[E] - l[E]) > w_max) 120 | for (i, e) in enumerate(E): 121 | if e and (u[i] - l[i] > w_max): 122 | GP.append(X[i], l[i]) 123 | mu_p, v_p = GP.predict(X[~S]) 124 | GP.pop() 125 | E[i] = np.any(mu_p + np.sqrt(beta * v_p) >= y_max) 126 | if E[i]: 127 | w_max = u[i] - l[i] 128 | 129 | return (S, M, E) 130 | 131 | def get_new_query_point(self, M: np.ndarray, E: np.ndarray, u: np.ndarray, l: np.ndarray) -> int: 132 | """ 133 | A method for obtaining the next query point in SafeOpt. The index of the 134 | point in `X` with the greatest width is returned. 135 | """ 136 | ME = M | E 137 | if np.any(ME): 138 | v = np.argmax(u[ME] - l[ME]) 139 | return np.where(v == np.cumsum(ME))[0][0] 140 | return 0 141 | -------------------------------------------------------------------------------- /src/ch11.py: -------------------------------------------------------------------------------- 1 | """Chapter 11: Linear Constrainted Optimization""" 2 | 3 | import numpy as np 4 | 5 | class LinearProgram(): 6 | """ 7 | A linear program in equality form: 8 | 9 | minimize c'x 10 | subject to: Ax = b 11 | x >= 0 12 | """ 13 | def __init__(self, A: np.ndarray, b: np.ndarray, c: np.ndarray): 14 | self.A = A 15 | self.b = b 16 | self.c = c 17 | 18 | def get_vertex(self, B: np.ndarray) -> np.ndarray: 19 | """A method for extracting the vertex associated with a partition `B` and an LP `self`""" 20 | b_inds = np.sort(B) 21 | AB = self.A[:, b_inds] 22 | xB = np.linalg.solve(AB, self.b) 23 | x = np.zeros(len(self.c)) 24 | x[b_inds] = xB 25 | return x 26 | 27 | def edge_transition(self, B: np.ndarray, q: int) -> tuple[int, float]: 28 | """ 29 | A method for computing the index `p` and the new coordinate value `x_q_prime` 30 | obtained by increasing index `q` of the vertex defined by the partition 31 | `B` in the equality-form linear program. 
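    The new value follows from the minimum-ratio test: letting d = A_B^{-1} a_q,
    where a_q is the column of the entering (nonbasic) variable,
    x_q' = min{ x_B[i] / d[i] : d[i] > 0 }, and `p` is the index attaining that
    minimum; x_q' is infinite when no d[i] > 0, i.e., the edge is unbounded.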
32 | """ 33 | A, b = self.A, self.b 34 | n = A.shape[1] 35 | b_inds = np.sort(B) 36 | n_inds = np.setdiff1d(np.arange(n), B) 37 | AB = A[:, b_inds] 38 | d, xB = np.linalg.solve(AB, A[:, n_inds[q]]), np.linalg.solve(AB, b) 39 | 40 | p, xq_prime = 0, np.inf 41 | for i in range(len(d)): 42 | if d[i] > 0: 43 | v = xB[i] / d[i] 44 | if v < xq_prime: 45 | p, xq_prime = i, v 46 | 47 | return (p, xq_prime) 48 | 49 | def step(self, B: np.ndarray) -> tuple[np.ndarray, bool]: 50 | """ 51 | A single iteration of the simplex algorithm in which the set `B` 52 | is moved from one vertex to a neighbor while maximally decreasing the 53 | objective function. The function takes a partition defined by `B`. 54 | """ 55 | A, b, c = self.A, self.b, self.c 56 | n = A.shape[1] 57 | b_inds = np.sort(B) 58 | n_inds = np.setdiff1d(np.arange(n), B) 59 | AB, AV = A[:, b_inds], A[:, n_inds] 60 | # xB = np.linalg.solve(AB, b) # TODO - never used? 61 | cB = c[b_inds] 62 | lam = np.linalg.solve(AB.T, cB) 63 | cV = c[n_inds] 64 | muV = cV - AV.T @ lam 65 | 66 | q, p, xq_prime, delta = 0.0, 0.0, np.inf, np.inf 67 | for i in range(len(muV)): 68 | if muV[i] < 0: 69 | pi, xi_prime = self.edge_transition(B, i) 70 | if muV[i] * xi_prime < delta: 71 | q, p, xq_prime, delta = i, pi, xi_prime, muV[i]*xi_prime 72 | if q == 0: 73 | return (B, True) # optimal point found 74 | 75 | if np.isinf(xq_prime): 76 | raise ValueError("unbounded") 77 | 78 | j = np.where(B == b_inds[p])[0][0] 79 | B[j] = n_inds[q] # swap indices 80 | return (B, False) # new vertex but not optimal 81 | 82 | def minimize_given_vertex_partition(self, B: np.ndarray) -> np.ndarray: 83 | """Minimizing a linear program given a vertex partition defined by `B`.""" 84 | done = False 85 | while not done: 86 | B, done = self.step(B) 87 | return B 88 | 89 | def minimize(self, return_idcs=False) -> np.ndarray: 90 | """ 91 | The simplex algorithm for solving linear programs in equality form 92 | when an initial partition is not known. 93 | """ 94 | A, b, c = self.A, self.b, self.c # TODO - c is not necessary? 95 | m, n = A.shape 96 | z = np.ones(m) 97 | Z = np.diag([1 if j >= 0 else -1 for j in b]) 98 | 99 | A_prime = np.hstack([A, Z]) 100 | b_prime = b 101 | c_prime = np.concatenate((np.zeros(n), z)) 102 | LP_init = LinearProgram(A_prime, b_prime, c_prime) 103 | B = np.arange(1, m + 1) + n 104 | B = LP_init.minimize_given_vertex_partition(B) 105 | 106 | if np.any(B > n): 107 | raise ValueError("infeasible") 108 | 109 | A_prime_prime = np.vstack([np.hstack([A, np.eye(m)]), 110 | np.hstack([np.zeros((m, n)), np.eye(m)])]) 111 | b_prime_prime = np.concatenate((b, np.zeros(m))) 112 | c_prime_prime = c_prime 113 | LP_opt = LinearProgram(A_prime_prime, b_prime_prime, c_prime_prime) 114 | B = LP_opt.minimize_given_vertex_partition(B) 115 | x_opt = LP_opt.get_vertex(B)[:n] 116 | if return_idcs: 117 | b_inds = np.sort(B) 118 | n_inds = np.setdiff1d(np.arange(n), B) 119 | return x_opt, b_inds, n_inds 120 | return x_opt 121 | 122 | def dual_certificate(self, x: np.ndarray, lam: np.ndarray, eps: float = 1e-6) -> bool: 123 | """ 124 | A method for checking whether a candidate solution given by design point 125 | `x` and dual point `lam` for the linear program is optimal. The 126 | parameter `eps` controls the tolerance for the equality constraint. 
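    A small illustrative check (this program is not from the book): for
    minimize x1 + 2*x2 subject to x1 + x2 = 2, x >= 0, the point x = (2, 0)
    with dual point lam = (1,) is optimal:

        LP = LinearProgram(np.array([[1.0, 1.0]]), np.array([2.0]), np.array([1.0, 2.0]))
        LP.dual_certificate(np.array([2.0, 0.0]), np.array([1.0]))  # True; c'x == b'lam == 2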
127 | """ 128 | A, b, c = self.A, self.b, self.c 129 | primal_feasible = np.all(x >= 0) and np.all(np.isclose(A @ x, b)) 130 | dual_feasible = np.all(A.T @ lam <= c) 131 | return primal_feasible and dual_feasible and np.isclose(np.dot(c, x), np.dot(b, lam), atol=eps) 132 | 133 | def minimize_lp_and_y(self) -> tuple[np.ndarray, float]: 134 | """ 135 | (From Chapter 19) Solves an LP and returns both the solutions and its 136 | value. An infeasible LP produces a `NaN` solution and an `np.inf` value. 137 | """ 138 | try: 139 | x = self.minimize() 140 | return (x, np.dot(x, self.c)) 141 | except ValueError: 142 | return (np.full(len(self.c), np.nan), np.inf) -------------------------------------------------------------------------------- /src/ch04.py: -------------------------------------------------------------------------------- 1 | """Chapter 4: Local Descent""" 2 | 3 | import cvxpy as cp 4 | import numpy as np 5 | import warnings 6 | 7 | from scipy.optimize import brent 8 | from typing import Callable 9 | 10 | from ch03 import bracket_minimum 11 | 12 | warnings.simplefilter(action='ignore', category=FutureWarning) 13 | 14 | 15 | def line_search(f: Callable[[np.ndarray], float], 16 | x: np.ndarray, 17 | d: np.ndarray, 18 | minimize: Callable[[Callable, float, float], float] = lambda f,a,b: brent(f, brack=(a, b)) 19 | ) -> np.ndarray: 20 | """ 21 | A method for conducting a line search, which finds the optimal step factor 22 | along a descent direction `d` from design point `x` to minimize function `f`. 23 | The `minimize` function can be implemented using a univariate optimization 24 | algorithm such as the Brent-Dekker method. 25 | """ 26 | def objective(alpha): return f(x + alpha*d) 27 | a, b = bracket_minimum(objective) 28 | alpha = minimize(objective, a, b) 29 | return x + alpha*d 30 | 31 | 32 | def backtracking_line_search(f: Callable[[np.ndarray], float], 33 | grad_f: Callable[[np.ndarray], np.ndarray], 34 | x: np.ndarray, 35 | d: np.ndarray, 36 | alpha: float, 37 | p: float = 0.5, 38 | beta: float = 1e-4) -> float: 39 | """ 40 | The backtracking line search algorithm, which takes objective function `f`, 41 | its gradient `grad_f`, the current design point `x`, a descent direction `d`, 42 | and the maximum step size `alpha`. We can optionally specify the reduction 43 | factor `p` and the first Wolfe condition parameter `beta`. 44 | """ 45 | y, g = f(x), grad_f(x) 46 | while f(x + alpha*d) > y + beta*alpha*np.dot(g, d): 47 | alpha *= p 48 | return alpha 49 | 50 | 51 | def strong_backtracking(f: Callable[[np.ndarray], float], 52 | grad_f: Callable[[np.ndarray], np.ndarray], 53 | x: np.ndarray, 54 | d: np.ndarray, 55 | alpha: float = 1.0, 56 | beta: float = 1e-4, 57 | sigma: float = 0.1) -> float: 58 | """ 59 | Strong backtracking approximate line search for satisfying the strong Wolfe 60 | conditions. It takes as input the objective function `f`, the gradient 61 | function `grad_f`, the design point `x` and direction `d` from which line 62 | search is conducted, an initial step size `alpha`, and the Wolfe condition 63 | parameters `beta` and `sigma`. The algorithm's bracket phase first brackets 64 | an interval containing a step size that satisfies the strong Wolfe conditions. 65 | It then reduces this bracketed interval in the zoom phase until a suitable 66 | step size is found. We interpolate with bisection, but other schemes can be 67 | used. 
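    A minimal usage sketch (the objective is illustrative, not from the book):

        f = lambda x: np.dot(x, x)
        grad_f = lambda x: 2*x
        alpha = strong_backtracking(f, grad_f, x=np.array([1.0, 2.0]), d=np.array([-1.0, -2.0]))
        # returns 1.0: the full step along -grad lands on the minimizer and
        # immediately satisfies the strong Wolfe conditions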
68 | """ 69 | y_0, g_0, y_prev, alpha_prev = f(x), np.dot(grad_f(x), d), np.nan, 0 70 | alpha_lo, alpha_hi = np.nan, np.nan 71 | 72 | # Bracket Phase 73 | while True: 74 | y = f(x + alpha*d) 75 | if (y > y_0 + beta*alpha*g_0) or ((not np.isnan(y_prev)) and (y >= y_prev)): 76 | alpha_lo, alpha_hi = alpha_prev, alpha 77 | break 78 | g = np.dot(grad_f(x + alpha*d), d) 79 | if abs(g) <= -sigma*g_0: 80 | return alpha 81 | elif g >= 0: 82 | alpha_lo, alpha_hi = alpha, alpha_prev 83 | break 84 | y_prev, alpha_prev, alpha = y, alpha, 2*alpha 85 | 86 | # Zoom Phase 87 | y_lo = f(x + alpha_lo*d) 88 | while True: 89 | alpha = (alpha_lo + alpha_hi) / 2 90 | y = f(x + alpha*d) 91 | if (y > y_0 + beta*alpha*g_0) or (y >= y_lo): 92 | alpha_hi = alpha 93 | else: 94 | g = np.dot(grad_f(x + alpha*d), d) 95 | if abs(g) <= -sigma*g_0: 96 | return alpha 97 | elif g*(alpha_hi - alpha_lo) >= 0: 98 | alpha_hi = alpha_lo 99 | alpha_lo = alpha 100 | 101 | 102 | def solve_trust_region_subproblem(grad_f: Callable[[np.ndarray], np.ndarray], 103 | H: Callable[[np.ndarray], np.ndarray], 104 | x0: np.ndarray, 105 | delta: float) -> tuple[np.ndarray, float]: 106 | """We have provided an example implementation of `solve_trust_region_subproblem` 107 | that uses a second-order Taylor approximation about `x0` with a circular trust region.""" 108 | x = cp.Variable(len(x0)) 109 | objective = cp.Minimize((grad_f(x0) @ (x - x0)) + (cp.quad_form(x - x0, H(x0)) / 2)) 110 | constraints = [cp.norm(x - x0) <= delta] 111 | problem = cp.Problem(objective, constraints) 112 | problem.solve() 113 | return (x.value, problem.value) 114 | 115 | 116 | def trust_region_descent(f: Callable[[np.ndarray], float], 117 | grad_f: Callable[[np.ndarray], np.ndarray], 118 | H: Callable[[np.ndarray], np.ndarray], 119 | x: np.ndarray, 120 | k_max: int, 121 | eta_1: float = 0.25, 122 | eta_2: float = 0.5, 123 | gamma_1: float = 0.5, 124 | gamma_2: float = 2.0, 125 | delta: float = 1.0, 126 | solve_trust_region_subproblem: Callable[[Callable, Callable, np.ndarray, float], tuple[np.ndarray, float]] = solve_trust_region_subproblem 127 | ) -> np.ndarray: 128 | """ 129 | The trust region descent method, where `f` is the objective function, 130 | `grad_f` produces the derivative, `H` produces the Hessian, `x` is an initial 131 | design point, and `k_max` is the number of iterations. The optional parameters 132 | `eta_1` and `eta_2` determine when the trust region radius `delta` is increased 133 | or decreased, and `gamma_` and `gamma_2` control the magnitude of the change. 134 | An implementation for `solve_trust_region_subproblem` must be provided that 135 | solves equation (4.10) in the texbook. 
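    A minimal usage sketch (a made-up convex quadratic; the default subproblem
    solver defined above requires cvxpy):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        H = lambda x: 2.0 * np.eye(len(x))
        x_best = trust_region_descent(f, grad_f, H, np.array([2.0, 2.0]), k_max=10)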
136 | """ 137 | y = f(x) 138 | for _ in range(k_max): 139 | x_prime, y_prime = solve_trust_region_subproblem(grad_f, H, x, delta) 140 | r = (y - f(x_prime)) / (y - y_prime) 141 | if r < eta_1: 142 | delta *= gamma_1 143 | else: 144 | x, y = x_prime, y_prime 145 | if r > eta_2: 146 | delta *= gamma_2 147 | return x 148 | -------------------------------------------------------------------------------- /src/ch03.py: -------------------------------------------------------------------------------- 1 | """Chapter 3: Bracketing""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | 8 | PHI = (1 + np.sqrt(5))/2 # golden ratio 9 | 10 | 11 | def bracket_minimum(f: Callable[[float], float], 12 | x: float = 0.0, 13 | s: float = 1e-2, 14 | k: float = 2.0) -> tuple[float, float]: 15 | """ 16 | An algorithm for bracketing an interval in which a local minimum must exist. 17 | It takes as input a univariate function `f` and starting position `x`, which 18 | defaults to 0.0. The starting step size `s` and the expansion factor `k` can 19 | be specified. It returns a tuple containing the new interval [a, b]. 20 | """ 21 | a, y_a = x, f(x) 22 | b, y_b = a + s, f(a + s) 23 | if y_b > y_a: 24 | a, b, = b, a 25 | y_a, y_b = y_b, y_a 26 | s = -s 27 | while True: 28 | c, y_c = b + s, f(b + s) 29 | if y_c > y_b: 30 | return (a, c) if a < c else (c, a) 31 | a, y_a, b, y_b = b, y_b, c, y_c 32 | s *= k 33 | 34 | 35 | def fibonacci_search(f: Callable[[float], float], 36 | a: float, 37 | b: float, 38 | n: int, 39 | eps: float = 0.01) -> tuple[float, float]: 40 | """ 41 | Fibonacci search to be run on univariate function `f`, with bracketing 42 | interval `[a, b]` for n > 1 function evaluations. It returns the new 43 | interval [a, b]. The optimal parameter `eps` controls the lowest-level 44 | interval. 45 | """ 46 | s = (1 - np.sqrt(5)) / (1 + np.sqrt(5)) 47 | p = 1 / ((PHI*(1 - (s**(n + 1)))) / (1 - (s**n))) 48 | d = p*b + (1 - p)*a 49 | y_d = f(d) 50 | for i in range(1, n): 51 | if i == n - 1: 52 | c = eps*a + (1 - eps)*d 53 | else: 54 | c = p*a + (1 - p)*b 55 | y_c = f(c) 56 | if y_c < y_d: 57 | b, d, y_d = d, c, y_c 58 | else: 59 | a, b = b, c 60 | p = 1 / ((PHI*(1 - (s**(n - i + 1)))) / (1 - (s**(n - i)))) 61 | return (a, b) if a < b else (b, a) 62 | 63 | 64 | def golden_section_search(f: Callable[[float], float], 65 | a: float, 66 | b: float, 67 | n: int) -> tuple[float, float]: 68 | """ 69 | Golden section search to be run on a univariate function `f`, with 70 | bracketing interval [a, b], for n > 1 function evaluations. It returns the 71 | new interval (a, b). Guaranteeing convergence to within `eps` requires 72 | n = (b - a)/(eps*ln(PHI)) iterations. 73 | """ 74 | p = PHI - 1 75 | d = p*b + (1 - p)*a 76 | y_d = f(d) 77 | for _ in range(1, n): 78 | c = p*a + (1 - p)*b 79 | y_c = f(c) 80 | if y_c < y_d: 81 | b, d, y_d = d, c, y_c 82 | else: 83 | a, b = b, c 84 | return (a, b) if a < b else (b, a) 85 | 86 | 87 | def quadratic_fit_search(f: Callable[[float], float], 88 | a: float, 89 | b: float, 90 | c: float, 91 | n: int) -> tuple[float, float, float]: 92 | """ 93 | Quadratic fit search to be run on univariate function `f`, with bracketing 94 | interval [a, c] with a < b < c. The method will run for `n` function 95 | evaluations. It returns the new bracketing values as a tuple, `(a, b, c)`. 
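    A minimal usage sketch (bracketing values chosen arbitrarily for
    illustration):

        a, b, c = quadratic_fit_search(lambda x: (x - 2.0)**2, 0.0, 1.0, 5.0, n=10)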
96 | """ 97 | y_a, y_b, y_c = f(a), f(b), f(c) 98 | for i in range(1, n - 2): 99 | x = 0.5 * (y_a*(b**2 - c**2) + y_b*(c**2 - a**2) + y_c*(a**2 - b**2)) /\ 100 | (y_a*(b - c) + y_b*(c - a) + y_c*(a - b)) 101 | y_x = f(x) 102 | if x > b: 103 | if y_x > y_b: 104 | c, y_c = x, y_x 105 | else: 106 | a, y_a, b, y_b = b, y_b, x, y_x 107 | elif x < b: 108 | if y_x > y_b: 109 | a, y_a = x, y_x 110 | else: 111 | c, y_c, b, y_b = b, y_b, x, y_x 112 | return (a, b, c) 113 | 114 | 115 | def shubert_piyavskii(f: Callable[[float], float], 116 | a: float, 117 | b: float, 118 | l: float, 119 | eps: float, 120 | delta: float = 0.01) -> tuple[np.ndarray, list[tuple[float, float]]]: 121 | """ 122 | The Shubert-Piyavskii method to be run on univariate function `f`, with 123 | bracketing interval `a` < `b` and Lipschitz constant `l`. The algorithm runs 124 | until the update is less than the tolerance `eps`. Both the best point and 125 | the set of uncertainty intervals are returned. The uncertainty intervals are 126 | returned as an array of `(a, b)` tuples. The parameter `delta` is a 127 | tolerance used to merge the uncertainty intervals. 128 | """ 129 | def _get_sp_intersection(A: np.ndarray, B: np.ndarray, l: float) -> np.ndarray: 130 | t = ((A[1] - B[1]) - l*(A[0] - B[0])) / (2*l) 131 | return np.array([A[0] + t, A[1] - t*l]) 132 | 133 | m = (a + b) / 2 134 | A, M, B = np.array([a, f(a)]), np.array([m, f(m)]), np.array([b, f(b)]) 135 | pts = np.array([A, _get_sp_intersection(A, M, l), 136 | M, _get_sp_intersection(M, B, l), 137 | B]) 138 | Delta = np.inf 139 | while Delta > eps: 140 | i = np.argmin(pts[:, 1]) 141 | P = np.array([pts[i, 0], f(pts[i, 0])]) 142 | Delta = P[1] - pts[i, 1] 143 | 144 | P_prev = _get_sp_intersection(pts[i - 1], P, l) 145 | P_next = _get_sp_intersection(P, pts[i + 1], l) 146 | 147 | pts = np.delete(pts, i, axis=0) 148 | pts = np.insert(pts, i, P_next, axis=0) 149 | pts = np.insert(pts, i, P, axis=0) 150 | pts = np.insert(pts, i, P_prev, axis=0) 151 | 152 | intervals = [] 153 | P_min = pts[2 * np.argmin(pts[::2, 1])] 154 | y_min = P_min[1] 155 | for i in range(1, len(pts), 2): 156 | if pts[i, 1] < y_min: 157 | dy = y_min - pts[i, 1] 158 | x_lo = np.maximum(a, pts[i, 0] - (dy/l)) 159 | x_hi = np.minimum(b, pts[i, 0] + (dy/l)) 160 | if (len(intervals) != 0) and (intervals[-1][1] + delta >= x_lo): 161 | intervals[-1] = (intervals[-1][0], x_hi) 162 | else: 163 | intervals.append((x_lo, x_hi)) 164 | 165 | return (P_min, intervals) 166 | 167 | 168 | def bisection(f_prime: Callable[[float], float], 169 | a: float, 170 | b: float, 171 | eps: float) -> tuple[float, float]: 172 | """ 173 | The bisection algorithm where `f_prime` is the derivative of the univariate 174 | function we seek to optimize. We have a < b that bracket a zero of `f_prime`. 175 | The interval width tolerance is `eps`. Calling `bisection` returns the new 176 | bracketed interval [a, b] as a tuple. 177 | """ 178 | a, b = (b, a) if a > b else (a, b) # ensure a < b 179 | 180 | y_a, y_b = f_prime(a), f_prime(b) 181 | b = a if y_a == 0 else b 182 | a = b if y_b == 0 else a 183 | 184 | while (b - a > eps): 185 | x = (a + b) / 2 186 | y = f_prime(x) 187 | if y == 0: 188 | a, b = x, x 189 | elif np.sign(y) == np.sign(y_a): 190 | a = x 191 | else: 192 | b = x 193 | 194 | return (a, b) 195 | 196 | 197 | def bracket_sign_change(f_prime: Callable[[float], float], 198 | a: float, 199 | b: float, 200 | k: float = 2.0) -> tuple[float, float]: 201 | """ 202 | An algorithm for finding an interval in which a sign change occurs. 
The 203 | inputs are the real-valued function `f_prime` defined on the real numbers, 204 | and starting interval [a, b]. It returns the new interval as a tuple by 205 | expanding the interval width until there is a sign change between the 206 | function evaluated at the interval bounds. The expansion default factor `k` 207 | defaults to 2.0. 208 | """ 209 | a, b = (b, a) if a > b else (a, b) # ensure a < b 210 | 211 | center, half_width = (b + a) / 2, (b - a) / 2 212 | while (f_prime(a) * f_prime(b) > 0): 213 | half_width *= k 214 | a = center - half_width 215 | b = center + half_width 216 | 217 | return (a, b) 218 | -------------------------------------------------------------------------------- /src/figures/ch07_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | from matplotlib.patches import Polygon 5 | import numpy as np 6 | 7 | from ch04 import line_search 8 | from ch07 import basis 9 | from TestFunctions import booth, wheeler 10 | from convenience import plot_contour 11 | 12 | 13 | def figure_7_1(n_steps: int = 6): 14 | """ 15 | Figure 7.1: Cyclic coordinate descent alternates between coordinate directions. 16 | """ 17 | x = np.array([10.0, -5.0]) # Starting point 18 | n = len(x) 19 | 20 | fig = plt.figure() 21 | plot_contour(fig, booth, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 22 | for _ in range(n_steps): 23 | for i in range(n): 24 | d = basis(i, n) 25 | x_next = line_search(booth, x, d) 26 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 27 | x = x_next 28 | plt.title("Figure 7.1") 29 | plt.show() 30 | 31 | 32 | def figure_7_3(n_steps: int = 6): 33 | """ 34 | Figure 7.3: Adding the acceleration step to cyclic coordinate descent helps 35 | traverse valleys. Six steps are shown for both the original and accelerated 36 | versions. 37 | """ 38 | x = np.array([10.0, -5.0]) # Starting point (Original) 39 | x_accel = x.copy() # Starting point (Accelerated) 40 | n = len(x) 41 | 42 | fig = plt.figure() 43 | plot_contour(fig, booth, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 44 | for _ in range(n_steps): 45 | x_accel_prev = x_accel.copy() 46 | for i in range(n): 47 | d = basis(i, n) 48 | # Original 49 | x_next = line_search(booth, x, d) 50 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="tab:blue") 51 | x = x_next 52 | 53 | # Accelerated 54 | x_accel_next = line_search(booth, x_accel, d) 55 | plt.plot([x_accel[0], x_accel_next[0]], [x_accel[1], x_accel_next[1]], c="tab:red") 56 | x_accel = x_accel_next 57 | # Acceleration Step 58 | x_accel_next = line_search(booth, x_accel, x_accel - x_accel_prev) 59 | plt.plot([x_accel[0], x_accel_next[0]], [x_accel[1], x_accel_next[1]], c="tab:red") 60 | x_accel = x_accel_next 61 | plt.legend(labels=["original", "accelerated"], loc="lower left") 62 | plt.title("Figure 7.3") 63 | plt.show() 64 | 65 | 66 | def figure_7_4(): 67 | """ 68 | Figure 7.4: Powell's method starts the same as cyclic coordinate descent but 69 | iteratively learns conjugate directions. 
70 | """ 71 | x = np.array([10.0, -5.0]) # Starting point 72 | n = len(x) 73 | U = np.eye(n) 74 | 75 | fig = plt.figure() 76 | plot_contour(fig, wheeler, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 77 | for _ in range(2): 78 | x_prime = x.copy() 79 | for i in range(n): 80 | d = U[i] 81 | x_prime_next = line_search(booth, x_prime, d) 82 | plt.plot([x_prime[0], x_prime_next[0]], [x_prime[1], x_prime_next[1]], c="black") 83 | x_prime = x_prime_next 84 | for i in range(n - 1): 85 | U[i] = U[i + 1] 86 | U[n - 1] = d = x_prime - x 87 | x_prime_next = line_search(booth, x_prime, d) 88 | plt.plot([x_prime[0], x_prime_next[0]], [x_prime[1], x_prime_next[1]], c="black") 89 | x = x_prime_next 90 | plt.title("Figure 7.4") 91 | plt.show() 92 | 93 | 94 | def figure_7_5(n_steps: int = 4): 95 | """ 96 | Figure 7.5: The Hooke-Jeeves method, proceeding left to right. It begins 97 | with a large step size but then reduces it once it cannot improve by taking 98 | a step in any coordinate direction. 99 | """ 100 | alpha, gamma = 0.5, 0.5 101 | x = np.array([0.7, 0.9]) # Starting point 102 | y, n = wheeler(x), len(x) 103 | 104 | fig = plt.figure(figsize=(5*n_steps, 5)) 105 | for i in range(1, n_steps + 1): 106 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(1,n_steps,i)) 107 | cont_ax.scatter([x[0]], [x[1]], c='black', s=30.0) 108 | improved = False 109 | x_best, y_best = x, y 110 | for j in range(n): 111 | for sgn in [-1, 1]: 112 | x_prime = x + sgn*alpha*basis(j, n) 113 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2) 114 | y_prime = wheeler(x_prime) 115 | if y_prime < y_best: 116 | x_best, y_best, improved = x_prime, y_prime, True 117 | x, y = x_best, y_best 118 | if not improved: 119 | alpha *= gamma 120 | plt.suptitle("Figure 7.5", y=0.78) 121 | plt.subplots_adjust(wspace=0.25) 122 | plt.show() 123 | 124 | def figure_7_5_gps(n_steps: int = 4): 125 | """ 126 | Similar to Figure 7.5, but Generalized Pattern Search is used instead of 127 | the Hooke-Jeeves Method 128 | """ 129 | alpha, gamma = 0.5, 0.5 130 | D = np.array([[1, 0], [0, 1], [-1, -1]]) # positive spanning set 131 | x = np.array([0.7, 0.9]) # Starting point 132 | y = wheeler(x) 133 | 134 | fig = plt.figure(figsize=(5*n_steps, 5)) 135 | for i in range(1, n_steps + 1): 136 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(1,n_steps,i)) 137 | cont_ax.scatter([x[0]], [x[1]], c='black', s=30.0) 138 | improved = False 139 | for j, d in enumerate(D): 140 | x_prime = x + alpha * d 141 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2) 142 | y_prime = wheeler(x_prime) 143 | if y_prime < y: 144 | x, y, improved = x_prime, y_prime, True 145 | D = np.insert(np.delete(D, j, axis=0), 0, d, axis=0) 146 | break 147 | if not improved: 148 | alpha *= gamma 149 | plt.suptitle("Figure 7.5 (w/ Generalized Pattern Search)", y=0.78) 150 | plt.subplots_adjust(wspace=0.25) 151 | plt.show() 152 | 153 | def figure_7_11(): 154 | """ 155 | Figure 7.11: The Nelder-Mead method, proceeding left to right and top to bottom. 
156 | """ 157 | S = np.array([[0.7, 1.4], [0.7, 0.9], [0.4, 0.7]]) 158 | triangles = [S.copy()] 159 | f = wheeler 160 | alpha, beta, gamma = 1.0, 2.0, 0.5 161 | 162 | fig = plt.figure(figsize=(20, 15)) 163 | y_arr = np.apply_along_axis(f, 1, S) 164 | for j in range(1, 12 + 1): 165 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(3,4,j)) 166 | cont_ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 167 | cont_ax.set_xlabel(None) 168 | cont_ax.set_ylabel(None) 169 | for k, triangle in enumerate(triangles): 170 | cont_ax.add_patch(Polygon(triangle, fill=False, ec="black", alpha=0.75**(len(triangles) - k - 1))) 171 | p = np.argsort(y_arr) # sort lowest to highest 172 | S, y_arr = S[p], y_arr[p] 173 | xl, yl = S[0], y_arr[0] # lowest 174 | xh, yh = S[-1], y_arr[-1] # highest 175 | xs, ys = S[-2], y_arr[-2] # second-highest 176 | xm = np.mean(S[:-1], axis=0) # centroid 177 | xr = xm + alpha * (xm - xh) # reflection point 178 | yr = f(xr) 179 | 180 | if yr < yl: 181 | xe = xm + beta * (xr - xm) # expansion point 182 | ye = f(xe) 183 | S[-1], y_arr[-1] = (xe, ye) if ye < yr else (xr, yr) 184 | elif yr >= ys: 185 | if yr < yh: 186 | xh, yh, S[-1], y_arr[-1] = xr, yr, xr, yr 187 | xc = xm + gamma * (xh - xm) # contraction point 188 | yc = f(xc) 189 | if yc > yh: 190 | for i in range(1, len(y_arr)): 191 | S[i] = (S[i] + xl) / 2 192 | y_arr[i] = f(S[i]) 193 | else: 194 | S[-1], y_arr[-1] = xc, yc 195 | else: 196 | S[-1], y_arr[-1] = xr, yr 197 | triangles.append(S.copy()) 198 | plt.suptitle("Figure 7.11", fontsize=20, y=0.91) 199 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 200 | plt.show() 201 | 202 | def figure_7_20(): 203 | raise NotImplementedError # TODO 204 | -------------------------------------------------------------------------------- /src/ch13.py: -------------------------------------------------------------------------------- 1 | """Chapter 13: Sampling Plans""" 2 | 3 | import numpy as np 4 | 5 | from abc import abstractmethod 6 | from itertools import product 7 | from numpy import ndarray 8 | from primePy import primes 9 | from typing import Callable 10 | 11 | from ch03 import PHI 12 | 13 | # TODO - Rethink the classing in this chapter 14 | 15 | 16 | class SamplingPlan(): 17 | def __init__(self, *args): 18 | assert len(args) == 1 19 | self.X = args[0] # array of points in sampling plan 20 | 21 | def pairwise_distances(self, p: float = 2) -> np.ndarray: 22 | """ 23 | A function for obtaining the list of pairwise distances between points in 24 | sampling plan `self` using the L_p norm specified by `p`. 25 | """ 26 | m = len(self.X) 27 | return np.array([np.linalg.norm(self.X[i] - self.X[j], p) for i in range(m - 1) for j in range(i, m)]) 28 | 29 | def compare(self, other: 'SamplingPlan', p: float = 2) -> int: 30 | """ 31 | A function for comparing the degree to which two sampling plans `self` 32 | and `other` are space-filling using the L_p norm specified by `p`. 
33 | 34 | The function returns: * -1, if `self` is more space-filling than `other` 35 | * 1, if `self` is more space-filling than `other` 36 | * 0, if they are equivalent 37 | """ 38 | p_self = np.sort(self.pairwise_distances(p)) 39 | p_other = np.sort(other.pairwise_distances(p)) 40 | for (d_self, d_other) in zip(p_self, p_other): 41 | if d_self < d_other: 42 | return 1 43 | elif d_self > d_other: 44 | return -1 45 | return 0 46 | 47 | def phiq(self, q: float = 1, p: float = 2) -> float: 48 | """ 49 | An implementation of the Morris-Mitchell criterion which takes a list of 50 | design points `X`, the criterion parameter `q` > 0, and a norm parameter 51 | `p` >= 1. 52 | """ 53 | dists = self.pairwise_distances(p) 54 | return np.sum(dists**(-q))**(1/q) 55 | 56 | def copy(self) -> 'SamplingPlan': 57 | return SamplingPlan(np.copy(self.X)) 58 | 59 | def append(self, x: np.ndarray): 60 | self.X = np.append(self.X, x) 61 | 62 | def __contains__(self, x: np.ndarray) -> bool: 63 | return x in self.X 64 | 65 | def __iter__(self): 66 | for x in self.X: 67 | yield x 68 | 69 | def __getitem__(self, key): 70 | return self.X[key] 71 | 72 | def __setitem__(self, key, value): 73 | self.X[key] = value 74 | 75 | 76 | class FullFactorialPlan(SamplingPlan): 77 | """ 78 | A function for obtaining all sample locations for the full factorial grid. 79 | Here, `a` is a vector of variable lower bounds, `b` is a vector of variable 80 | upper bounds, and `m` is a vector of sample counts for each dimension. 81 | """ 82 | def __init__(self, a: np.ndarray, b: np.ndarray, m: np.ndarray): 83 | ranges = [np.linspace(a[i], stop=b[i], num=m[i]) for i in range(len(a))] 84 | X = np.array(list(product(*ranges))) 85 | super().__init__(X) 86 | 87 | 88 | class UniformProjectionPlan(SamplingPlan): 89 | """ 90 | A function for constructing a uniform projection plan for an `n`-dimensional 91 | hypercube with `m` samples per dimension. It returns a vector of index vectors. 92 | """ 93 | def __init__(self, m: int, n: int): 94 | perms = [np.random.permutation(m) for _ in range(n)] 95 | X = np.array([[perms[i][j] for i in range(n)] for j in range(m)]) 96 | super().__init__(X) 97 | 98 | def mutate(self): 99 | """ 100 | A function for mutating uniform projection plan `X`, while maintaining 101 | its uniform projection property. 102 | """ 103 | m, n = self.X.shape 104 | j = np.random.randint(n) 105 | i = np.random.permutation(m)[:2] 106 | self.X[i[0], j], self.X[i[1], j] = self.X[i[1], j], self.X[i[0], j] 107 | 108 | 109 | def d_max(A: SamplingPlan, B: SamplingPlan, p: float = 2) -> float: 110 | """ 111 | The set L_p distance metrics between two discrete sets, where `A` and `B` 112 | are lists of design points and `p` is the L_p norm parameter. 113 | """ 114 | def min_dist(a, B, p) -> float: 115 | return np.min([np.linalg.norm(a - b, p) for b in B]) 116 | return np.max([min_dist(a, B, p) for a in A]) 117 | 118 | 119 | def greedy_local_search(X: SamplingPlan, 120 | m: int, 121 | d: Callable[[SamplingPlan, SamplingPlan], float] = d_max) -> SamplingPlan: 122 | """ 123 | Greedy local search, for finding `m`-element sampling plans that minimize 124 | a distance metric `d` for discrete set `X`. 
125 | """ 126 | S = SamplingPlan(np.array([X[np.random.randint(m)]])) 127 | for _ in range(m - 1): 128 | j = np.argmin([np.inf if x in S else d(X, S.copy().append(x)) for x in X]) 129 | S.append(X[j]) 130 | return S 131 | 132 | 133 | def exchange_algorithm(X: SamplingPlan, 134 | m: int, 135 | d: Callable[[SamplingPlan], float] = d_max) -> SamplingPlan: 136 | """ 137 | The exchange algorithm for finding `m`-element sampling plans that minimize 138 | a distance metric `d` for discrete set `X`. 139 | """ 140 | S = SamplingPlan(X[np.random.permutation(m)]) 141 | delta, done = d(X, S), False 142 | while not done: 143 | best_pair = (0,0) 144 | for i in range(m): 145 | s = S[i] 146 | for (j, x) in enumerate(X): 147 | if x not in S: 148 | S[i] = x 149 | delta_prime = d(X, S) 150 | if delta_prime < delta: 151 | delta = delta_prime 152 | best_pair = (i,j) 153 | S[i] = s 154 | done = best_pair == (0,0) 155 | if not done: 156 | i,j = best_pair 157 | S[i] = X[j] 158 | return S 159 | 160 | 161 | def multistart_local_search(X: SamplingPlan, 162 | m: int, 163 | alg: Callable, 164 | k_max: int, 165 | d: Callable[[SamplingPlan, SamplingPlan], float] = d_max) -> SamplingPlan: 166 | """ 167 | Multistart local search runs a particular search algorithm multiple times 168 | and returns the best result. Here, `X` is the list of points, `m` is the size 169 | of the desired sampling plan, `alg` is either `exchange_algorithm` or 170 | `greedy_local_search`, `k_max` is the number of iterations to run, and `d` 171 | is the distance metric. 172 | """ 173 | assert alg.__name__ in ['exchange_algorithm', 'greedy_local_search'] 174 | sets = [alg(X, m, d) for _ in range(k_max)] 175 | return sets[np.argmin([d(X, S) for S in sets])] 176 | 177 | 178 | class FillingSet(SamplingPlan): 179 | def __init__(self, m: int, n: int, max_prime: int): 180 | bs = primes.upto(max(np.ceil(n*(np.log(n) + np.log(np.log(n)))), max_prime)) 181 | seqs = np.array([self._get_filling_set(m, b) for b in bs[:n]]) 182 | super().__init__(seqs.T) 183 | 184 | @abstractmethod 185 | def _get_filling_set(self, m: int, b: int) -> np.ndarray: 186 | pass 187 | 188 | 189 | class AdditiveRecurrenceFillingSet(FillingSet): 190 | """ 191 | Additive recurrence for constructing `m`-element filling sequences over 192 | `n`-dimensional hypercubes. The `primePy` package is used to generate 193 | the first `n` prime numbers, where the kth prime number is bounded by 194 | 195 | k(log(k) + loglog(k)) 196 | 197 | for k > 6, and `primes.upto(a)` returns all primes up to `a`. Note that 13 198 | is the sixth prime number. 199 | """ 200 | def __init__(self, m: int, n: int): 201 | super().__init__(m, n, max_prime=13) 202 | 203 | def _get_filling_set(self, m: int, b: int = None) -> np.ndarray: 204 | c = np.sqrt(b) if b is not None else PHI - 1 205 | X = np.random.rand(1) 206 | for _ in range(m - 1): 207 | X = np.append(X, (X[-1] + c) % 1) 208 | return X 209 | 210 | 211 | class HaltonFillingSet(FillingSet): 212 | """ 213 | Halton quasi-random `m`-element filling sequences over `n`-dimensional unit 214 | hypercubes, where `b` is the base. The bases `bs` must be coprime. 
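    A minimal usage sketch (illustrative sizes only):

        X = HaltonFillingSet(m=10, n=2)    # 10 quasi-random points in the unit square
        dists = X.pairwise_distances()     # space-filling metrics inherited from SamplingPlan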
215 | """ 216 | def __init__(self, m: int, n: int): 217 | super().__init__(m, n, max_prime=6) 218 | 219 | def _get_filling_set(self, m: int, b: int = 2) -> ndarray: 220 | return np.array([self.halton(i, b) for i in range(1, m + 1)]) 221 | 222 | def halton(self, i: int, b: int) -> float: 223 | result, f = 0.0, 1.0 224 | while i > 0: 225 | f = f / b 226 | result += f * (i % b) 227 | i = np.floor(i / b) 228 | return result 229 | -------------------------------------------------------------------------------- /src/figures/ch08_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from scipy.stats import norm, multivariate_normal 6 | 7 | from ch05 import GradientDescent 8 | from ch08 import NoisyDescent, rand_positive_spanning_set 9 | from TestFunctions import branin, wheeler 10 | from convenience import plot_contour, confidence_ellipse 11 | 12 | def figure_8_1(): 13 | """ 14 | Figure 8.1: Adding stochasticity to a descent method helps with traversing 15 | saddle points such as f(x) = x1^2 - x2^2 shown here. Due to the 16 | initialization, the steepest descent method converges to the saddle point 17 | where the gradient is zero. 18 | """ 19 | def f(x): return x[0]**2 - x[1]**2 20 | def grad_f(x): return np.array([2*x[0], -2*x[1]]) 21 | 22 | alpha = 0.1 23 | x_gd = np.array([2.0, 0.0]) 24 | x_sgd = x_gd.copy() 25 | GD = GradientDescent(alpha) 26 | SGD = NoisyDescent(GradientDescent(alpha), sigma=lambda k: 1/(k**3)) 27 | 28 | fig = plt.figure() 29 | lim = (-2.5, 2.5) 30 | plot_contour(fig, f, xlim=lim, ylim=lim, xstride=0.01, ystride=0.01, levels=[-5, -2, 0, 2, 5]) 31 | for _ in range(20): 32 | x_sgd_next = SGD.step(f, grad_f, x_sgd) 33 | plt.plot([x_sgd[0], x_sgd_next[0]], [x_sgd[1], x_sgd_next[1]], c="tab:red") 34 | x_sgd = x_sgd_next 35 | 36 | x_gd_next = GD.step(f, grad_f, x_gd) 37 | plt.plot([x_gd[0], x_gd_next[0]], [x_gd[1], x_gd_next[1]], c="tab:blue") 38 | x_gd = x_gd_next 39 | plt.xlim(lim) 40 | plt.ylim(lim) 41 | plt.legend(labels=["stochastic gradient descent", "steepest descent"]) 42 | plt.title("Figure 8.1") 43 | plt.show() 44 | 45 | 46 | def figure_8_2(): 47 | """ 48 | Figure 8.2: Mesh adaptive direct search proceeding left to right and top to bottom. 
49 | """ 50 | x = np.array([1.5, 1.5]) 51 | spanning_sets = [] 52 | alpha, y, n = 1.0, wheeler(x), len(x) 53 | 54 | fig = plt.figure(figsize=(20, 10)) 55 | lim = (-0.1, 3.0) 56 | for j in range(1, 8 + 1): 57 | # Set up contour plot 58 | cont_ax = plot_contour(fig, wheeler, xlim=lim, ylim=lim, xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(2,4,j)) 59 | cont_ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 60 | cont_ax.set_xlabel(None) 61 | cont_ax.set_ylabel(None) 62 | cont_ax.set_xlim(lim) 63 | cont_ax.set_ylim(lim) 64 | 65 | improved = False 66 | D = rand_positive_spanning_set(alpha, n) 67 | 68 | # Plot spanning sets 69 | spanning_sets.append((alpha, x.copy(), D.copy())) 70 | for (k, (alpha_tmp, x_tmp, spanning_set)) in enumerate(spanning_sets): 71 | cont_ax.scatter([x_tmp[0]], [x_tmp[1]], c='black', s=30.0, zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 72 | for d in spanning_set: 73 | x_prime = x_tmp + alpha_tmp * d 74 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 75 | cont_ax.plot([x_tmp[0], x_prime[0]], [x_tmp[1], x_prime[1]], c='black', zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 76 | 77 | # Mesh Adaptive Direct Search Algorithm 78 | for d in D: 79 | x_prime = x + alpha * d 80 | y_prime = wheeler(x_prime) 81 | if y_prime < y: 82 | x, y, improved = x_prime, y_prime, True 83 | x_prime = x + 3 * alpha * d 84 | y_prime = wheeler(x_prime) 85 | if y_prime < y: 86 | x, y = x_prime, y_prime 87 | break 88 | alpha = np.minimum(4 * alpha, 1.0) if improved else alpha / 4 89 | plt.suptitle("Figure 8.2", fontsize=15, y=0.86) 90 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 91 | plt.show() 92 | 93 | 94 | def figure_8_3(): 95 | """ 96 | Figure 8.3: Several annealing schedules commonly used in simulated annealing. 97 | The schedules have an initial temperature of 10. 98 | """ 99 | def logarithmic(k, t1): return t0 * np.log(2) / np.log(k + 1) 100 | def exponential(k, gamma, t1): return (gamma**(k - 1)) * t1 101 | def fast(k, t1): return t1 / k 102 | 103 | t0 = 10.0 104 | max_iters = 10000 105 | k = np.linspace(1, max_iters, 100000) 106 | 107 | plt.plot(k, [logarithmic(k[i], t0) for i in range(len(k))], color="tab:red", label="logarithmic") 108 | plt.plot(k, [exponential(k[i], 0.25, t0) for i in range(len(k))], color="tab:blue", alpha=1.0, label="exponential, $\gamma = 1/4$") 109 | plt.plot(k, [exponential(k[i], 0.5, t0) for i in range(len(k))], color="tab:blue", alpha=0.75, label="exponential, $\gamma = 1/2$") 110 | plt.plot(k, [exponential(k[i], 0.75, t0) for i in range(len(k))], color="tab:blue", alpha=0.5, label="exponential, $\gamma = 3/4$") 111 | plt.plot(k, [fast(k[i], t0) for i in range(len(k))], color="tab:green", label="fast") 112 | plt.xlim((1, max_iters)) 113 | plt.xscale('log') 114 | plt.xlabel("iteration") 115 | plt.ylabel("temperature") 116 | plt.title("Figure 8.3") 117 | plt.legend() 118 | plt.show() 119 | 120 | 121 | def figure_8_4(): 122 | """ 123 | Figure 8.4: The step multiplication factor as a function of acceptance for 124 | c = 2. 
125 | """ 126 | def factor(x, c): 127 | if x > 0.6: 128 | return 1 + c*((x - 0.6)/0.4) 129 | elif x < 0.4: 130 | return 1/(1 + c*((0.4 - x)/0.4)) 131 | return 1.0 132 | 133 | c = 2 134 | x = np.linspace(0.0, 1.0, 1000) 135 | plt.plot(x, [factor(x_i, c=c) for x_i in x]) 136 | plt.xlim((0, 1)) 137 | plt.ylim((0, 1 + c + 0.1)) 138 | plt.xticks([0.0, 0.4, 0.6, 1.0]) 139 | plt.yticks([1/(1 + c), 1.0, 1 + c], labels=["$\\frac{1}{1 + c}$", "$1$", "$1 + c$"]) 140 | plt.title("Figure 8.4") 141 | plt.show() 142 | 143 | 144 | def figure_8_5(sigma: float = 1.5, gamma: float = 0.5, t1: float = 1.0): 145 | """ 146 | Figure 8.5: Simulated annealing with an exponentially decaying temperature, 147 | where the histograms indicate the probability of simulated annealing being 148 | at a particular position at that iteration. 149 | """ 150 | def f(x): return (np.sin(5*(x + np.pi/3 + np.pi/10)) + 2*np.sin(x + np.pi/4 + np.pi/10) + 2.937)/(2*2.937) 151 | T = norm(0, sigma) 152 | def t(k, gamma=gamma, t1=t1): return (gamma**(k - 1)) * t1 153 | 154 | n_trials = 1000 155 | k_max = 8 156 | traj = np.zeros((n_trials, k_max)) 157 | traj[:, 0] = 0.5 158 | 159 | # Run trials 160 | for i in range(n_trials): 161 | x = 0.5 162 | y = f(x) 163 | x_best, y_best = x, y 164 | for k in range(1, k_max): 165 | x_prime = x + T.rvs() 166 | y_prime = f(x_prime) 167 | delta_y = y_prime - y 168 | if (delta_y <= 0) or (np.random.rand() < np.exp(-delta_y / t(k))): 169 | x, y = x_prime, y_prime 170 | if y_prime < y_best: 171 | x_best, y_best = x_prime, y_prime 172 | traj[i, k] = x 173 | 174 | # Plot the results 175 | xlim = (0.0, 6.5) 176 | x = np.linspace(xlim[0], xlim[1], 1000) 177 | fig = plt.figure(figsize=(20, 5)) 178 | for k in range(k_max): 179 | ax = fig.add_subplot(2, 4, k + 1) 180 | ax.plot(x, f(x), c='black') 181 | ax.hist(traj[:, k], bins=np.linspace(xlim[0], xlim[1], 50), density=True, alpha=0.5) 182 | ax.set_xlim(xlim[0], xlim[1]) 183 | ax.set_ylim(0, 1.1) 184 | ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 185 | if k in [0, 4]: 186 | ax.set_ylabel("$y$") 187 | if k in [4, 5, 6, 7]: 188 | ax.set_xlabel("$x$") 189 | plt.suptitle("Figure 8.5", y=0.93) 190 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 191 | plt.show() 192 | 193 | 194 | def figure_8_6(): 195 | """ 196 | Figure 8.6: The cross-entropy method with `m = 40` applied to the Branin 197 | function (appendix B.3) using a multivariate Gaussian proposal distribution. 198 | The 10 elite samples in each iteration are in red. 
199 | """ 200 | k_max = 4 201 | P = multivariate_normal(np.array([3.0, 7.5]), 5*np.eye(2)) 202 | m = 40 203 | m_elite = 10 204 | f = branin 205 | 206 | fig = plt.figure(figsize=(20, 5)) 207 | xlim = (2*np.pi - 12, 2*np.pi + 12) 208 | ylim = (-3, 22) 209 | for i in range(1, k_max + 1): 210 | ax = plot_contour(fig, branin, xlim, ylim, 0.01, 0.01, levels=[0, 1, 2, 3, 5, 10, 20, 50, 100], filled=True, subplot_coords=(1,k_max,i)) 211 | confidence_ellipse(P.mean, P.cov, ax, n_std=1, edgecolor='white') 212 | confidence_ellipse(P.mean, P.cov, ax, n_std=2, edgecolor='white') 213 | confidence_ellipse(P.mean, P.cov, ax, n_std=3, edgecolor='white') 214 | 215 | samples = P.rvs(m) # return shape (m, n), where n is dimension of random variable 216 | ax.scatter(samples[:, 0], samples[:, 1], c='white', s=1.0) 217 | 218 | order = np.argsort(np.apply_along_axis(f, 1, samples)) 219 | elite_samples = samples[order[:m_elite]] 220 | ax.scatter(elite_samples[:, 0], elite_samples[:, 1], c='tab:red', s=1.0) 221 | P = P._dist(*P._dist.fit(elite_samples)) 222 | plt.suptitle("Figure 8.6", y=0.8) 223 | plt.show() 224 | 225 | # TODO - Figure 8.7 226 | # TODO - Figure 8.8 227 | # TODO - Figure 8.9 -------------------------------------------------------------------------------- /src/ch05.py: -------------------------------------------------------------------------------- 1 | """Chapter 5: First-Order Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import Callable 7 | 8 | from ch04 import line_search 9 | 10 | 11 | class DescentMethod(ABC): 12 | @abstractmethod 13 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 14 | pass 15 | 16 | @abstractmethod 17 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 18 | pass 19 | 20 | 21 | class GradientDescent(DescentMethod): 22 | """ 23 | The gradient descent method, which follows the direction of gradient descent 24 | with a fixed learning rate. The `step` function produces the next iterate 25 | whereas the `initialize` function does nothing. 26 | """ 27 | def __init__(self, alpha: float): 28 | self.alpha = alpha # learning rate 29 | 30 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 31 | pass 32 | 33 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 34 | g = grad_f(x) 35 | return x - self.alpha * g 36 | 37 | 38 | class ConjugateGradientDescent(DescentMethod): 39 | """ 40 | The conjugate gradient method with the Polak-Ribiere update, where `d` 41 | is the previous search direction and `g` is the previous gradient. 
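    A minimal usage sketch (the quadratic objective below is made up for
    illustration):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        M = ConjugateGradientDescent()
        x = np.array([1.0, 2.0])
        M.initialize(f, grad_f, x)
        for _ in range(2):
            x = M.step(f, grad_f, x)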
42 | """ 43 | def __init__(self, d: np.ndarray = None, g: np.ndarray = None): 44 | self.d = d # previous search direction 45 | self.g = g # previous gradient 46 | 47 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 48 | self.g = grad_f(x) 49 | self.d = -self.g 50 | 51 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 52 | g_prime = grad_f(x) 53 | beta = np.maximum(0, np.dot(g_prime, g_prime - self.g) / np.dot(self.g, self.g)) 54 | d_prime = -g_prime + beta*self.d 55 | x_prime = line_search(f, x, d_prime) 56 | self.d, self.g = d_prime, g_prime 57 | return x_prime 58 | 59 | 60 | class Momentum(GradientDescent): 61 | """The momentum method for accelerated descent.""" 62 | def __init__(self, alpha: float, beta: float, v: np.ndarray = None): 63 | super().__init__(alpha) # learning rate 64 | self.beta = beta # momentum decay 65 | self.v = v # momentum 66 | 67 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 68 | self.v = np.zeros(len(x)) 69 | 70 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 71 | g = grad_f(x) 72 | self.v = self.beta*self.v - self.alpha*g 73 | return x + self.v 74 | 75 | 76 | class NesterovMomentum(Momentum): 77 | """Nesterov's momentum method of accelerated descent.""" 78 | def __init__(self, alpha: float, beta: float, v: np.ndarray = None): 79 | super().__init__(alpha, beta, v) 80 | 81 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 82 | g = grad_f(x + self.beta*self.v) 83 | self.v = self.beta*self.v - self.alpha*g 84 | return x + self.v 85 | 86 | 87 | class Adagrad(GradientDescent): 88 | """The Adagrad accelerated descent method.""" 89 | def __init__(self, alpha: float, eps: float, s: np.ndarray = None): 90 | super().__init__(alpha) # learning rate 91 | self.eps = eps # small value 92 | self.s = s # sum of squared gradient 93 | 94 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 95 | self.s = np.zeros(len(x)) 96 | 97 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 98 | g = grad_f(x) 99 | self.s += g**2 100 | return x - self.alpha * (g / (np.sqrt(self.s) + self.eps)) 101 | 102 | 103 | class RMSProp(Adagrad): 104 | """The RMSProp accelerated descent method.""" 105 | def __init__(self, alpha: float, gamma: float, eps: float, s: np.ndarray = None): 106 | super().__init__(alpha, eps, s) 107 | self.gamma = gamma # decay 108 | 109 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 110 | g = grad_f(x) 111 | self.s = self.gamma*self.s + (1 - self.gamma)*(g**2) 112 | return x - self.alpha * (g / (np.sqrt(self.s) + self.eps)) 113 | 114 | 115 | class Adadelta(DescentMethod): 116 | """ 117 | The Adadelta accelerated descent method. The small constant `eps` is 118 | added to the numerator as well to prevent progress from entirely decaying to 119 | zero and to start off the first iteration where `delta_x = 0`. 
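    A minimal usage sketch (illustrative objective and hyperparameters):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        M = Adadelta(gamma_s=0.9, gamma_x=0.9, eps=1e-8)
        x = np.array([1.0, 2.0])
        M.initialize(f, grad_f, x)
        for _ in range(100):
            x = M.step(f, grad_f, x)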
120 | """ 121 | def __init__(self, gamma_s: float, gamma_x: float, eps: float, s: np.ndarray = None, u: np.ndarray = None): 122 | self.gamma_s = gamma_s # gradient decay 123 | self.gamma_x = gamma_x # update decay 124 | self.eps = eps # small value 125 | self.s = s # sum of squared gradients 126 | self.u = u # sum of squared updates 127 | 128 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 129 | self.s = np.zeros(len(x)) 130 | self.u = np.zeros(len(x)) 131 | 132 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 133 | g = grad_f(x) 134 | self.s = self.gamma_s*self.s + (1 - self.gamma_s)*(g**2) 135 | delta_x = -((np.sqrt(self.u) + self.eps) / (np.sqrt(self.s) + self.eps)) * g 136 | self.u = self.gamma_x*self.u + (1 - self.gamma_x)*(delta_x**2) 137 | return x + delta_x 138 | 139 | 140 | class Adam(GradientDescent): 141 | """The Adam accelerated descent method.""" 142 | def __init__(self, alpha: float, gamma_v: float, gamma_s: float, eps: float, k: int = 0, v: np.ndarray = None, s: np.ndarray = None): 143 | super().__init__(alpha) # learning rate 144 | self.gamma_v = gamma_v # 1st moment decay 145 | self.gamma_s = gamma_s # 2nd moment decay 146 | self.eps = eps # small value 147 | self.k = k # step counter 148 | self.v = v # 1st moment estimate 149 | self.s = s # 2nd moment estimate 150 | 151 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 152 | self.k = 0 153 | self.v = np.zeros(len(x)) 154 | self.s = np.zeros(len(x)) 155 | 156 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 157 | g = grad_f(x) 158 | self.v = self.gamma_v*self.v + (1 - self.gamma_v)*g 159 | self.s = self.gamma_s*self.s + (1 - self.gamma_s)*(g**2) 160 | self.k += 1 161 | v_hat = self.v / (1 - (self.gamma_v**self.k)) 162 | s_hat = self.s / (1 - (self.gamma_s**self.k)) 163 | return x - self.alpha * (v_hat / (np.sqrt(s_hat) + self.eps)) 164 | 165 | 166 | class HyperGradientDescent(GradientDescent): 167 | """The hypergradient form of gradient descent.""" 168 | def __init__(self, alpha_0: float, mu: float, alpha: float = None, g_prev: np.ndarray = None): 169 | super().__init__(alpha) # current learning rate 170 | self.alpha_0 = alpha_0 # initial learning rate 171 | self.mu = mu # learning rate of the learning rate 172 | self.g_prev = g_prev # previous gradient 173 | 174 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 175 | self.alpha = self.alpha_0 176 | self.g_prev = np.zeros(len(x)) 177 | 178 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 179 | g = grad_f(x) 180 | self.alpha += self.mu * np.dot(g, self.g_prev) 181 | self.g_prev = g 182 | return x - self.alpha * g 183 | 184 | 185 | class HyperNesterovMomentum(NesterovMomentum): 186 | """The hypergradient form of the Nesterov momentum descent method.""" 187 | def __init__(self, alpha_0: float, mu: float, beta: float, v: np.ndarray = None, alpha: float = None, g_prev: np.ndarray = None): 188 | super().__init__(alpha, beta, v) # current learning rate, momentum decay, momentum 189 | self.alpha_0 = alpha_0 # initial learning rate 190 | self.mu = mu # learning rate of the learning rate 191 | self.g_prev = g_prev # previous gradient 192 | 193 | def initialize(self, f: 
Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 194 | self.alpha = self.alpha_0 195 | self.v = np.zeros(len(x)) 196 | self.g_prev = np.zeros(len(x)) 197 | 198 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 199 | g = grad_f(x) 200 | self.alpha += self.mu * np.dot(g, self.g_prev + self.beta*self.v) 201 | self.v = g + self.beta*self.v 202 | self.g_prev = g 203 | return x - self.alpha * (g + self.beta*self.v) # TODO - Ask Mykel if this is a typo 204 | -------------------------------------------------------------------------------- /src/figures/ch09_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from matplotlib import cm 7 | from scipy.stats import norm, cauchy 8 | 9 | from ch09 import rand_population_uniform, rand_population_normal, rand_population_cauchy,\ 10 | TruncationSelection, TournamentSelection, RouletteWheelSelection 11 | from convenience import normalize 12 | 13 | 14 | def figure_9_1(): 15 | """ 16 | Figure 9.1: A comparison of the normal distribution with standard deviation 17 | 1 and the Cauchy distribution with scale 1. Although `sigma` is sometimes 18 | used for the scale parameter in the Cauchy distribution, this should not be 19 | confused with the standard deviation since the standard deviation of the 20 | Cauchy distribution is undefined. The Cauchy distribution is heavy-tailed, 21 | allowing it to cover the design space more broadly. 22 | """ 23 | x = np.linspace(-6, 6, 1000) 24 | plt.plot(x, norm(loc=0, scale=1).pdf(x), c='tab:purple', label="Normal") 25 | plt.plot(x, cauchy(loc=0, scale=1).pdf(x), c='tab:blue', label="Cauchy") 26 | plt.xlabel("$x$") 27 | plt.ylabel("$p(x)$") 28 | plt.title("Figure 9.1") 29 | plt.xticks([-5, 0, 5]) 30 | plt.yticks([0.0, 0.2, 0.4]) 31 | plt.legend() 32 | plt.show() 33 | 34 | 35 | def figure_9_2(): 36 | """ 37 | Figure 9.2: Initial populations of size 1,000 sampled using a uniform 38 | hyperrectangle with a = [-2, -2], b = [2, 2], a zero-mean normal distribution 39 | with diagonal covariance Sigma = I, and Cauchy distributions centered at the 40 | origin with scale sigma = 1. 41 | """ 42 | m = 1000 43 | s = 10.0 44 | alpha = 0.55 45 | _, ax = plt.subplots(1, 3, figsize=(12, 4)) 46 | 47 | population = rand_population_uniform(m, a=np.array([-2.0, -2.0]), b=np.array([2.0, 2.0])) 48 | ax[0].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 49 | ax[0].set_title("Uniform") 50 | 51 | population = rand_population_normal(m, mu=np.zeros(2), Sigma=np.eye(2)) 52 | ax[1].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 53 | ax[1].set_title("Normal") 54 | 55 | population = rand_population_cauchy(m, mu=np.zeros(2), sigma=np.ones(2)) 56 | ax[2].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 57 | ax[2].set_title("Cauchy") 58 | 59 | for i in range(3): 60 | ax[i].set_xlabel("$x_1$") 61 | ax[i].set_ylabel("$x_2$") 62 | ax[i].set_xlim(-4, 4) 63 | ax[i].set_ylim(-4, 4) 64 | ax[i].set_aspect('equal') 65 | plt.suptitle("Figure 9.2") 66 | plt.show() 67 | 68 | 69 | def figure_9_4(): 70 | """ 71 | Figure 9.4: Truncation selection with a population size `m = 7` and sample 72 | size `k = 3`. The height of a bar indicates its objective function value 73 | whereas its color indicates what individual it corresponds to. 
74 | """ 75 | x, y, m, ax, colors = selection_setup() 76 | 77 | # Truncation Selection (taken directly from ch09.py) 78 | k = 3 79 | p = np.argsort(y) 80 | new_colors = colors[p] 81 | new_colors[k:] = np.array([192, 192, 192, 255.0]) / 255.0 82 | ax[1].bar(x, y[p], width=1/(m - 1), color=new_colors, edgecolor='black') 83 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 84 | ax[1].spines[['right', 'top']].set_visible(False) 85 | ax[1].set_ylim(0.0, 1.3) 86 | ax[1].set_xlabel("individual") 87 | ax[1].set_ylabel("$y$") 88 | plt.suptitle("Figure 9.4") 89 | plt.show() 90 | 91 | 92 | def figure_9_5(): 93 | """ 94 | Figure 9.5: Tournament selection with a population size `m = 7` and a sample 95 | size `k = 3`, which is run separately for each parent. The height of a bar 96 | indicates its objective function value whereas its color indicates what 97 | individual it corresponds to. 98 | """ 99 | x, y, m, ax, colors = selection_setup() 100 | 101 | # Tournament Selection (taken directly from ch09.py) 102 | k = 3 103 | def getparent(): 104 | p = np.random.permutation(len(y)) 105 | return p[np.argmin(y[p[:k]])] 106 | p = [getparent() for _ in range(k)] 107 | new_colors = np.array([[192, 192, 192, 255.0] for _ in range(m)]) / 255.0 108 | new_colors[p, :] = colors[p, :] 109 | ax[1].bar(x, y, width=1/(m - 1), color=new_colors, edgecolor='black') 110 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 111 | ax[1].spines[['right', 'top']].set_visible(False) 112 | ax[1].set_ylim(0.0, 1.3) 113 | ax[1].set_xlabel("individual") 114 | ax[1].set_ylabel("$y$") 115 | plt.suptitle("Figure 9.5") 116 | plt.show() 117 | 118 | 119 | def figure_9_6(): 120 | """ 121 | Figure 9.6: Roulette wheel selection with a population size `m = 7`, which 122 | is run separately for each parent. The approach used causes the individual 123 | with the worst objective function value to have a zero likelihood of being 124 | selected. The height of a bar indicates its objective function value (left), 125 | or its likelihood (right), whereas its color indicates what individual it 126 | corresponds to. 
127 | """ 128 | x, y, m, ax, colors = selection_setup() 129 | 130 | # Roulette Wheel Selection (taken directly from ch09.py) 131 | y = np.max(y) - y 132 | p = normalize(y, ord=1) 133 | ax[1].bar(x, p, width=1/(m - 1), color=colors, edgecolor='black') 134 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 135 | ax[1].spines[['right', 'top']].set_visible(False) 136 | ax[1].set_ylim(0.0, 0.6) 137 | ax[1].set_xlabel("individual") 138 | ax[1].set_ylabel("likelihood") 139 | plt.suptitle("Figure 9.6") 140 | plt.show() 141 | 142 | 143 | def selection_setup(): 144 | m = 7 145 | y = np.array([1.0, 0.6, 0.2, 1.0, 0.9, 0.6, 1.1]) 146 | x = np.linspace(0.0, 1.0, m) 147 | colors = cm.viridis(x) 148 | 149 | _, ax = plt.subplots(1, 2, figsize=(6, 2)) 150 | ax[0].bar(x, y, width=1/(m - 1), color=colors, edgecolor='black') 151 | ax[0].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 152 | ax[0].spines[['right', 'top']].set_visible(False) 153 | ax[0].set_ylim(0.0, 1.3) 154 | ax[0].set_xlabel("individual") 155 | ax[0].set_ylabel("$y$") 156 | 157 | return x, y, m, ax, colors 158 | 159 | 160 | def figure_9_7(): 161 | """Figure 9.7: Single-point crossover""" 162 | a, b, x, color = crossover_setup() 163 | 164 | # Single-Point Crossover (taken directly from ch09.py) 165 | i = np.random.randint(len(a)) 166 | child = np.concatenate((a[:i], b[i:])) 167 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 168 | plt.xticks([i - 0.5], labels=["crossover point"]) 169 | plt.subplots_adjust(bottom=0.4) 170 | plt.title("Figure 9.7", y=0.9) 171 | plt.show() 172 | 173 | 174 | def figure_9_8(): 175 | """Figure 9.8: Two-point crossover""" 176 | a, b, x, color = crossover_setup() 177 | 178 | # Two-Point Crossover (taken directly from ch09.py) 179 | n = len(a) 180 | i, j = np.random.randint(n, size=2) 181 | if i > j: 182 | i, j = j, i 183 | child = np.concatenate((a[:i], b[i:j], a[j:])) 184 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 185 | plt.xticks([i - 0.5, j - 0.5], labels=["crossover point 1", "crossover point 2"]) 186 | plt.subplots_adjust(bottom=0.4) 187 | plt.title("Figure 9.8", y=0.9) 188 | plt.show() 189 | 190 | 191 | def figure_9_9(): 192 | """Figure 9.9: Uniform crossover""" 193 | a, b, x, color = crossover_setup() 194 | 195 | # Uniform Crossover (taken directly from ch09.py) 196 | child = np.copy(a) 197 | for i in range(len(a)): 198 | if np.random.rand() < 0.5: 199 | child[i] = b[i] 200 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 201 | plt.tick_params(axis="x", which="both", bottom=False) 202 | plt.xticks([]) 203 | plt.title("Figure 9.9", y=0.9) 204 | plt.show() 205 | 206 | 207 | def crossover_setup(): 208 | n = 45 209 | x = np.arange(n) 210 | a, b = np.zeros(n), np.ones(n) 211 | def color(x): return ['tab:red' if x_i == 1 else 'tab:blue' for x_i in x] 212 | 213 | plt.figure(figsize=(10, 2.0)) 214 | plt.scatter(x, 0.0*x, color=color(a)) 215 | plt.scatter(x, 0.0*x - 0.15, color=color(b)) 216 | plt.ylim(-0.5, 0.2) 217 | plt.yticks([0.0, -0.15, -0.3], labels=["parent A", "parent B", "child"]) 218 | plt.tick_params(axis="y", which="both", left=False) 219 | plt.gca().spines[['left', 'bottom', 'right', 'top']].set_visible(False) 220 | 221 | return a, b, x, color 222 | 223 | 224 | def figure_9_10(): 225 | """ 226 | Figure 9.10: Mutation for binary string chromosomes gives each bit a 227 | small probability of flipping. 
228 | """ 229 | n = 45 230 | lam = 1/n 231 | x = np.arange(n) 232 | before = np.zeros(n).astype(bool) 233 | after = np.array([~v if np.random.rand() < lam else v for v in before]) 234 | def color(x): return ['lightgreen' if x_i == 1 else 'tab:blue' for x_i in x] 235 | 236 | plt.figure(figsize=(10, 1.5)) 237 | plt.scatter(x, 0.0*x, color=color(before)) 238 | plt.scatter(x, 0.0*x - 0.15, color=color(after)) 239 | plt.ylim(-0.35, 0.2) 240 | plt.xticks([]) 241 | plt.yticks([0.0, -0.15], labels=["before mutation", "after mutation"]) 242 | plt.tick_params(axis="both", which="both", left=False, bottom=False) 243 | plt.gca().spines[['left', 'bottom', 'right', 'top']].set_visible(False) 244 | plt.title("Figure 9.10") 245 | plt.tight_layout() 246 | plt.show() 247 | 248 | 249 | # TODO - Figure 9.11 250 | # TODO - Figure 9.13 251 | # TODO - Figure 9.14 252 | # TODO - Figure 9.15 253 | # TODO - Figure 9.16 254 | -------------------------------------------------------------------------------- /src/ch14.py: -------------------------------------------------------------------------------- 1 | """Chapter 14: Surrogate Models""" 2 | 3 | import numpy as np 4 | 5 | from itertools import product 6 | from typing import Callable 7 | 8 | 9 | def design_matrix(X: np.ndarray) -> np.ndarray: 10 | """A method for constructing a design matrix from a list of design points `X`""" 11 | m = len(X) 12 | return np.hstack([np.ones((m, 1)), X]) 13 | 14 | 15 | def linear_regression(X: np.ndarray, y: np.ndarray) -> Callable[[np.ndarray], float | np.ndarray]: 16 | """ 17 | A method for fitting a surrogate model using linear regression to a list of 18 | design points `X` and a vector of objective function values `y`. 19 | """ 20 | theta = np.pinv(design_matrix(X)) @ y 21 | return lambda x: np.dot(x, theta[1:]) + theta[0] 22 | 23 | 24 | def regression(X: np.ndarray, 25 | y: np.ndarray, 26 | bases: list[Callable[[np.ndarray], float]], 27 | lam: float = 0.0) -> Callable[[np.ndarray], float | np.ndarray]: 28 | """ 29 | A method for fitting a surrogate model to a list of design points `X` and 30 | corresponding objective function values `y` using regression with basis 31 | functions contained in the `bases` list. 32 | 33 | `lam` is an optional smoothing term, for regression in the presence of noise. 34 | """ 35 | B = np.array([[b(x) for b in bases] for x in X]) 36 | theta = np.linalg.solve(B.T @ B + lam * np.eye(len(bases)), B.T @ y) 37 | return lambda x: np.sum([theta[i] * bases[i](x) for i in range(len(theta))], axis=-1) 38 | 39 | 40 | def polynomial_bases_1d(i: int, k: int) -> list[Callable[[np.ndarray], float]]: 41 | """ 42 | A method for constructing a list of polynomial basis functions up to a degree `k` 43 | for the `i`th component of a design point. 44 | """ 45 | return [lambda x: x[i]**p for p in range(k + 1)] 46 | 47 | 48 | def polynomial_bases(n: int, k: int) -> list[Callable[[np.ndarray], float]]: 49 | """ 50 | A method for constructing a list of `n`-dimensional polynomial bases for 51 | terms up to degree `k`. 
52 | """ 53 | bases = [polynomial_bases_1d(i, k) for i in range(n)] 54 | terms = [] 55 | for ks in product(*[range(k + 1) for i in range(n)]): 56 | if sum(ks) <= k: 57 | terms.append(lambda x, ks=ks: np.prod([b[j](x) for (j, b) in zip(ks, bases)])) 58 | return terms 59 | 60 | 61 | def sinusoidal_bases_1d(j: int, k: int, a: np.ndarray, b: np.ndarray) -> list[Callable[[np.ndarray], float]]: 62 | """ 63 | Produces a list of sinusoidal basis function up to degree `k` for the `i`th 64 | component of the design vector given lower bound `a` and upper bound `b`. 65 | """ 66 | T = b[j] - a[j] 67 | bases = [lambda x: 0.5] 68 | for i in range(1, k + 1): 69 | bases.append(lambda x: np.sin(2*np.pi*i*x[j]/T)) 70 | bases.append(lambda x: np.cos(2*np.pi*i*x[j]/T)) 71 | 72 | 73 | def sinusoidal_bases(k: int, a: np.ndarray, b: np.ndarray) -> list[Callable[[np.ndarray], float]]: 74 | """ 75 | Produces all sinusoidal base function combinations up to degree `k` for 76 | lower-bound vector `a` and upper-bound vector `b`. 77 | """ 78 | n = len(a) 79 | bases = [sinusoidal_bases_1d(i, k, a, b) for i in range(n)] 80 | terms = [] 81 | for ks in product(*[range(2*k + 1) for i in range(n)]): 82 | powers = [(k + 1) // 2 for k in ks] 83 | if sum(powers) <= k: 84 | terms.append(lambda x, ks=ks: np.prod([b[j][x] for (j, b) in zip(ks, bases)])) 85 | return terms 86 | 87 | 88 | def radial_bases(psi: Callable[[float], float], C: np.ndarray, p: float = 2) -> list[Callable[[np.ndarray], float]]: 89 | """ 90 | A method for obtaining a list of basis functions given a radial basis 91 | function `psi`, a list of centers `C`, and an L_p norm parameter p`. 92 | """ 93 | return [lambda x: psi(np.linalg.norm(x - c, p)) for c in C] 94 | 95 | 96 | class TrainTest(): 97 | """ 98 | A utility type for training a model and then validating it on a metric. 99 | Here, `train` and `test` are arrays of indices into the training data. 100 | """ 101 | def __init__(self, train: np.ndarray, test: np.ndarray): 102 | self.train = train 103 | self.test = test 104 | 105 | 106 | def train_and_validate(X: np.ndarray, 107 | y: np.ndarray, 108 | tt: TrainTest, 109 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 110 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 111 | """ 112 | A utility method for training a model and then validating it on a metric. 113 | Here, `X` is a list of design points, `y` is the vector of corresponding 114 | function evaluations, `tt` is a train-test partition, `fit` is a model 115 | fitting function, and `metric` evaluates a model on a test set to produce an 116 | estimate of generalization error. 117 | """ 118 | model = fit(X[tt.train], y[tt.train]) 119 | return metric(model, X[tt.test], y[tt.test]) 120 | 121 | 122 | def holdout_partition(m: int, h: int = None) -> TrainTest: 123 | """ 124 | A method for randomly partitioning `m` data samples into training and 125 | holdout sets, where `h` samples are assigned to the holdout set. 
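    A minimal usage sketch (random data, simple linear bases, and a
    mean-squared-error metric, all made up for illustration):

        import numpy as np
        X = np.random.rand(20, 2)
        y = X[:, 0] + 2.0*X[:, 1]
        bases = [lambda x: 1.0, lambda x: x[0], lambda x: x[1]]
        fit = lambda X, y: regression(X, y, bases)
        metric = lambda model, X, y: float(np.mean((np.array([model(x) for x in X]) - y)**2))
        err = train_and_validate(X, y, holdout_partition(len(X)), fit, metric)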
126 | """ 127 | h = m // 2 if h is None else h 128 | p = np.random.permutation(m) 129 | train = p[h:] 130 | holdout = p[:h] 131 | return TrainTest(train, holdout) 132 | 133 | 134 | def random_subsampling(X: np.ndarray, 135 | y: np.ndarray, 136 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 137 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float], 138 | h: int = None, 139 | k_max: int = 10) -> float: 140 | """ 141 | The random subsampling method used to obtain mean and standard deviation 142 | estimates for model generalization error using `k_max` runs of the holdout 143 | method. 144 | """ 145 | m = len(X) 146 | return np.mean([train_and_validate(X, y, holdout_partition(m, h), fit, metric) for _ in range(k_max)]) 147 | 148 | 149 | def k_fold_cross_validation_sets(m: int, k: int) -> list[TrainTest]: 150 | """ 151 | Constructs the sets needed for `k`-fold cross validation on `m` samples, 152 | with `k` <= `m`. 153 | """ 154 | perm = np.random.permutation(m) 155 | sets = [] 156 | for i in range(k): 157 | validate = perm[i:m:k] 158 | train = perm[np.setdiff1d(range(m), range(i, m, k))] 159 | sets.append(TrainTest(train, validate)) 160 | return sets 161 | 162 | 163 | def multiset_validation_estimate(X: np.ndarray, 164 | y: np.ndarray, 165 | sets: list[TrainTest], 166 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 167 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 168 | """ 169 | Computes the mean of the generalization error estimate by training and 170 | validating on the list of train-validate sets contained in `sets`. The other 171 | variables are the list of design points `X`, the corresponding objective 172 | function values `y`, a function `fit` that trains a surrogate model, and a 173 | function `metric` that evaluates a model on a data set. 174 | 175 | NOTE: Works for Cross-Validation sets and Bootstrap sets 176 | """ 177 | return np.mean([train_and_validate(X, y, tt, fit, metric) for tt in sets]) 178 | 179 | 180 | def bootstrap_sets(m: int, b: int) -> list[TrainTest]: 181 | """A method for obtaining `b` bootstrap samples, each for a data set of size `m`""" 182 | return [TrainTest(np.random.randint(m, size=m), np.arange(m)) for i in range(b)] 183 | 184 | 185 | def leave_one_out_bootstrap_estimate(X: np.ndarray, 186 | y: np.ndarray, 187 | sets: list[TrainTest], 188 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 189 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 190 | """ 191 | A method for computing the leave-one-out bootstrap generalization error 192 | estimate using the train-validate sets `sets`. The other variables are the 193 | list of design points `X`, the corresponding objective function values `y`, 194 | a function `fit` that trains a surrogate model, and a function `metric` that 195 | evaluates a model on a data set. 
196 | """ 197 | m, b = len(X), len(sets) 198 | error = 0.0 199 | models = [fit(X[tt.train], y[tt.train]) for tt in sets] 200 | for j in range(m): 201 | c = 0 202 | delta = 0.0 203 | for i in range(b): 204 | if j not in sets[i].train: 205 | c += 1 206 | delta += metric(models[i], np.array([X[j]]), np.array([y[j]])) 207 | error += delta / c 208 | return error / m 209 | 210 | 211 | def bootstrap_632_estimate(X: np.ndarray, 212 | y: np.ndarray, 213 | sets: list[TrainTest], 214 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 215 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 216 | """ 217 | A method for obtaining the 0.632 bootstrap estimate for data points `X`, 218 | objective function values `y`, fitting function `fit`, and 219 | metric function `metric`. 220 | """ 221 | eps_loob = leave_one_out_bootstrap_estimate(X, y, sets, fit, metric) 222 | eps_boot = multiset_validation_estimate(X, y, sets, fit, metric) 223 | return 0.632 * eps_loob + 0.368 * eps_boot 224 | -------------------------------------------------------------------------------- /src/ch19.py: -------------------------------------------------------------------------------- 1 | """Chapter 19: Discrete Optimization""" 2 | 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from itertools import combinations 7 | from queue import PriorityQueue 8 | 9 | from ch11 import LinearProgram 10 | from convenience import normalize 11 | 12 | 13 | class MixedIntegerProgram(): 14 | """ 15 | A mixed integer linear program type that reflects the following equation: 16 | 17 | minimize c'x 18 | subject to: Ax = b 19 | x >= 0 20 | x_D \in Z^||D|| 21 | 22 | Here, `D` is the set of design indices constrained to be discrete. 23 | """ 24 | def __init__(self, A: np.ndarray, b: np.ndarray, c: np.ndarray, D: np.ndarray): 25 | self.A = A 26 | self.b = b 27 | self.c = c 28 | self.D = D 29 | 30 | def relax(self) -> LinearProgram: 31 | """A method for relaxing a mixed integer linear program into a linear program""" 32 | return LinearProgram(self.A, self.b, self.c) 33 | 34 | def round(self) -> np.ndarray: 35 | """ 36 | A method for solving a mixed integer linear program by rounding. 37 | The solution obtained by rounding may be suboptimal or infeasible. 
38 | """ 39 | x = self.relax().minimize() 40 | for i in self.D: 41 | x[i] = round(x[i]) 42 | return x 43 | 44 | def is_totally_unimodular(self) -> bool: 45 | """Method for determining whether a mixed integer program is totally unimodular""" 46 | return is_totally_unimodular(self.A) and\ 47 | np.all(isintegral(self.b)) and np.all(isintegral(self.c)) 48 | 49 | 50 | def isintegral(x: float | np.ndarray, eps=1e-10) -> bool | np.ndarray: 51 | """Returns true if the given value is integral""" 52 | return np.abs(np.round(x) - x) <= eps 53 | 54 | 55 | def is_totally_unimodular(A: np.ndarray) -> bool: 56 | """Method for determining whether matrices `A` are totally unimodular""" 57 | # all entries must be in [0, 1, -1] 58 | if np.any([a not in [0, -1, 1] for a in A]): 59 | return False 60 | # brute force check every subdeterminant 61 | r, c = A.shape 62 | for i in range(1, min(r, c) + 1): 63 | for a in combinations(range(r), i): 64 | for b in combinations(range(c), i): 65 | B = A[a, b] 66 | if np.linalg.det(B) not in [0, -1, 1]: # TODO Check this closer (for approximate values) 67 | return False 68 | return True 69 | 70 | 71 | def frac(x: float): 72 | """Returns the fractional part of a number""" 73 | return np.modf(x)[0] 74 | 75 | 76 | def cutting_plane(MIP: MixedIntegerProgram) -> np.ndarray: 77 | """ 78 | The cutting plane method solves a given mixed integer program `MIP` and 79 | returns an optimal design vector. An error is thrown if no feasible solution 80 | exists. 81 | """ 82 | LP = MIP.relax() 83 | x, b_inds, v_inds = LP.minimize(return_idcs=True) 84 | n_orig = len(x) 85 | D = np.copy(MIP.D) 86 | while not np.all(isintegral(x[D])): 87 | AB, AV = LP.A[:, b_inds], LP.A[:, v_inds] 88 | Abar = np.linalg.solve(AB, AV) 89 | b = 0 90 | for i in D: 91 | if not isintegral(x[i]): 92 | b += 1 93 | A2 = np.vstack([np.hstack([LP.A, np.zeros((len(LP.A), 1))]), 94 | np.zeros((1, LP.A.shape[1] + 1))]) 95 | A2[-1, -1] = 1 96 | A2[-1, v_inds] = np.floor(Abar[b,:]) - Abar[b,:] 97 | b2 = np.append(LP.b, -frac(x[i])) 98 | c2 = np.append(LP.c, 0) 99 | LP = LinearProgram(A2, b2, c2) 100 | x, b_inds, v_inds = LP.minimize(return_idcs=True) 101 | return x[:n_orig] 102 | 103 | 104 | def branch_and_bound(MIP: MixedIntegerProgram) -> np.ndarray: 105 | """ 106 | The branch and bound algorithm for solving a mixed integer program `MIP`. 107 | More sophisticated implementations will drop variables whose solutions are 108 | known in order to speed computation. 109 | 110 | The `PriorityQueue` type is provided by the Python `queue` library. 
111 | """ 112 | LP = MIP.relax() 113 | x, y = LP.minimize_lp_and_y() 114 | n = len(x) 115 | x_best, y_best, Q = np.copy(x), np.inf, PriorityQueue() 116 | Q.put((y, (LP, x, y))) 117 | while not Q.empty(): 118 | LP, x, y = Q.get() 119 | if np.any(np.isnan(x)) or np.all(isintegral(x[MIP.D])): 120 | if y < y_best: 121 | x_best, y_best = x[:n], y 122 | else: 123 | i = np.argmax(np.abs(x[MIP.D] - np.round(x[MIP.D]))) # TODO - Not convinced this gets the right index 124 | A, b, c = LP.A, LP.b, LP.c 125 | c2 = np.append(c, 0) 126 | for r in [1, -1]: # x_i <= floor(x_i), then x_i >= ceil(x_i) 127 | A2 = np.vstack([np.hstack([A, np.zeros((len(A), 1))]), 128 | np.array([[j == i for j in range(A.shape[1])] + [r]])]) 129 | b2 = np.append(b, np.floor(x[i]) if r == 1 else np.ceil(x[i])) 130 | LP2 = LinearProgram(A2, b2, c2) 131 | x2, y2 = LP2.minimize_lp_and_y() 132 | if y2 <= y_best: 133 | Q.put((y2, (LP2, x2, y2))) 134 | return x_best 135 | 136 | 137 | def padovan_topdown(n: int, P: dict[int, int] = dict()) -> int: 138 | """Computing the Padovan sequence using dynamic programming, with the top-down approach""" 139 | if n not in P: 140 | P[n] = 1 if n < 3 else padovan_topdown(n - 2, P) + padovan_topdown(n - 3, P) 141 | return P[n] 142 | 143 | 144 | def padovan_bottomup(n: int) -> int: 145 | """Computing the Padovan sequence using dynamic programming, with the bottom-up approach""" 146 | P = {0:1, 1:1, 2:1} 147 | for i in range(3, n + 1): 148 | P[i] = P[i - 2] + P[i - 3] 149 | return P[n] 150 | 151 | 152 | def knapsack(v: np.ndarray, w: np.ndarray, w_max: float) -> np.ndarray: 153 | """ 154 | A method for solving the 0-1 knapsack problem with item values `v`, 155 | integral item weights `w`, and integral capacity `w_max`. Recovering the 156 | design vector from the cached solutions requires additional iteration. 157 | """ 158 | n = len(v) 159 | y = {(0, j): 0.0 for j in range(w_max + 1)} 160 | for i in range(n): 161 | for j in range(w_max + 1): 162 | y[(i, j)] = y[(i - 1, j)] if w[i] > j else max(y[(i - 1, j)], y[(i - 1, j - w[i])] + v[i]) 163 | 164 | # recover solution 165 | x, j = np.full(n, False), w_max 166 | for i in range(n - 1, -1, -1): 167 | if (w[i] <= j) and (y[(i, j)] - y[(i - 1, j - w[i])] == v[i]): 168 | # the ith element is in the knapsack 169 | x[i] = True 170 | j -= w[i] 171 | return x 172 | 173 | class AntColonyOptimization(): 174 | """ 175 | Ant colony optimization, which takes a directed or undirected graph `G` 176 | from `networkx` and a dictionary of edge tuples ot path lengths `lengths`. 177 | Ants start at the first node in the graph. Optional parameters include the 178 | number of ants per iteration `m`, the number of iterations `k_max`, the 179 | pheromone exponent `alpha`, the prior exponent `beta`, the evaporation 180 | scalar `rho`, and a dictionary of prior edge weights `eta`. 
181 | """ 182 | def __call__(self, 183 | G: nx.Graph | nx.DiGraph, 184 | lengths: dict[tuple[int, int], float], 185 | m: int = 1000, 186 | k_max: int = 100, 187 | alpha: float = 1.0, 188 | beta: float = 5.0, 189 | rho: float = 0.5, 190 | eta: dict[tuple[int, int], float] = None) -> list[int]: 191 | tau = {e: 1.0 for e in G.edges} 192 | x_best, y_best = [], np.inf 193 | for k in range(k_max): 194 | A = self.edge_attractiveness(G, tau, eta, alpha, beta) 195 | for (e, v) in tau.items(): 196 | tau[e] = (1 - rho)*v 197 | for ant in range(m): 198 | x_best, y_best = self.run_ant(G, lengths, tau, A, x_best, y_best) 199 | return x_best 200 | 201 | def edge_attractiveness(self, 202 | graph: nx.Graph | nx.DiGraph, 203 | tau: dict[tuple[int, int], float], 204 | eta: dict[tuple[int, int], float], 205 | alpha: float = 1.0, 206 | beta: float = 5.0) -> dict[tuple[int, int], float]: 207 | """ 208 | A method for computing the edge attractiveness table given graph `graph`, 209 | pheromone levels `tau`, prior edge weights `eta`, pheromone exponent `alpha`, 210 | and prior exponent `beta`. 211 | """ 212 | A = dict() 213 | for src in graph: 214 | neighbors = graph.neighbors(src) 215 | for dst in neighbors: 216 | v = (tau[(src, dst)]**alpha) * (eta[(src, dst)]**beta) 217 | A[(src, dst)] = v 218 | return A 219 | 220 | def run_ant(self, 221 | G: nx.Graph | nx.DiGraph, 222 | lengths: dict[tuple[int, int], float], 223 | tau: dict[tuple[int, int], float], 224 | A: dict[tuple[int, int], float], 225 | x_best: list[int], 226 | y_best: float) -> tuple[list[int], float]: 227 | """ 228 | A method for simulating a single ant on a traveling salesman problem 229 | in which the ant starts at the first node and attempts to visit each node 230 | exactly once. Pheromone levels are increased at the end of a successful 231 | tour. The parameters are the graph `G`, edge lengths `lengths`, pheromone 232 | levels `tau`, edge attractiveness `A`, the best solution found thus far 233 | `x_best`, and its value `y_best`. 
234 | """ 235 | x = [1] 236 | while len(x) < len(G): 237 | src = x[-1] 238 | neighbors = np.setdiff1d(G.neighbors(src), x).tolist() 239 | if len(neighbors) == 0: # ant got stuck 240 | return (x_best, y_best) 241 | 242 | attractiveness = [A[(src, dst)] for dst in neighbors] 243 | x.append(neighbors[np.random.choice(len(neighbors), p=normalize(attractiveness, 1))]) 244 | 245 | l = np.sum([lengths[(x[i - 1], x[i])] for i in range(1, len(x))]) 246 | for i in range(1, len(x)): 247 | tau[(x[i - 1], x[i])] += 1/l 248 | if l < y_best: 249 | return (x, l) 250 | return (x_best, y_best) 251 | -------------------------------------------------------------------------------- /src/ch07.py: -------------------------------------------------------------------------------- 1 | """Chapter 7: Direct Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from collections import OrderedDict 7 | from queue import PriorityQueue 8 | from typing import Callable 9 | 10 | from ch04 import line_search 11 | 12 | 13 | def basis(i: int, n: int) -> np.ndarray: 14 | """A function for constructing the `i`th basis vector (zero-indexed) of length `n`""" 15 | return np.array([1.0 if k == i else 0.0 for k in range(n)]) 16 | 17 | 18 | def cyclic_coordinate_descent(f: Callable[[np.ndarray], float], 19 | x: np.ndarray, 20 | eps: float, 21 | with_acceleration: bool = False) -> np.ndarray: 22 | """ 23 | The cyclic coordinate descent method (with or without acceleration) takes as 24 | input the objective function `f` and a starting point `x`, and it runs until 25 | the step size over a full cycle is less than a given tolerance `eps`. 26 | """ 27 | delta, n = np.inf, len(x) 28 | while delta > eps: 29 | x_prev = x.copy() 30 | for i in range(n): 31 | d = basis(i, n) 32 | x = line_search(f, x, d) 33 | if with_acceleration: 34 | x = line_search(f, x, x - x_prev) # acceleration step 35 | delta = np.linalg.norm(x - x_prev) 36 | return x 37 | 38 | 39 | def powell(f: Callable[[np.ndarray], float], x: np.ndarray, eps: float) -> np.ndarray: 40 | """ 41 | Powell's method, which takes the objective function `f`, a starting point `x`, 42 | and a tolerance `eps`. 43 | """ 44 | n = len(x) 45 | U = np.eye(n) 46 | delta = np.inf 47 | while delta > eps: 48 | x_prime = x.copy() 49 | for i in range(n): 50 | d = U[i] 51 | x_prime = line_search(f, x_prime, d) 52 | for i in range(n - 1): 53 | U[i] = U[i + 1] 54 | U[n - 1] = d = x_prime - x 55 | x_prime = line_search(f, x_prime, d) 56 | delta = np.linalg.norm(x_prime - x) 57 | x = x_prime 58 | return x 59 | 60 | 61 | def hooke_jeeves(f: Callable[[np.ndarray], float], 62 | x: np.ndarray, 63 | alpha: float, 64 | eps: float, 65 | gamma: float = 0.5) -> np.ndarray: 66 | """ 67 | The Hooke-Jeeves method, which takes the target function `f`, a starting point 68 | `x`, a starting step size `alpha`, a tolerance `eps`, and a step decay `gamma`. 69 | The method runs until the step size is less than `eps` and the points sampled 70 | along the coordinate directions do not provide an improvement. 71 | 72 | Based on the implementation from A.F. Kaupe Jr, "Algorithm 178: Direct Search," 73 | Communications of the ACM, vol. 6, no. 6, pp. 313-314, 1963. 
74 | """ 75 | y, n = f(x), len(x) 76 | while alpha > eps: 77 | improved = False 78 | x_best, y_best = x, y 79 | for i in range(n): 80 | for sgn in [-1, 1]: 81 | x_prime = x + sgn*alpha*basis(i, n) 82 | y_prime = f(x_prime) 83 | if y_prime < y_best: 84 | x_best, y_best, improved = x_prime, y_prime, True 85 | x, y = x_best, y_best 86 | if not improved: 87 | alpha *= gamma 88 | return x 89 | 90 | 91 | def generalized_pattern_search(f: Callable[[np.ndarray], float], 92 | x: np.ndarray, 93 | alpha: float, 94 | D: np.ndarray, 95 | eps: float, 96 | gamma: float = 0.5) -> np.ndarray: 97 | """ 98 | Generalized pattern search, which takes the target function `f`, a starting 99 | point `x`, a starting step size `alpha`, a set of search directions `D`, a 100 | tolerance `eps`, and a step decay `gamma`. The method runs until the step 101 | size is less than `eps` and the points sampled along the coordinate directions 102 | do not provide an improvement. 103 | """ 104 | y = f(x) 105 | while alpha > eps: 106 | improved = False 107 | for i, d in enumerate(D): 108 | x_prime = x + alpha * d 109 | y_prime = f(x_prime) 110 | if y_prime < y: 111 | x, y, improved = x_prime, y_prime, True 112 | D = np.insert(np.delete(D, i, axis=0), 0, d, axis=0) 113 | break 114 | if not improved: 115 | alpha *= gamma 116 | return x 117 | 118 | 119 | def nelder_mead(f: Callable[[np.ndarray], float], 120 | S: np.ndarray, 121 | eps: float, 122 | alpha: float = 1.0, 123 | beta: float = 2.0, 124 | gamma: float = 0.5) -> np.ndarray: 125 | """ 126 | The Nelder-Mead simplex method, which takes the objective function `f`, a 127 | starting simplex `S` consisting of a list of vectors, and a tolerance `eps`. 128 | The Nelder-Mead parameters can be specified as well and default to recommended 129 | values. 130 | """ 131 | delta, y_arr = np.inf, np.apply_along_axis(f, 1, S) 132 | while delta > eps: 133 | p = np.argsort(y_arr) # sort lowest to highest 134 | S, y_arr = S[p], y_arr[p] 135 | xl, yl = S[0], y_arr[0] # lowest 136 | xh, yh = S[-1], y_arr[-1] # highest 137 | xs, ys = S[-2], y_arr[-2] # second-highest 138 | xm = np.mean(S[:-1], axis=0) # centroid 139 | xr = xm + alpha * (xm - xh) # reflection point 140 | yr = f(xr) 141 | 142 | if yr < yl: 143 | xe = xm + beta * (xr - xm) # expansion point 144 | ye = f(xe) 145 | S[-1], y_arr[-1] = (xe, ye) if ye < yr else (xr, yr) 146 | elif yr >= ys: 147 | if yr < yh: 148 | xh, yh, S[-1], y_arr[-1] = xr, yr, xr, yr 149 | xc = xm + gamma * (xh - xm) # contraction point 150 | yc = f(xc) 151 | if yc > yh: 152 | for i in range(1, len(y_arr)): 153 | S[i] = (S[i] + xl) / 2 154 | y_arr[i] = f(S[i]) 155 | else: 156 | S[-1], y_arr[-1] = xc, yc 157 | else: 158 | S[-1], y_arr[-1] = xr, yr 159 | 160 | delta = np.std(y_arr) 161 | return S[np.argmin(y_arr)] 162 | 163 | 164 | def direct(f: Callable[[np.ndarray], float], 165 | a: np.ndarray, 166 | b: np.ndarray, 167 | eps: float, 168 | k_max: int) -> np.ndarray: 169 | """ 170 | DIRECT, which takes the multidimensional objective function `f`, vector of 171 | lower bounds `a`, vector of upper bounds `b`, tolerance parameter `eps`, and 172 | number of iterations `k_max`. It returns the best coordinate. 
173 | """ 174 | g = reparametrize_to_unit_hypercube(f, a, b) 175 | intervals = Intervals() 176 | n = len(a) 177 | c = np.full(n, 0.5) 178 | interval = Interval(c, g(c), np.zeros(n)) 179 | intervals.add_interval(interval) 180 | c_best, y_best = np.copy(interval.c), interval.y 181 | 182 | for _ in range(k_max): 183 | S = intervals.get_opt_intervals(eps, y_best) # TODO - Why is y_best needed? 184 | to_add = [] 185 | for interval in S: 186 | to_add.extend(interval.divide(g)) 187 | intervals[interval.vertex_dist()].get() 188 | for interval in to_add: 189 | intervals.add_interval(interval) 190 | if interval.y < y_best: 191 | c_best, y_best = np.copy(interval.c), interval.y 192 | 193 | return rev_unit_hypercube_parametrization(c_best, a, b) 194 | 195 | 196 | def rev_unit_hypercube_parametrization(x: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray: 197 | return x * (b - a) + a 198 | 199 | 200 | def reparametrize_to_unit_hypercube(f: Callable[[np.ndarray], float], a: np.ndarray, b: np.ndarray) -> Callable[[np.ndarray], float]: 201 | """ 202 | A function that creates a function defined over the unit hypercube that 203 | is a reparametrized version of the function `f` defined over the 204 | hypercube with lower and upper bounds `a` and `b`. 205 | """ 206 | Delta = b - a 207 | return lambda x: f(x * Delta + a) 208 | 209 | 210 | class Interval(): 211 | """ 212 | `Interval` has three fields: the interval center `c`, the center point 213 | `y = f(c)`, and the number of divisions in each dimension `depths`. 214 | """ 215 | def __init__(self, c: np.ndarray, y: float, depths: np.ndarray): 216 | self.c = c 217 | self.y = y 218 | self.depths = depths 219 | 220 | def __lt__(self, other: 'Interval'): 221 | return self.y < other.y 222 | 223 | def min_depth(self): 224 | return np.min(self.depths) 225 | 226 | def vertex_dist(self): 227 | return np.linalg.norm(0.5 * (3.0**(-self.depths))) 228 | 229 | def divide(self, f: Callable[[np.ndarray], float]) -> list['Interval']: 230 | """The `divide` routine for dividing an interval, where `f` is the 231 | objective function and `self` is the interval to be divided. It 232 | returns a list of the resulting smaller intervals.""" 233 | c, d, n = self.c, self.min_depth(), len(self.c) 234 | dirs = np.where(self.depths == d)[0] 235 | cs = np.array([[c + (3.0**(-d-1)) * basis(i, n), 236 | c - (3.0**(-d-1)) * basis(i, n)] for i in dirs]) 237 | vs = np.apply_along_axis(f, 2, cs) 238 | minvals = np.min(vs, axis=1) 239 | minvals = minvals[0] if n == 1 else minvals 240 | 241 | intervals = [] 242 | depths = np.copy(self.depths) 243 | for j in np.argsort(minvals): 244 | depths[dirs[j]] += 1 245 | C, V = cs[j], vs[j] 246 | intervals.append(Interval(C[0], V[0], np.copy(depths))) 247 | intervals.append(Interval(C[1], V[1], np.copy(depths))) 248 | intervals.append(Interval(c, self.y, np.copy(depths))) 249 | return intervals 250 | 251 | class Intervals(OrderedDict[float, PriorityQueue[tuple[float, Interval]]]): 252 | """The data structure used in DIRECT""" 253 | def add_interval(self, interval: Interval): 254 | """Inserts a new `Interval` into the data structure.""" 255 | d = interval.vertex_dist() 256 | if d not in self.keys(): 257 | self[d] = PriorityQueue() 258 | self[d].put((interval.y, interval)) 259 | 260 | def get_opt_intervals(self, eps: float, y_best: float) -> list[Interval]: # TODO - y_best isn't used? 
261 | """A routine for obtaining the potentially optimal intervals, where `eps` 262 | is a tolerance parameter and `y_best` is the best function evaluation.""" 263 | stack = [] 264 | for (x, pq) in self.items(): 265 | if not pq.empty(): 266 | interval = pq.queue[0][1] 267 | y = interval.y 268 | 269 | while len(stack) > 1: 270 | interval1 = stack[-1] 271 | interval2 = stack[-2] 272 | x1, y1 = interval1.vertex_dist(), interval1.y 273 | x2, y2 = interval2.vertex_dist(), interval2.y 274 | l = (y2 - y) / (x2 - x) 275 | if (y1 <= l * (x1 - x) + y + eps): # TODO: and (y1 <= l * x1 + y_best - eps*np.abs(y_best)): 276 | break 277 | stack.pop() # remove previous interval 278 | 279 | if (len(stack) != 0) and (interval.y > stack[-1].y + eps): 280 | continue # skip new interval 281 | 282 | stack.append(interval) # add new interval 283 | return stack 284 | -------------------------------------------------------------------------------- /src/ch09.py: -------------------------------------------------------------------------------- 1 | """Chapter 9: Population Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import Callable 7 | from scipy.stats import cauchy, multivariate_normal, rv_continuous 8 | 9 | from convenience import normalize 10 | 11 | 12 | def rand_population_uniform(m: int, a: np.ndarray, b: np.ndarray) -> np.ndarray: 13 | """ 14 | A method for sampling an initial population of `m` design points over a 15 | uniform hyperrectangle with lower-bound vector `a` and upper-bound vector `b`. 16 | """ 17 | d = len(a) 18 | return a + np.random.rand(m, d) * (b - a) 19 | 20 | 21 | def rand_population_normal(m: int, mu: np.ndarray, Sigma: np.ndarray) -> np.ndarray: 22 | """ 23 | A method for sampling an initial population of `m` design points using a 24 | multivariate normal distribution with mean `mu` and covariance `Sigma`. 25 | """ 26 | D = multivariate_normal(mu, Sigma) 27 | return D.rvs(m) 28 | 29 | 30 | def rand_population_cauchy(m: int, mu: np.ndarray, sigma: np.ndarray) -> np.ndarray: 31 | """ 32 | A method for sampling an initial population of `m` design points using a 33 | Cauchy distribution with location `mu` and scale `sigma` for each dimension. 34 | The location and scale are analogous to the mean and standard deviation used 35 | in a normal distribution. 36 | """ 37 | n = len(mu) 38 | return np.array([[cauchy(mu[j], sigma[j]).rvs() for j in range(n)] for _ in range(m)]) 39 | 40 | 41 | def genetic_algorithm(f: Callable[[np.ndarray], float], 42 | population: np.ndarray, 43 | k_max: int, 44 | S: 'SelectionMethod', 45 | C: 'CrossoverMethod', 46 | M: 'MutationMethod') -> np.ndarray: 47 | """ 48 | The genetic algorithm, which takes an objective function `f`, an initial 49 | population `population`, number of iterations `k_max`, a `SelectionMethod` `S`, 50 | a `CrossoverMethod` `C`, and a `MutationMethod` `M`. 51 | """ 52 | for _ in range(k_max): 53 | parents = S.select(np.apply_along_axis(f, 1, population)) 54 | children = [C.crossover(population[p[0]], population[p[1]]) for p in parents] 55 | population = [M.mutate(child) for child in children] 56 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 57 | 58 | 59 | def rand_population_binary(m: int, n: int) -> np.ndarray: 60 | """ 61 | A method for sampling random starting populations of `m` bit-string 62 | chromosomes of length `n`. 
63 | """ 64 | return np.random.randint(2, size=(m, n), dtype=bool) 65 | 66 | 67 | class SelectionMethod(ABC): 68 | """ 69 | Several selection methods for genetic algorithms. Calling selection with a 70 | `SelectionMethod` and the list of objective function values `y` will produce 71 | a list of parental pairs. 72 | """ 73 | @abstractmethod 74 | def select(self, y: np.ndarray) -> np.ndarray: 75 | pass 76 | 77 | 78 | class TruncationSelection(SelectionMethod): 79 | def __init__(self, k: int): 80 | self.k = k # top k to keep 81 | 82 | def select(self, y: np.ndarray) -> np.ndarray: 83 | p = np.argsort(y) 84 | return np.array([p[np.random.choice(self.k, 2)] for _ in y]) 85 | 86 | 87 | class TournamentSelection(SelectionMethod): 88 | def __init__(self, k: int): 89 | self.k = k # top k to keep 90 | 91 | def select(self, y: np.ndarray) -> np.ndarray: 92 | def getparent(): 93 | p = np.random.permutation(len(y)) 94 | return p[np.argmin(y[p[:self.k]])] 95 | return np.array([[getparent(), getparent()] for _ in y]) 96 | 97 | 98 | class RouletteWheelSelection(SelectionMethod): 99 | def select(self, y: np.ndarray) -> np.ndarray: 100 | y = np.max(y) - y 101 | p = normalize(y, ord=1) 102 | return np.random.choice(len(y), size=(len(y), 2), p=p) 103 | 104 | 105 | class CrossoverMethod(ABC): 106 | """ 107 | Several crossover methods for genetic algorithms. Calling crossover with a 108 | `CrossoverMethod` and two parents `a` and `b` will produce a child 109 | chromosome that contains a mixture of the parents' genetic codes. 110 | """ 111 | @abstractmethod 112 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 113 | pass 114 | 115 | 116 | class SinglePointCrossover(CrossoverMethod): 117 | """Works for both binary string and real-valued chromosomes""" 118 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 119 | i = np.random.randint(len(a)) 120 | return np.concatenate((a[:i], b[i:])) 121 | 122 | 123 | class TwoPointCrossover(CrossoverMethod): 124 | """Works for both binary string and real-valued chromosomes""" 125 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 126 | n = len(a) 127 | i, j = np.random.randint(n, size=2) 128 | if i > j: 129 | i, j = j, i 130 | return np.concatenate((a[:i], b[i:j], a[j:])) 131 | 132 | 133 | class UniformCrossover(CrossoverMethod): 134 | """Works for both binary string and real-valued chromosomes""" 135 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 136 | child = np.copy(a) 137 | for i in range(len(a)): 138 | if np.random.rand() < 0.5: 139 | child[i] = b[i] 140 | return child 141 | 142 | 143 | class InterpolationCrossover(CrossoverMethod): 144 | """ 145 | A crossover method for real-valued chromosomes which performs linear 146 | interpolation between the parents. 147 | """ 148 | def __init__(self, lam: float): 149 | self.lam = lam # interpolation parameter 150 | 151 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 152 | return (1 - self.lam) * a + self.lam * b 153 | 154 | 155 | class MutationMethod(ABC): 156 | @abstractmethod 157 | def mutate(self, child: np.ndarray) -> np.ndarray: 158 | pass 159 | 160 | 161 | class BitwiseMutation(MutationMethod): 162 | """ 163 | The bitwise mutation method for binary string chromosomes. 164 | Here, `lam` is the mutation rate. 
165 | """ 166 | def __init__(self, lam: float): 167 | self.lam = lam # mutation rate 168 | 169 | def mutate(self, child: np.ndarray) -> np.ndarray: 170 | return np.array([~v if np.random.rand() < self.lam else v for v in child]) 171 | 172 | 173 | class GaussianMutation(MutationMethod): 174 | """ 175 | The Gaussian mutation method for real-valued chromosomes. 176 | Here, `sigma` is the standard deviation. 177 | """ 178 | def __init__(self, sigma: float): 179 | self.sigma = sigma # standard deviation 180 | 181 | def mutate(self, child: np.ndarray) -> np.ndarray: 182 | return child + np.random.randn(len(child)) * self.sigma 183 | 184 | 185 | def differential_evolution(f: Callable[[np.ndarray], float], 186 | population: np.ndarray, 187 | k_max: int, 188 | p: float = 0.5, 189 | w: float = 1.0) -> np.ndarray: 190 | """ 191 | Differential evolution, which takes an objective function `f`, a population 192 | `population`, a number of iterations `k_max`, a crossover probability `p`, 193 | and a differential weight `w`. The best individual is returned. 194 | """ 195 | m, n = population.shape 196 | for _ in range(k_max): 197 | for (k, x) in enumerate(population): 198 | a, b, c = np.random.choice(population, 199 | p=normalize(np.array([j != k for j in range(m)]), ord=1), 200 | size=3, replace=False) 201 | z = a + w * (b - c) 202 | j = np.random.randint(len(n)) 203 | x_prime = np.array([z[i] if ((i == j) or (np.random.rand() < p)) else x[i] for i in range(n)]) 204 | if f(x_prime) < f(x): 205 | x = x_prime 206 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 207 | 208 | 209 | class Particle(): 210 | """ 211 | Each particle in particle swarm optimization has a position `x` and velocity 212 | `v` in design space and keeps track of the best design point found so far, 213 | `x_best`. 214 | """ 215 | def __init__(self, x: np.ndarray, v: np.ndarray, x_best: np.ndarray): 216 | self.x = x 217 | self.v = v 218 | self.x_best = x_best 219 | 220 | 221 | def particle_swarm_optimization(f: Callable[[np.ndarray], float], 222 | population: list[Particle], 223 | k_max: int, 224 | w: float = 1.0, 225 | c1: float = 1.0, 226 | c2: float = 1.0) -> list[Particle]: 227 | """ 228 | Particle swarm optimization, which takes an objective function `f`, a list 229 | of particles `population`, a number of iterations `k_max`, an inertia `w`, 230 | an momentum coefficients `c1` and `c2`. 231 | 232 | The default values are those used by R. Eberhart and J. Kennedy, "A New 233 | Optimizer Using Particle Swarm Theory," in International Symposium on Micro 234 | Machine and Human Science, 1995. 
235 | """ 236 | n = len(population[0].x) 237 | x_best, y_best = np.copy(population[0].x_best), np.inf 238 | for P in population: 239 | y = f(P.x) 240 | if y < y_best: 241 | x_best, y_best = P.x, y 242 | for _ in range(k_max): 243 | for P in population: 244 | r1, r2 = np.random.rand(n), np.random.rand(n) 245 | P.x += P.v 246 | P.v = w*P.v + c1*r1*(P.x_best - P.x) + c2*r2*(x_best - P.x) 247 | y = f(P.x) 248 | if y < y_best: 249 | x_best, y_best = P.x, y 250 | if y < f(P.x_best): 251 | P.x_best = P.x 252 | return population 253 | 254 | 255 | def firefly(f: Callable[[np.ndarray], float], 256 | population: np.ndarray, 257 | k_max: int, 258 | beta: float = 1.0, 259 | alpha: float = 0.1, 260 | brightness: Callable[[float], float] = lambda r: np.exp(-(r**2))) -> np.ndarray: 261 | """ 262 | The firefly algorithm, which takes an objective function `f`, a population 263 | `population` consisting of design points, a number of iterations `k_max`, 264 | a source intensity `beta`, a random walk step size `alpha`, and an intensity 265 | function `brightness`. The best design point is returned. 266 | """ 267 | m = len(population[0]) 268 | N = multivariate_normal(np.zeros(m), np.eye(m)) 269 | for _ in range(k_max): 270 | for a in population: 271 | for b in population: 272 | if f(b) < f(a): 273 | r = np.linalg.norm(b - a) 274 | a += beta * brightness(r) * (b - a) + alpha * N.rvs() 275 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 276 | 277 | 278 | class Nest(): 279 | def __init__(self, x: np.ndarray, y: float): 280 | self.x = x # position 281 | self.y = y # value, f(x) 282 | 283 | 284 | def cuckoo_search(f: Callable[[np.ndarray], float], 285 | population: list[Nest], 286 | k_max: int, 287 | p_a: float = 0.1, 288 | C: rv_continuous = cauchy(0, 1)) -> list[Nest]: 289 | """ 290 | Cuckoo search, which takes an objective function `f`, an initial set of 291 | nests `population`, a number of iterations `k_max`, percent of nests to 292 | abandon `p_a`, and flight distribution `C`. The flight distribution is 293 | typically a centered Cauchy distribution. 294 | """ 295 | m, n = len(population), len(population[0].x) 296 | a = round(m*p_a) 297 | for _ in range(k_max): 298 | i, j = np.random.randint(m, size=2) 299 | x = population[j].x + C.rvs(n) 300 | y = f(x) 301 | if y < population[i].y: 302 | population[i].x = x 303 | population[i].y = y 304 | 305 | p = np.argsort([-nest.y for nest in population]) 306 | for i in range(len(a)): 307 | j = np.random.randint(m - a) + a 308 | population[p[i]] = Nest(population[p[j]].x + C.rvs(n), f(population[p[i]].x)) 309 | return population 310 | --------------------------------------------------------------------------------