├── src ├── ch20.py ├── figures │ ├── ch06_figures.py │ ├── ch03_figures.py │ ├── ch10_figures.py │ ├── ch05_figures.py │ ├── ch02_figures.py │ ├── ch04_figures.py │ ├── ch07_figures.py │ ├── ch08_figures.py │ └── ch09_figures.py ├── examples │ ├── ch20_examples.py │ ├── ch10_examples.py │ ├── ch11_examples.py │ ├── ch05_examples.py │ ├── ch06_examples.py │ ├── ch09_examples.py │ ├── ch03_examples.py │ ├── ch04_examples.py │ ├── ch01_examples.py │ ├── ch07_examples.py │ ├── ch02_examples.py │ ├── ch21_examples.py │ └── ch08_examples.py ├── exercises │ ├── ch10_exercises.py │ ├── ch21_exercises.py │ ├── ch03_exercises.py │ ├── ch05_exercises.py │ └── ch06_exercises.py ├── tests │ ├── ch10 │ │ └── test_constrained_optimization_methods.py │ ├── ch09 │ │ ├── test_genetic_methods.py │ │ └── test_population_methods.py │ ├── ch11 │ │ └── test_linear_program.py │ ├── ch02 │ │ └── test_finite_difference_methods.py │ ├── ch21 │ │ └── test_gauss_seidel.py │ ├── ch07 │ │ ├── test_direct.py │ │ └── test_direct_methods.py │ ├── ch04 │ │ └── test_local_descent_methods.py │ ├── test_testfunctions.py │ ├── ch06 │ │ └── test_second_order_methods.py │ ├── ch08 │ │ └── test_stochastic_methods.py │ ├── ch03 │ │ └── test_bracketing_methods.py │ └── ch05 │ │ └── test_first_order_methods.py ├── ch21.py ├── ch02.py ├── ch10.py ├── ch15.py ├── convenience.py ├── ch18.py ├── ch06.py ├── ch12.py ├── ch16.py ├── ch11.py ├── ch04.py ├── ch03.py ├── ch13.py ├── ch05.py ├── ch14.py ├── ch19.py ├── ch07.py └── ch09.py ├── requirements.txt ├── LICENSE ├── .github └── workflows │ └── python-package.yml └── README.md /src/ch20.py: -------------------------------------------------------------------------------- 1 | """Chapter 20: Expression Optimization""" -------------------------------------------------------------------------------- /src/figures/ch06_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 6.4 2 | -------------------------------------------------------------------------------- /src/examples/ch20_examples.py: -------------------------------------------------------------------------------- 1 | # from pyparsing import * 2 | 3 | # TODO 4 | -------------------------------------------------------------------------------- /src/exercises/ch10_exercises.py: -------------------------------------------------------------------------------- 1 | # TODO - Exercise 10.1 2 | # TODO - Exercise 10.9 3 | # TODO - Exercise 10.13 -------------------------------------------------------------------------------- /src/examples/ch10_examples.py: -------------------------------------------------------------------------------- 1 | # TODO - Example 10.1 2 | # TODO - Example 10.3 3 | # TODO - Example 10.5 4 | # TODO - Example 10.6 -------------------------------------------------------------------------------- /src/examples/ch11_examples.py: -------------------------------------------------------------------------------- 1 | # TODO - Example 11.3 2 | # TODO - Example 11.4 3 | # TODO - Example 11.6 4 | # TODO - Example 11.7 5 | # TODO - Example 11.8 6 | # TODO - Example 11.9 -------------------------------------------------------------------------------- /src/exercises/ch21_exercises.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from copy import deepcopy 5 | 6 | # TODO - Exercise 21.4 - Need help from Mykel to understand it properly. 
-------------------------------------------------------------------------------- /src/figures/ch03_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 3.2 2 | # TODO - Figure 3.8 3 | # TODO - Figure 3.9 4 | # TODO - Figure 3.11 5 | # TODO - Figure 3.12 6 | # TODO - Figure 3.13 7 | # TODO - Figure 3.14 8 | # TODO - Figure 3.16 9 | -------------------------------------------------------------------------------- /src/figures/ch10_figures.py: -------------------------------------------------------------------------------- 1 | # TODO - Figure 10.2 2 | # TODO - Figure 10.4 3 | # TODO - Figure 10.7 4 | # TODO - Figure 10.8 5 | # TODO - Figure 10.9 6 | # TODO - Figure 10.10 7 | # TODO - Figure 10.11 8 | # TODO - Figure 10.12 9 | # TODO - Figure 10.13 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cvxpy==1.4.1 2 | matplotlib==3.8.2 3 | networkx==3.2.1 4 | numdifftools==0.9.41 5 | numpy==1.26.1 6 | primePy==1.3 7 | # pyparsing==3.1.1 # TODO: Testing to see if it works for ExprRules.jl 8 | scipy==1.12.0 9 | sympy==1.12 10 | # tensorflow==2.15.0 # NOTE: Only needed for an example right now; not necessary for tests -------------------------------------------------------------------------------- /src/tests/ch10/test_constrained_optimization_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch10 import * 6 | 7 | 8 | class TestConstrainedOptimizationMethods(): 9 | def test_penalty_method(self): 10 | pass 11 | 12 | def test_augmented_lagrange_method(self): 13 | pass 14 | 15 | def test_interior_point_method(self): 16 | pass -------------------------------------------------------------------------------- /src/tests/ch09/test_genetic_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import * 6 | 7 | 8 | class TestGeneticMethods(): 9 | def test_genetic_algorithm(self): 10 | pass 11 | 12 | def test_selection_methods(self): 13 | pass 14 | 15 | def test_crossover_methods(self): 16 | pass 17 | 18 | def test_mutation_methods(self): 19 | pass -------------------------------------------------------------------------------- /src/tests/ch09/test_population_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import * 6 | 7 | 8 | class TestPopulationMethods(): 9 | def test_differential_evolution(self): 10 | pass 11 | 12 | def test_particle_swarm_optimization(self): 13 | pass 14 | 15 | def test_firefly(self): 16 | pass 17 | 18 | def test_cuckoo_search(self): 19 | pass 20 | -------------------------------------------------------------------------------- /src/examples/ch05_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | 6 | def example_5_1(): 7 | """Example 5.1: Computing the gradient descent direction.""" 8 | def f(x): return x[0]*(x[1]**2) 9 | def grad_f(x): return np.array([x[1]**2, 2*x[0]*x[1]]) 10 | x = np.array([1.0, 2.0]) 11 | d = -grad_f(x) 12 
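    # At x = [1, 2] the gradient is grad_f(x) = [2**2, 2*1*2] = [4, 4], so the
    # unnormalized descent direction is d = [-4, -4]; dividing by its norm,
    # 4*sqrt(2), gives the normalized direction [-0.7071, -0.7071].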
| 13 | print("Unnormalized descent direction: d = ", d) 14 | print("Normalized descent direction: d = ", d/np.linalg.norm(d)) 15 | -------------------------------------------------------------------------------- /src/examples/ch06_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | from ch06 import newtons_method 6 | from TestFunctions import booth 7 | 8 | 9 | def example_6_1(): 10 | """Example 6.1: Newton's method used to minimize Booth's function""" 11 | x = np.array([9.0, 8.0]) 12 | x_prime = newtons_method(booth.grad, booth.hess, x, eps=1e-5, k_max=1) 13 | 14 | print("After 1 iteration of Newton's Method, x = ", x_prime) 15 | print("Gradient at x: ", booth.grad(x)) 16 | -------------------------------------------------------------------------------- /src/tests/ch11/test_linear_program.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch11 import LinearProgram 6 | 7 | 8 | class TestLinearProgram(): 9 | def test_get_vertex(self): 10 | pass 11 | 12 | def test_edge_transition(self): 13 | pass 14 | 15 | def test_step(self): 16 | pass 17 | 18 | def test_minimize_given_vertex_partition(self): 19 | pass 20 | 21 | def test_minimize(self): 22 | pass 23 | 24 | def test_dual_certificate(self): 25 | pass 26 | 27 | def test_minimize_lp_and_y(self): 28 | pass -------------------------------------------------------------------------------- /src/exercises/ch03_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | 6 | def exercise_3_4(): 7 | """Exercise 3.4: Applying Bisection to f(x) = x^2/2 - x, starting with [0, 1000]""" 8 | def f_prime(x): return x - 1 9 | a, b = 0.0, 1000.0 10 | y_a, y_b = f_prime(a), f_prime(b) 11 | 12 | for i in range(3): # Execute 3 steps of the algorithm 13 | x = (a + b) / 2 14 | y = f_prime(x) 15 | if y == 0: 16 | a, b = x, x 17 | elif np.sign(y) == np.sign(y_a): 18 | a = x 19 | else: 20 | b = x 21 | print("Iteration " + str(i + 1) + ": ", (a, b)) 22 | -------------------------------------------------------------------------------- /src/tests/ch02/test_finite_difference_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch02 import diff_forward, diff_central, diff_backward, diff_complex 6 | 7 | 8 | class TestFiniteDifferenceMethods(): 9 | tol = 1e-7 10 | 11 | def test_diff_forward(self): 12 | self.run_test_finite_difference_method(diff_forward) 13 | 14 | def test_diff_central(self): 15 | self.run_test_finite_difference_method(diff_central) 16 | 17 | def test_diff_backward(self): 18 | self.run_test_finite_difference_method(diff_backward) 19 | 20 | def test_diff_complex(self): 21 | self.run_test_finite_difference_method(diff_complex) 22 | 23 | def run_test_finite_difference_method(self, diff): 24 | x = np.linspace(-100, 100, 1000) 25 | assert np.all(np.abs(np.cos(x) - diff(np.sin, x)) < self.tol) 26 | -------------------------------------------------------------------------------- /src/tests/ch21/test_gauss_seidel.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("./src"); 
sys.path.append("../../") 2 | 3 | import numpy as np 4 | 5 | from ch21 import gauss_seidel 6 | 7 | 8 | class TestGaussSeidel(): 9 | def F1(A): 10 | A["y1"] = A["y2"] - A["x"] 11 | return A 12 | 13 | def F2(A): 14 | A["y2"] = np.sin(A["y1"] + A["y3"]) 15 | return A 16 | 17 | def F3(A): 18 | A["y3"] = np.cos(A["x"] + A["y2"] + A["y1"]) 19 | 20 | def test(self): 21 | A = {"x": 1.0, "y1": 1.0, "y2": 1.0, "y3": 1.0} 22 | A, converged = gauss_seidel([TestGaussSeidel.F1, TestGaussSeidel.F2, TestGaussSeidel.F3], A, k_max=100) 23 | assert converged 24 | assert np.isclose(A["y1"], -1.8795201143545137, atol=1e-8) 25 | assert np.isclose(A["y2"], -0.8795468970115342, atol=1e-8) 26 | assert np.isclose(A["y3"], -0.1871604183537351, atol=1e-8) 27 | -------------------------------------------------------------------------------- /src/examples/ch09_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import numpy as np 4 | 5 | from ch09 import rand_population_uniform, genetic_algorithm,\ 6 | TruncationSelection, SinglePointCrossover, GaussianMutation 7 | 8 | 9 | def example_9_1(): 10 | """ 11 | Example 9.1: Demonstration of using a genetic algorithm for optimizing a 12 | simple function. 13 | """ 14 | np.random.seed(0) 15 | def f(x): return np.linalg.norm(x) 16 | m = 100 # population size 17 | k_max = 10 # number of iterations 18 | population = rand_population_uniform(m, a=np.array([-3.0, -3.0]), b=np.array([3.0, 3.0])) 19 | S = TruncationSelection(10) # select top 10 20 | C = SinglePointCrossover() 21 | M = GaussianMutation(0.5) # small mutation rate 22 | x = genetic_algorithm(f, population, k_max, S, C, M) 23 | print("x = ", x) 24 | 25 | # TODO - Example 9.2 (Maybe eventually: need to construct the algorithm for Lamarckian and Baldwinian learning) -------------------------------------------------------------------------------- /src/figures/ch05_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch05 import GradientDescent 7 | 8 | from TestFunctions import rosenbrock 9 | from convenience import plot_contour 10 | 11 | 12 | def figure_5_1(): # TODO - To duplicate the effect, I need to see the parameters 13 | """ 14 | Figure 5.1: Gradient descent can result in zig-zagging in narrow canyons. 15 | Here we see the effect on the Rosenbrock function. 
16 | """ 17 | x = np.array([-1.1, -1.1]) # Starting point 18 | M = GradientDescent(alpha = 0.025) 19 | 20 | fig = plt.figure() 21 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 2.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 22 | for i in range(10): 23 | x_next = M.step(rosenbrock, rosenbrock.grad, x) 24 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 25 | x = x_next 26 | plt.title("Figure 5.1") 27 | plt.show() 28 | 29 | # TODO - Figure 5.2 30 | # TODO - Figure 5.3 31 | # TODO - Figure 5.5 32 | # TODO - Figure 5.6 33 | # TODO - Figure 5.7 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 griffinbholt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/ch21.py: -------------------------------------------------------------------------------- 1 | """Chapter 21: Multidisciplinary Optimization""" 2 | 3 | import numpy as np 4 | 5 | from copy import deepcopy 6 | from typing import Callable 7 | 8 | 9 | def gauss_seidel(Fs: list[Callable[[dict[str, float | np.ndarray]], dict[str, float | np.ndarray]]], 10 | A: dict[str, float | np.ndarray], 11 | k_max: int = 100, 12 | eps: float = 1e-4) -> tuple[dict[str, float | np.ndarray], bool]: 13 | """ 14 | The Gauss-Seidel algorithm for conducting a multidiciplinary analysis. 15 | Here, `Fs` is a list of disciplinary analysis functions that take and modify 16 | an assignment `A`. There are two optional arguments: the maximum number of 17 | iterations `k_max` and the relative error tolerance `eps`. The method 18 | returns the modified assignment and whether it converged. 
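
    A minimal usage sketch (hypothetical coupled disciplines; any analysis
    functions with this signature will work):

        def F1(A): A["y1"] = A["x"] + A["y2"]; return A
        def F2(A): A["y2"] = 0.5 * A["y1"]; return A
        A, converged = gauss_seidel([F1, F2], {"x": 1.0, "y1": 0.0, "y2": 0.0})
        # converges to the fixed point y1 = 2, y2 = 1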
19 | """ 20 | k, converged = 0, False 21 | while (not converged) and (k <= k_max): 22 | k += 1 23 | A_old = deepcopy(A) 24 | for F in Fs: 25 | F(A) 26 | converged = np.all([np.isclose(A[v], A_old[v], rtol=eps) for v in A]) 27 | return (A, converged) 28 | -------------------------------------------------------------------------------- /src/examples/ch03_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../'); 2 | 3 | import numpy as np 4 | 5 | from ch03 import PHI 6 | 7 | 8 | def example_3_1(): 9 | """ 10 | Example 3.1: Using Fibonacci search with five function evaluations 11 | to optimize a univariate function 12 | """ 13 | def f(x): return np.exp(x - 2) - x 14 | a, b = -2, 6 15 | n = 5 16 | eps = 1e-2 17 | 18 | # Fibonacci search (taken directly from ch03.py) 19 | print("Original Interval: ", (a, b), "\n") 20 | s = (1 - np.sqrt(5)) / (1 + np.sqrt(5)) 21 | p = 1 / ((PHI*(1 - (s**(n + 1)))) / (1 - (s**n))) 22 | d = p*b + (1 - p)*a 23 | y_d = f(d) 24 | print("f(" + str(round(d, 2)) + ") = ", y_d) 25 | for i in range(1, n): 26 | if i == n - 1: 27 | c = eps*a + (1 - eps)*d 28 | else: 29 | c = p*a + (1 - p)*b 30 | y_c = f(c) 31 | print("f(" + str(round(c, 2)) + ") = ", y_c) 32 | if y_c < y_d: 33 | b, d, y_d = d, c, y_c 34 | print("Interval Update: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2)), "\n") 35 | else: 36 | a, b = b, c 37 | print("Interval Update: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2)), "\n") 38 | p = 1 / ((PHI*(1 - (s**(n - i + 1)))) / (1 - (s**(n - i)))) 39 | print("Final Interval: ", (round(a, 2), round(b, 2)) if a < b else (round(b, 2), round(a, 2))) 40 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "main" ] 9 | pull_request: 10 | branches: [ "main" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v4 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Lint with flake8 33 | run: | 34 | # stop the build if there are Python syntax errors or undefined names 35 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 36 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 37 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 38 | - name: Test with pytest 39 | run: | 40 | pytest 41 | -------------------------------------------------------------------------------- /src/figures/ch02_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch02 import diff_forward, diff_central, diff_complex 7 | 8 | 9 | def figure_2_4(): 10 | """ 11 | Figure 2.4: A comparison of the error in derivative estimate for the 12 | function sin(x) at x = 1/2 as the step size is varied. The linear error 13 | of the forward difference method and the quadratic error of the central 14 | difference and complex methods can be seen by the constant slops on the 15 | right hand side. The complex step method avoids the subtractive cancellation 16 | error that occurs when differencing two function evaluations that are close 17 | together. 18 | """ 19 | def abs_rel_error(v, v_approx): return np.abs((v - v_approx) / v) 20 | x = 0.5 21 | dfdx_true = np.cos(x) 22 | 23 | # Compute absolute relative errors for finite difference gradient approximations 24 | h = np.logspace(-18, 1, 100) 25 | error_complex = abs_rel_error(dfdx_true, diff_complex(np.sin, x, h)) 26 | error_forward = abs_rel_error(dfdx_true, diff_forward(np.sin, x, h)) 27 | error_central = abs_rel_error(dfdx_true, diff_central(np.sin, x, h)) 28 | 29 | # Plot results 30 | plt.plot(h, error_complex, c="tab:green", label="complex") 31 | plt.plot(h, error_forward, c="tab:blue", label="forward") 32 | plt.plot(h, error_central, c="tab:red", label="central") 33 | plt.xlabel("step size h") 34 | plt.ylabel("absolute relative error") 35 | plt.xscale("log") 36 | plt.yscale("log") 37 | plt.legend() 38 | plt.title("Figure 2.4") 39 | plt.show() 40 | -------------------------------------------------------------------------------- /src/examples/ch04_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch04 import line_search, backtracking_line_search 7 | 8 | 9 | def example_4_1(): 10 | """Example 4.1: Line search used to minimize a function along a descent direction""" 11 | def f(x): return np.sin(x[0]*x[1]) + np.exp(x[1] + x[2]) - x[2] 12 | x = np.array([1.0, 2.0, 3.0]) 13 | d = np.array([0.0, -1.0, -1.0]) 14 | x_next = line_search(f, x, d) 15 | alpha_opt = (x_next[1] - x[1])/d[1] 16 | 17 | # Print results 18 | print("α* = ", alpha_opt) 19 | print("x' = ", x_next) 20 | 21 | # Plot line search objective 22 | alpha = np.arange(0.0, 5.0, 0.01) 23 | def ls_obj(alpha): return np.sin(2 - alpha) + np.exp(5 - 2*alpha) + alpha - 3 24 | plt.plot(alpha, ls_obj(alpha)) 25 | plt.scatter([alpha_opt], [ls_obj(alpha_opt)], label='α*') 26 | plt.xlabel("") 27 | plt.ylabel("line search objective") 28 | plt.legend() 29 | plt.show() 30 | 31 | 32 | def example_4_2(): 33 | """Example 4.2: An example of backtracking line search, an approximate line search method""" 34 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 35 | def grad_f(x): return np.array([[2, 1], [1, 2]]) @ x 36 | x = np.array([1.0, 2.0]) 37 | d = np.array([-1.0, -1.0]) 38 | sigma = 0.9 39 | 40 | alpha_opt = backtracking_line_search(f, grad_f, x, d, alpha=10.0, p=0.5, beta=1e-4) 41 | candidate_x = x + alpha_opt*d 42 | cand_x_deriv_d = np.dot(grad_f(candidate_x), d) 43 
| adj_x_deriv_d = sigma * np.dot(grad_f(x), d) 44 | 45 | print("α* = ", alpha_opt) 46 | print("x' = ", candidate_x) 47 | print("2nd Wolfe Condition: ", cand_x_deriv_d >= adj_x_deriv_d, " ({:.1f} >= {:.1f})".format(cand_x_deriv_d, adj_x_deriv_d)) 48 | -------------------------------------------------------------------------------- /src/examples/ch01_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from TestFunctions import rosenbrock 7 | from convenience import plot_surface, plot_contour 8 | 9 | def example_1_1(display_contour_plot=False): 10 | """Example 1.1: Checking the first- and second-order necessary conditions 11 | of a point on the Rosenbrock function. The minimizer is indicated by the 12 | dot in the figure (when `display_contour_plot=True`).""" 13 | x = np.array([1.0, 1.0]) 14 | 15 | print("Gradient at [1, 1]:") 16 | print(rosenbrock.grad(x)) 17 | print() 18 | print("Hessian at [1, 1]:") 19 | print(rosenbrock.hess(x)) 20 | 21 | if display_contour_plot: 22 | fig = plt.figure() 23 | plot_contour(fig, 24 | rosenbrock, 25 | xlim=(-2.1, 2.1), 26 | ylim=(-2.1, 2.1), 27 | xstride=0.01, 28 | ystride=0.01, 29 | levels=[1, 2, 3, 5, 9, 25, 50, 100]) 30 | plt.scatter([1], [1], c='black') 31 | plt.show() 32 | 33 | def example_1_2(): 34 | """Example 1.2: An example three-dimensional visualization and the associated contour plot""" 35 | def f(x): return x[0]**2 - x[1]**2 36 | 37 | fig = plt.figure(figsize=(10, 5)) 38 | plot_surface(fig, 39 | f, 40 | xlim=(-2.1, 2.1), 41 | ylim=(-2.1, 2.1), 42 | zlim=(-5.1, 5.1), 43 | xstride=0.05, 44 | ystride=0.05, 45 | subplot_coords=(1,2,1)) 46 | plot_contour(fig, 47 | f, 48 | xlim=(-2.1, 2.1), 49 | ylim=(-2.1, 2.1), 50 | xstride=0.05, 51 | ystride=0.05, 52 | levels=[-4, -2, 0, 2, 4], 53 | clabel=True, 54 | subplot_coords=(1,2,2)) 55 | plt.subplots_adjust(wspace=0.5) 56 | plt.show() 57 | -------------------------------------------------------------------------------- /src/ch02.py: -------------------------------------------------------------------------------- 1 | """Chapter 2: Derivatives and Gradients""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | 8 | def diff_forward(f: Callable[[float | np.ndarray], float | np.ndarray], 9 | x: float | np.ndarray, 10 | h: float | np.ndarray = np.sqrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 11 | """Forward difference method for estimating the derivative of a 12 | function `f` at `x` with finite difference `h`. The default step size is 13 | the square root of the machine precision for floating point values. This 14 | step size balances machine round-off error with step size error. 15 | 16 | `np.finfo(np.float64).eps` provides the step size between 1.0 and the next 17 | larger representable floating-point value. 18 | """ 19 | return (f(x + h) - f(x)) / h 20 | 21 | 22 | def diff_central(f: Callable[[float | np.ndarray], float | np.ndarray], 23 | x: float | np.ndarray, 24 | h: float | np.ndarray = np.cbrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 25 | """Central difference method for estimating the derivative of a 26 | function `f` at `x` with finite difference `h`. The default step size is 27 | the cube root of the machine precision for floating point values. 
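    (The cube root arises, under the usual error model, from balancing the
    O(h^2) truncation error of the central difference against the O(eps/h)
    floating-point round-off error.)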
28 | """ 29 | return (f(x + (h/2)) - f(x - (h/2))) / h 30 | 31 | 32 | def diff_backward(f: Callable[[float | np.ndarray], float | np.ndarray], 33 | x: float | np.ndarray, 34 | h: float | np.ndarray = np.sqrt(np.finfo(np.float64).eps)) -> float | np.ndarray: 35 | """Backward difference method for estimating the derivative of a 36 | function `f` at `x` with finite difference `h`. The default step size is 37 | the square root of the machine precision for floating point values. 38 | """ 39 | return (f(x) - f(x - h)) / h 40 | 41 | 42 | def diff_complex(f: Callable[[float | np.ndarray], float | np.ndarray], 43 | x: float | np.ndarray, 44 | h: float | np.ndarray = 1e-20) -> float | np.ndarray: 45 | """The complex step method for estimating the derivative of a function `f` 46 | at `x` with finite difference `h`.""" 47 | return np.imag(f(x + h*1j)) / h 48 | -------------------------------------------------------------------------------- /src/tests/ch07/test_direct.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch07 import direct 6 | from TestFunctions import ackley, booth, branin, flower, michalewicz, rosenbrock, wheeler 7 | 8 | class TestDIRECT(): 9 | def test(self, eps=1e-5): 10 | # Ackley's Function 11 | f_min, x_min = ackley.global_min() 12 | x = direct(ackley, a=np.array([-30.0, -30.0]), b=np.array([30.0, 30.0]), eps=eps, k_max=50) 13 | assert np.abs(ackley(x) - f_min) < eps 14 | assert np.all(np.abs(x - x_min) < eps) 15 | 16 | # Booth's Function 17 | f_min, x_min = booth.global_min() 18 | x = direct(booth, a=np.array([-10.0, -10.0]), b=np.array([10.0, 10.0]), eps=eps, k_max=40) 19 | assert np.abs(booth(x) - f_min) < eps 20 | assert np.all(np.abs(x - x_min) < eps) 21 | 22 | # Branin's Function 23 | f_min, x_min = branin.global_min() 24 | x = direct(branin, a=np.array([-5.0, -5.0]), b=np.array([20.0, 20.0]), eps=eps, k_max=50) 25 | assert np.abs(branin(x) - f_min[0]) < eps 26 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 27 | 28 | # Michalewicz Function 29 | f_min, x_min = michalewicz.global_min() 30 | x = direct(michalewicz, a=np.array([0.0, 0.0]), b=np.array([4.0, 4.0]), eps=eps, k_max=50) 31 | assert np.abs(michalewicz(x) - f_min) < eps 32 | assert np.all(np.abs(x - x_min) < eps) 33 | 34 | # Flower Function 35 | x = direct(flower, a=np.array([-3.0, -3.0]), b=np.array([3.0, 3.0]), eps=eps, k_max=50) 36 | assert np.all(np.abs(x - np.zeros(2)) < eps) 37 | 38 | # Rosenbrock's Banana Function 39 | f_min, x_min = rosenbrock.global_min() 40 | x = direct(rosenbrock, a=np.array([-2.0, -2.0]), b=np.array([2.0, 2.0]), eps=eps, k_max=50) 41 | assert np.abs(rosenbrock(x) - f_min) < eps 42 | assert np.all(np.abs(x - x_min) < eps) 43 | 44 | # Wheeler's Ridge 45 | f_min, x_min = wheeler.global_min() 46 | x = direct(wheeler, a=np.array([-5.0, -2.0]), b=np.array([25.0, 6.0]), eps=eps, k_max=50) 47 | assert np.abs(wheeler(x) - f_min) < eps 48 | assert np.all(np.abs(x - x_min) < eps) 49 | -------------------------------------------------------------------------------- /src/tests/ch04/test_local_descent_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | import warnings 5 | 6 | from ch04 import line_search, backtracking_line_search, strong_backtracking, trust_region_descent 7 | from TestFunctions 
import rosenbrock 8 | 9 | 10 | class TestLocalDescentMethods(): 11 | def test_line_search(self): 12 | def f(x): return np.sin(x[0]*x[1]) + np.exp(x[1] + x[2]) - x[2] 13 | x = np.array([1.0, 2.0, 3.0]) 14 | d = np.array([0.0, -1.0, -1.0]) 15 | x_prime = line_search(f, x, d) 16 | exp_x_prime = np.array([1.0, -1.127, -0.127]) 17 | assert np.all(np.abs(x_prime - exp_x_prime) < 1e-3) 18 | 19 | def test_backtracking_line_search(self): 20 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 21 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 22 | x = np.array([1.0, 2.0]) 23 | d = np.array([-1.0, -1.0]) 24 | alpha = backtracking_line_search(f, grad_f, x, d, alpha=10) 25 | x_prime = x + alpha * d 26 | exp_x_prime = np.array([-1.5, -0.5]) 27 | assert np.all(np.abs(x_prime - exp_x_prime) < 1e-10) 28 | 29 | def test_strong_backtracking(self): 30 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 31 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 32 | x = np.array([1.0, 2.0]) 33 | d = np.array([-1.0, -1.0]) 34 | alpha = strong_backtracking(f, grad_f, x, d) 35 | x_prime = x + alpha * d 36 | assert f(x_prime) < f(x) 37 | assert f(x_prime) < 3.25 38 | 39 | def test_trust_region_descent(self, eps: float = 1e-8): 40 | warnings.simplefilter(action='ignore', category=FutureWarning) 41 | 42 | # Rosenbrock 43 | x = np.array([-5.0, -3.0]) 44 | x_prime = trust_region_descent(f=rosenbrock, 45 | grad_f=rosenbrock.grad, 46 | H=rosenbrock.hess, 47 | x=x, 48 | k_max=15) 49 | f_min, x_min = rosenbrock.global_min() 50 | assert np.abs(rosenbrock(x_prime) - f_min) < eps 51 | assert np.all(np.abs(x_prime - x_min) < eps) 52 | -------------------------------------------------------------------------------- /src/examples/ch07_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch07 import Interval, Intervals, reparametrize_to_unit_hypercube, rev_unit_hypercube_parametrization 7 | 8 | # TODO - Example 7.1 9 | # TODO - Example 7.2 10 | 11 | def example_7_1(eps: float = 1e-5, k_max: int = 5): # TODO - Hitting some snags 12 | """Example 7.1: The DIRECT method applied to a univariate function.""" 13 | def f(x): return np.sin(x) + np.sin(2*x) + np.sin(4*x) + np.sin(8*x) 14 | a = np.array([-2.0]) 15 | b = np.array([2.0]) 16 | 17 | g = reparametrize_to_unit_hypercube(f, a, b) 18 | intervals = Intervals() 19 | n = len(a) 20 | c = np.full(n, 0.0) 21 | interval = Interval(c, g(c), np.zeros(n)) 22 | intervals.add_interval(interval) 23 | c_best, y_best = np.copy(interval.c), interval.y 24 | 25 | fig, ax = plt.subplots(k_max + 1, 2, sharey=True, figsize=(7, 9)) 26 | t = np.linspace(-2.0, 2.0, 1000) 27 | f_t = f(t) 28 | ax[0, 0].plot(t, f_t, c="black") 29 | ax[0, 0].hlines([f(c)], xmin=-2.0, xmax=2.0, color="tab:blue") 30 | ax[0, 0].scatter([interval.c], [f(c)], color="tab:blue") 31 | ax[0, 0].set_xlim(-2.0, 2.0) 32 | ax[0, 1].scatter([2.0], [f(c)], color="tab:blue") 33 | ax[0, 1].set_xlim(0.0, 2.0) 34 | 35 | for i in range(1, k_max + 1): 36 | ax[i, 0].plot(t, f_t, color="black") 37 | ax[i, 0].set_xlim(-2.0, 2.0) 38 | ax[i, 1].set_xlim(0.0, 2.0) 39 | S = intervals.get_opt_intervals(eps, y_best) 40 | to_add = [] 41 | for interval in S: 42 | new_intervals = interval.divide(g) 43 | to_add.extend(new_intervals) 44 | intervals[interval.vertex_dist()].get() 45 | for interval in to_add: 46 | c = rev_unit_hypercube_parametrization(interval.c, a, b) 47 | u = 
rev_unit_hypercube_parametrization(interval.c + (3.0**(-i)), a, b) 48 | l = rev_unit_hypercube_parametrization(interval.c - (3.0**(-i)), a, b) 49 | ax[i, 0].hlines([f(c)], xmin=l, xmax=u, color="gray") 50 | ax[i, 0].scatter([c], [f(c)], color="black") 51 | intervals.add_interval(interval) 52 | if interval.y < y_best: 53 | c_best, y_best = np.copy(interval.c), interval.y 54 | 55 | x = rev_unit_hypercube_parametrization(c_best, a, b) 56 | 57 | plt.ylim(-2.5, 2.5) 58 | plt.show() 59 | 60 | example_7_1() -------------------------------------------------------------------------------- /src/examples/ch02_examples.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sympy as sp 3 | 4 | from typing import Union 5 | 6 | 7 | def example_2_1(): 8 | """Example 2.1: Symbolic differentiation provides analytical derivatives.""" 9 | x = sp.Symbol('x') 10 | f = x**2 + x/2 - sp.sin(x)/x 11 | print(sp.diff(f, x)) 12 | 13 | 14 | def example_2_4(): 15 | """Example 2.4: The complex step method for estimating derivatives""" 16 | def f(x): return np.sin(x**2) 17 | v = f(np.pi/2 + 0.001j) 18 | print("f(x) = real(v) = ", np.real(v)) 19 | print("f'(x) = imag(v)/0.001 = ", np.imag(v)/0.001) 20 | 21 | 22 | def example_2_5(): 23 | """Example 2.5: An implementation of dual numbers allows for automatic forward accumulation""" 24 | class Dual(): 25 | def __init__(self, v: float, d: float): 26 | self.v = v 27 | self.d = d 28 | 29 | def __repr__(self) -> str: 30 | return 'Dual(' + str(self.v) + ',' + str(self.d) + ')' 31 | 32 | def __add__(self, other: 'Dual') -> 'Dual': 33 | return Dual(self.v + other.v, self.d + other.d) 34 | 35 | def __mul__(self, other: 'Dual') -> 'Dual': 36 | return Dual(self.v * other.v, self.v * other.d + other.v * self.d) 37 | 38 | @staticmethod 39 | def log(a: 'Dual') -> 'Dual': 40 | return Dual(np.log(a.v), a.d / a.v) 41 | 42 | @staticmethod 43 | def max(a: 'Dual', b: Union['Dual', int]) -> 'Dual': 44 | if isinstance(b, Dual): 45 | v = np.maximum(a.v, b.v) 46 | d = a.d if a.v > b.v else (b.d if a.v < b.v else np.nan) 47 | else: # isinstance(b, int) 48 | v = np.maximum(a.v, b) 49 | d = a.d if a.v > b else (0 if a.v < b else np.nan) 50 | return Dual(v, d) 51 | 52 | a = Dual(3, 1) 53 | b = Dual(2, 0) 54 | print(Dual.log(a*b + Dual.max(a, 2))) 55 | 56 | 57 | def example_2_6(): 58 | """ 59 | Example 2.6: Automatic differentiation using the Tensorflow package. 
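    Here f(a, b) = log(a*b + max(a, 2)), so at (a, b) = (3, 2) the argument of
    the log is 3*2 + 3 = 9, and the partial derivatives are (b + 1)/9 = 1/3
    with respect to a and a/9 = 1/3 with respect to b.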
60 | We find that the gradient at [3, 2] is [1/3, 1/3] 61 | """ 62 | import os; os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # Disables Tensorflow CPU warning 63 | import tensorflow as tf 64 | 65 | @tf.function 66 | def f(a, b): return tf.math.log(a*b + tf.math.maximum(a, 2)) 67 | 68 | x = tf.Variable(3.0) 69 | y = tf.Variable(2.0) 70 | with tf.GradientTape() as tape: 71 | z = f(x, y) 72 | print([deriv.numpy() for deriv in tape.gradient(z, [x, y])]) 73 | -------------------------------------------------------------------------------- /src/tests/ch07/test_direct_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch07 import cyclic_coordinate_descent, powell, hooke_jeeves, generalized_pattern_search, nelder_mead 6 | from TestFunctions import booth, wheeler 7 | 8 | class TestDirectMethods(): 9 | eps = 1e-5 10 | 11 | def test_cyclic_coord_descent(self): 12 | f_min, x_min = booth.global_min() 13 | x = np.array([10.0, -5.0]) 14 | x = cyclic_coordinate_descent(booth, x, self.eps) 15 | assert np.abs(booth(x) - f_min) < self.eps 16 | assert np.all(np.abs(x - x_min) < self.eps) 17 | 18 | def test_cyclic_coord_descent_with_accel(self): 19 | f_min, x_min = booth.global_min() 20 | x = np.array([10.0, -5.0]) 21 | x = cyclic_coordinate_descent(booth, x, self.eps, with_acceleration=True) 22 | assert np.abs(booth(x) - f_min) < self.eps 23 | assert np.all(np.abs(x - x_min) < self.eps) 24 | 25 | def test_powell(self): 26 | f_min, x_min = booth.global_min() 27 | x = np.array([10.0, -5.0]) 28 | x = powell(booth, x, self.eps) 29 | assert np.abs(booth(x) - f_min) < self.eps 30 | assert np.all(np.abs(x - x_min) < self.eps) 31 | 32 | def test_hooke_jeeves(self): 33 | f_min, x_min = wheeler.global_min() 34 | x = np.array([0.7, 0.9]) 35 | x = hooke_jeeves(wheeler, x, alpha=0.5, eps=self.eps, gamma=0.5) 36 | assert np.abs(wheeler(x) - f_min) < self.eps 37 | assert np.all(np.abs(x - x_min) < self.eps) 38 | 39 | def test_generalized_pattern_search(self): 40 | possible_Ds = [ 41 | np.array([[1.0, 0], [0, 1], [-1, 0], [0, -1]]), # Equivalent to Hooke-Jeeves 42 | np.array([[1, 1], [1, -1], [-1, -1], [-1, 1]]), # Diagonal Directions 43 | np.array([[1, 0], [0, 1], [-1, -1]]) # 3 Directions: Up, Right, Down-Left 44 | ] 45 | for D in possible_Ds: 46 | f_min, x_min = wheeler.global_min() 47 | x = np.array([0.7, 0.9]) 48 | x = generalized_pattern_search(wheeler, x, alpha=0.5, D=D, eps=self.eps, gamma=0.5) 49 | assert np.abs(wheeler(x) - f_min) < 10*self.eps 50 | assert np.all(np.abs(x - x_min) < 10*self.eps) 51 | 52 | def test_nelder_mead(self): 53 | f_min, x_min = wheeler.global_min() 54 | S = np.array([[0.7, 1.4], [0.7, 0.9], [0.4, 0.7]]) 55 | x = nelder_mead(wheeler, S, eps=self.eps) 56 | assert np.abs(wheeler(x) - f_min) < 1e-4 57 | assert np.all(np.abs(x - x_min) < 1e-2) 58 | -------------------------------------------------------------------------------- /src/tests/test_testfunctions.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../'); sys.path.append('../../') 2 | 3 | import numdifftools as nd 4 | import numpy as np 5 | 6 | from TestFunctions import ScalarValuedTestFunction, MichalewiczFunction, VectorValuedTestFunction,\ 7 | ackley, booth, branin, flower, michalewicz, rosenbrock,\ 8 | wheeler, circle 9 | 10 | 11 | class TestTestFunctions(): 12 | scalar_functions = [ackley, 
booth, branin, flower, 13 | michalewicz, rosenbrock, wheeler] 14 | vector_functions = [circle] 15 | 16 | def test_gradients(self): 17 | np.random.seed(42) 18 | for test_function in self.scalar_functions: 19 | self.run_gradient_test(test_function) 20 | 21 | def run_gradient_test(self, test_function: ScalarValuedTestFunction, eps: float = 1e-9, n_trials: int = 100): 22 | for _ in range(n_trials): 23 | x = np.random.rand(test_function.d if test_function.d is not None else 10) 24 | num_grad = nd.Gradient(test_function)(x) 25 | test_grad = test_function.grad(x) 26 | assert np.all(np.abs(num_grad - test_grad) < eps), test_function.__class__.__name__ + " Gradient failed" 27 | 28 | def test_hessians(self): 29 | np.random.seed(42) 30 | for test_function in self.scalar_functions: 31 | if isinstance(test_function, MichalewiczFunction): 32 | self.run_hessian_test(test_function, eps=0.5) 33 | else: 34 | self.run_hessian_test(test_function) 35 | 36 | def run_hessian_test(self, test_function: ScalarValuedTestFunction, eps: float = 1e-3, n_trials: int = 100): 37 | for _ in range(n_trials): 38 | x = np.random.rand(2) 39 | num_hess = nd.Hessian(test_function)(x) 40 | test_hess = test_function.hess(x) 41 | assert np.all(np.abs(num_hess - test_hess) < eps), test_function.__class__.__name__ + " Hessian failed" 42 | 43 | def test_jacobians(self): 44 | np.random.seed(42) 45 | for test_function in self.vector_functions: 46 | self.run_jacobian_test(test_function) 47 | 48 | def run_jacobian_test(self, test_function: VectorValuedTestFunction, eps: float = 1e-9, n_trials: int = 100): 49 | for _ in range(n_trials): 50 | x = np.random.rand(2) 51 | num_jac = nd.Jacobian(test_function)(x) 52 | test_jac = test_function.jac(x) 53 | assert np.all(np.abs(num_jac - test_jac) < eps), test_function.__class__.__name__ + " Jacobian failed" 54 | -------------------------------------------------------------------------------- /src/examples/ch21_examples.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from copy import deepcopy 5 | 6 | 7 | def example_21_1(f1, f2): 8 | """ 9 | Example 21.1: Basic code syntax for the assignment-based representation of 10 | multidisciplinary design optimization problems. 11 | """ 12 | def F1(A): 13 | A["y1"] = f1(A["x"], A["y2"]) 14 | return A 15 | 16 | def F2(A): 17 | A["y2"] = f2(A["x"], A["y1"]) 18 | return A 19 | 20 | A = {"x": 1, "y1": 2, "y2": 3} 21 | 22 | 23 | def example_21_2(): 24 | """ 25 | Example 21.2: An example that illustrates the importance of choosing an 26 | appropriate ordering when running a multidisciplinary analysis. 
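    Two orderings of the same three disciplinary analyses, [F1, F2, F3] and
    [F1, F3, F2], are run with Gauss-Seidel from the same initial assignment,
    and their convergence histories are plotted side by side.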
27 | """ 28 | def F1(A): 29 | A["y1"] = A["y2"] - A["x"] 30 | return A 31 | 32 | def F2(A): 33 | A["y2"] = np.sin(A["y1"] + A["y3"]) 34 | return A 35 | 36 | def F3(A): 37 | A["y3"] = np.cos(A["x"] + A["y2"] + A["y1"]) 38 | 39 | def gauss_seidel(Fs, A, k_max, eps=1e-4): 40 | """Gauss-Seidel Algorithm (from Chapter 21), altered for plotting convergence""" 41 | k, converged = 0, False 42 | history = {var: [val] for (var, val) in A.items() if var != "x"} 43 | while (not converged) and (k < k_max): 44 | k += 1 45 | A_old = deepcopy(A) 46 | for F in Fs: 47 | F(A) 48 | converged = np.all([np.isclose(A[v], A_old[v], rtol=eps) for v in A]) 49 | for (var, val) in A.items(): 50 | if var != "x": 51 | history[var].append(val) 52 | return (A, history, converged) 53 | 54 | # Run two orderings for 20 iterations each and plot 55 | k_max = 20 56 | k = np.arange(0, k_max + 1) 57 | orderings = [[F1, F2, F3], [F1, F3, F2]] 58 | _, axs = plt.subplots(nrows=2, figsize=(10, 6), sharex=True) 59 | for i, Fs in enumerate(orderings): 60 | A = {"x": 1.0, "y1": 1.0, "y2": 1.0, "y3": 1.0} 61 | A, history, _ = gauss_seidel(Fs, A, k_max) 62 | print(A) 63 | axs[i].plot(k, history["y1"], label="y1", c="tab:purple") 64 | axs[i].plot(k, history["y2"], label="y2", c="tab:blue") 65 | axs[i].plot(k, history["y3"], label="y3", c="tab:green") 66 | axs[i].scatter(k, history["y1"], c="tab:purple") 67 | axs[i].scatter(k, history["y2"], c="tab:blue") 68 | axs[i].scatter(k, history["y3"], c="tab:green") 69 | axs[i].set_yticks([-2, -1, 0, 1]) 70 | axs[0].legend(bbox_to_anchor=(1.01, 1), loc='upper left') 71 | axs[1].set_xticks([0, 5, 10, 15, 20]) 72 | axs[1].set_xlabel("iteration") 73 | plt.tight_layout() 74 | plt.show() 75 | -------------------------------------------------------------------------------- /src/exercises/ch05_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import numpy as np 4 | 5 | from ch04 import line_search 6 | from ch05 import GradientDescent 7 | 8 | 9 | def exercise_5_2(x0: float): 10 | """Exercise 5.2: Gradient Descent for f(x) = x^4""" 11 | def f(x): return x**4 12 | def deriv(x): return 4*(x**3) 13 | 14 | M = GradientDescent(alpha=1.0) 15 | M.initialize(f, deriv, x0) 16 | print("Initial point: x0 = ", x0) 17 | print("Derivative at x0: ", deriv(x0)) 18 | x = M.step(f, deriv, x0) 19 | print("After 1 iteration of Gradient Descent, x = ", x) 20 | print("Derivative at x: ", deriv(x)) 21 | x = M.step(f, deriv, x) 22 | print("After 2 iterations of Gradient Descent, x = ", x) 23 | print("Derivative at x: ", deriv(x)) 24 | 25 | 26 | def exercise_5_3(): 27 | """Exercise 5.3: Gradient Descent: Unit Step vs. 
Exact Line Search""" 28 | def f(x): return np.exp(x) + np.exp(-x) 29 | def deriv(x): return np.exp(x) - np.exp(-x) 30 | x0 = 10.0 31 | 32 | # Unit Step 33 | M = GradientDescent(alpha=1.0) 34 | M.initialize(f, deriv, x0) 35 | x = M.step(f, deriv, x0) 36 | print("With Unit Step:") 37 | print("After 1 iteration of Gradient Descent, x = ", x) 38 | with np.errstate(over='ignore'): 39 | print("Derivative at x: ", deriv(x), "\n") 40 | print("=> Gradient Descent diverges.") 41 | 42 | # Exact Line Search 43 | with np.errstate(over='ignore'): 44 | x = line_search(f, x0, deriv(x0)) 45 | print("With Exact Line Search:") 46 | print("After 1 iteration of Gradient Descent, x = ", x) 47 | print("Derivative at x: ", deriv(x)) 48 | print("=> Gradient Descent converges to the minimum.") 49 | 50 | 51 | def exercise_5_7(): 52 | """Exercise 5.7: Conjugate Gradient Descent""" 53 | def f(x): return x[0]**2 + x[0]*x[1] + x[1]**2 + 5 54 | def grad_f(x): return np.array([2*x[0] + x[1], 2*x[1] + x[0]]) 55 | x0 = np.ones(2) 56 | 57 | # Conjugate Gradient Descent (taken directly from ch05.py) 58 | g = grad_f(x0) 59 | d = -g 60 | 61 | # First Step 62 | g_prime = grad_f(x0) 63 | beta = np.maximum(0, np.dot(g_prime, g_prime - g) / np.dot(g, g)) 64 | d = -g_prime + beta*d 65 | x = line_search(f, x0, d) 66 | g = g_prime.copy() 67 | print("After 1 iteration of CG, the normalized descent direction is d = ", d/np.linalg.norm(d)) 68 | 69 | # Second Step 70 | g_prime = grad_f(x) 71 | beta = np.maximum(0, np.dot(g_prime, g_prime - g) / np.dot(g, g)) 72 | d = -g_prime + beta*d 73 | x = line_search(f, x, d) 74 | print("After 2 iterations of CG, x = ", x) 75 | print("Gradient at x: ", grad_f(x)) 76 | print("=> Conjugate Gradient Descent converges after 2 iterations.") 77 | -------------------------------------------------------------------------------- /src/ch10.py: -------------------------------------------------------------------------------- 1 | """Chapter 10: Constraints""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | def penalty_method(f: Callable[[np.ndarray], float], 8 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 9 | p: Callable[[np.ndarray], float], 10 | x: np.ndarray, 11 | k_max: int, 12 | rho: float = 1.0, 13 | gamma: float = 2.0) -> np.ndarray: 14 | """ 15 | The penalty method for objective function `f`, penalty function `p`, initial 16 | point `x`, number of iterations `k_max`, initial penalty `rho` > 0, and 17 | penalty multiplier `gamma` > 1. The method `minimize` should be replaced 18 | with a suitable unconstrained minimization method. 19 | """ 20 | for _ in range(k_max): 21 | x = minimize(lambda x: f(x) + rho * p(x), x) 22 | p *= gamma 23 | if p(x) == 0: 24 | return x 25 | return x 26 | 27 | 28 | def augmented_lagrange_method(f: Callable[[np.ndarray], float], 29 | h: Callable[[np.ndarray], np.ndarray], 30 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 31 | x: np.ndarray, 32 | k_max: int, 33 | rho: float = 1.0, 34 | gamma: float = 2.0) -> np.ndarray: 35 | """ 36 | The augmented Lagrange method for objective function `f`, equality constraint 37 | function `h`, initial point `x`, number of iterations `k_max`, initial penalty 38 | `rho` > 0, and penalty multiplier `gamma` > 1. The function `minimize` 39 | should be replaced with the minimization method of your choice. 
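
    A minimal usage sketch (hypothetical problem; `minimize` must be supplied
    by the caller, e.g. a wrapper around an unconstrained optimizer):

        def f(x): return x[0]**2 + x[1]**2
        def h(x): return np.array([x[0] + x[1] - 1.0])  # feasible when h(x) = 0
        x = augmented_lagrange_method(f, h, minimize, np.zeros(2), k_max=10)
        # the constrained optimum is x = [0.5, 0.5]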
40 | """ 41 | lam = np.zeros(len(h(x))) 42 | for _ in range(k_max): 43 | def p(x): return ((rho/2) * np.sum(h(x)**2)) - np.dot(lam, h(x)) 44 | x = minimize(lambda x: f(x) + p(x), x) 45 | lam -= rho * h(x) 46 | rho *= gamma 47 | return x 48 | 49 | 50 | def interior_point_method(f: Callable[[np.ndarray], float], 51 | p: Callable[[np.ndarray], float], 52 | minimize: Callable[[Callable, np.ndarray], np.ndarray], 53 | x: np.ndarray, 54 | rho: float = 1.0, 55 | gamma: float = 2.0, 56 | eps: float = 0.001) -> np.ndarray: 57 | """ 58 | The interior point method for objective function `f`, barrier function `p`, 59 | initial point `x`, initial penalty `rho` > 0, penalty multiplier `gamma` > 1, 60 | and stopping tolerance `eps` > 0. 61 | """ 62 | delta = np.inf 63 | while delta > eps: 64 | x_prime = minimize(lambda x: f(x) + (p(x) / rho), x) 65 | delta = np.linalg.norm(x_prime - x) 66 | x = x_prime 67 | rho *= gamma 68 | return x 69 | -------------------------------------------------------------------------------- /src/figures/ch04_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch04 import backtracking_line_search, solve_trust_region_subproblem 7 | from TestFunctions import rosenbrock 8 | from convenience import plot_contour 9 | 10 | 11 | def figure_4_2(): 12 | """ 13 | Figure 4.2: Backtracking line search used on the Rosenbrock function. 14 | The black lines show the eight iterations taken by the descent method and 15 | the red lines show the points considered during each line search. 16 | """ 17 | x = np.array([-1.75, -1.6]) # Starting point 18 | 19 | fig = plt.figure() 20 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 2.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 21 | plt.scatter([x[0]], [x[1]], c="black", s=10) 22 | plt.annotate("1", x, xytext=[-5, -13], textcoords='offset points') 23 | for i in range(7): 24 | d = -rosenbrock.grad(x) # use negative gradient as the descent direction 25 | alpha_opt = backtracking_line_search(rosenbrock, rosenbrock.grad, x, d, alpha=100.0) 26 | x_next = x + alpha_opt*d 27 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 28 | plt.scatter([x_next[0]], [x_next[1]], c="black", s=10) 29 | plt.annotate(str(i + 2), x_next, xytext=[-5, -13], textcoords='offset points') 30 | x = x_next 31 | plt.title("Figure 4.2") 32 | plt.show() 33 | 34 | 35 | def figure_4_9(): # TODO - Needs some more work - something isn't working quite right 36 | """Figure 4.9: Trust region optimization used on the Rosenbrock function""" 37 | x = np.array([-1.75, -1.75]) # Starting point 38 | k_max, eta_1, eta_2, gamma_1, gamma_2, delta = 9, 0.25, 2.0, 0.5, 2.0, 1.0 39 | 40 | fig = plt.figure() 41 | plot_contour(fig, rosenbrock, xlim=(-2.1, 2.1), ylim=(-2.1, 3.1), xstride=0.01, ystride=0.01, levels=[0, 1, 2, 3, 5, 9, 25, 50, 100]) 42 | plt.scatter([x[0]], [x[1]], c="black", s=10) 43 | plt.annotate("1", x, xytext=[-5, -13], textcoords='offset points') 44 | 45 | # Trust Region Descent (taking from ch04.py) 46 | y = rosenbrock(x) 47 | for i in range(k_max): 48 | circle = plt.Circle((x[0], x[1]), delta, color='black', fill=False, alpha=0.1*(i + 1)) 49 | plt.gca().add_patch(circle) 50 | x_prime, y_prime = solve_trust_region_subproblem(rosenbrock.grad, rosenbrock.hess, x, delta) 51 | r = (y - rosenbrock(x_prime)) / (y - y_prime) 52 | if r < eta_1: 53 | delta *= gamma_1 54 | else: 55 | x, y = 
x_prime, y_prime 56 | if r > eta_2: 57 | delta *= gamma_2 58 | plt.scatter([x[0]], [x[1]], c="black", s=10) 59 | plt.annotate(str(i + 2), x, xytext=[-5, -13], textcoords='offset points') 60 | plt.xlim((-2.1, 2.1)) 61 | plt.ylim((-2.1, 3.1)) 62 | plt.gca().set_aspect('equal') 63 | plt.show() 64 | -------------------------------------------------------------------------------- /src/ch15.py: -------------------------------------------------------------------------------- 1 | """Chapter 15: Probabilistic Surrogate Models""" 2 | 3 | import numpy as np 4 | 5 | from scipy.stats import multivariate_normal 6 | from typing import Callable 7 | 8 | 9 | 10 | def mu(X: np.ndarray, m: Callable[[np.ndarray], float]) -> np.ndarray: 11 | """ 12 | A method for constructing a mean vector given a list of design points `X` 13 | and a mean function `m`. 14 | """ 15 | return np.apply_along_axis(m, 1, X) 16 | 17 | 18 | def Sigma(X: np.ndarray, k: Callable[[np.ndarray, np.ndarray], float]) -> np.ndarray: 19 | """ 20 | A method for constructing a covariance matrix given one list of design 21 | points `X` and a covariance function `k`. 22 | """ 23 | return np.ndarray([[k(x, x_prime) for x_prime in X] for x in X]) 24 | 25 | 26 | def K(X: np.ndarray, X_prime: np.ndarray, k: Callable[[np.ndarray, np.ndarray], float]) -> np.ndarray: 27 | """ 28 | A method for constructing a covariance matrix given two lists of design 29 | points `X` and `X_prime`, and a covariance function `k`. 30 | """ 31 | return np.ndarray([[k(x, x_prime) for x_prime in X_prime] for x in X]) 32 | 33 | 34 | def mvnrand(mu: np.ndarray, Sigma: np.ndarray, inflation: float = 1e-6) -> np.ndarray: 35 | """TODO""" 36 | N = multivariate_normal(mu, Sigma + inflation*np.eye(len(mu))) 37 | return N.rvs() 38 | 39 | class GaussianProcess(): 40 | """TODO""" 41 | def __init__(self, 42 | m: Callable[[np.ndarray], float], 43 | k: Callable[[np.ndarray, np.ndarray], float], 44 | X: np.ndarray, 45 | y: np.ndarray, 46 | v: float): 47 | self.m = m # mean 48 | self.k = k # covariance function 49 | self.X = X # design points 50 | self.y = y # objective values 51 | self.v = v # noise variance 52 | 53 | def rand(self, X: np.ndarray) -> np.ndarray: 54 | """TODO""" 55 | return mvnrand(mu(X, self.m), Sigma(X, self.k)) 56 | 57 | def predict(self, X_pred: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 58 | """TODO""" 59 | m, k, v = self.m, self.k, self.v 60 | tmp = np.linalg.solve(K(X_pred, self.X, k), K(self.X, self.X, k) + v * np.eye(len(self.X))) 61 | mu_p = mu(X_pred, m) + tmp @ (self.y - mu(self.X, m)) 62 | S = K(X_pred, X_pred, k) - tmp @ K(self.X, X_pred, k) 63 | v_p = np.diag(S) + np.finfo(np.float64).eps # eps prevents numerical issues 64 | return (mu_p, v_p) 65 | 66 | def append(self, x: np.ndarray, y: float): 67 | if len(self.X) == 0: 68 | self.X = np.array([x]) 69 | self.y = np.array([y]) 70 | else: 71 | self.X = np.append(self.X, x) 72 | self.y = np.append(self.y, y) 73 | 74 | def pop(self) -> tuple[np.ndarray, float]: 75 | popped_x = self.X[-1] 76 | popped_y = self.y[-1] 77 | self.X = np.delete(self.X, -1) 78 | self.y = np.delete(self.y, -1) 79 | return (popped_x, popped_y) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # optimization-code-py 2 | 3 | [![Python 
package](https://github.com/griffinbholt/optimization-code-py/actions/workflows/python-package.yml/badge.svg)](https://github.com/griffinbholt/optimization-code-py/actions/workflows/python-package.yml) 4 | 5 | *Original Julia Code by: Mykel Kochenderfer and Tim Wheeler* 6 | 7 | *Python Versions by: Griffin Holt* 8 | 9 | Python versions of all typeset code blocks from the book, [Algorithms for Optimization](https://algorithmsbook.com/optimization/). 10 | 11 | I share this content in the hopes that it helps you and makes the decision making algorithms more approachable and accessible (especially to those not as familiar with Julia). Thank you for reading! 12 | 13 | If you encounter any issues or have pressing comments, please [file an issue](https://github.com/griffinbholt/optimization-code-py/issues/new/choose). (There are likely to still be bugs as I have not finished testing all of the classes and functions.) 14 | 15 | ## Progress Update: (19 Mar 2024) 16 | 17 | | Chapter(s) | Written | Tested | Notes | 18 | |--:|:--|:--|:--| 19 | | 1 | N/A | N/A | No code blocks in this chapter | 20 | | 2 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 21 | | 3 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 22 | | 4 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 23 | | 5 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 24 | | 6 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 25 | | 7 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 26 | | 8 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌ 70% | `adaptive_simulated_annealing`, `natural_evolution_strategies`, and `covariance_matrix_adaptation` need to be tested | 27 | | 9 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 28 | | 10 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 29 | | 11 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 30 | | 12 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 31 | | 13 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 32 | | 14 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 33 | | 15 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 34 | | 16 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 35 | | 17 | N/A | N/A | No code blocks in this chapter | 36 | | 18 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 37 | | 19 | ▌▌▌▌▌▌▌▌▌▌ 100% | 0% | Needs to be tested | 38 | | 20 | 0% | 0% | Need to figure out replacement library for `ExprRules.jl` | 39 | | 21 | ▌▌▌▌▌▌▌▌▌▌ 100% | ▌▌▌▌▌▌▌▌▌▌ 100% | **Ready for use** | 40 | 41 | I have also written code for pertinent figures, examples, exercises through Chapter 9. 42 | 43 | I have also written code for test functions (`TestFunctions.py`) and convenience functions (`convenience.py`). 
44 | 45 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/tests/ch06/test_second_order_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch06 import newtons_method, secant_method, DFP, BFGS, LimitedMemoryBFGS 6 | from TestFunctions import booth, branin, rosenbrock, wheeler 7 | 8 | class TestSecondOrderMethods(): 9 | def test_newtons_method(self, eps=1e-8): 10 | f_min, x_min = booth.global_min() 11 | x = np.array([9.0, 8.0]) 12 | x_prime = newtons_method(booth.grad, booth.hess, x, eps=1e-5, k_max=1) 13 | assert np.abs(booth(x_prime) - f_min) < eps 14 | assert np.all(np.abs(x_prime - x_min) < eps) 15 | 16 | def test_secant_method(self, eps=1e-8): 17 | def f(x): return np.exp(x) + np.exp(-x) - 3*x + 2 18 | def f_prime(x): return np.exp(x) - np.exp(-x) - 3 19 | x_min = np.log((3 + np.sqrt(13))/2) 20 | f_min = f(x_min) 21 | 22 | x0, x1 = -4, -3 23 | x = secant_method(f_prime, x0, x1, eps) 24 | assert np.abs(x - x_min) < eps 25 | assert np.abs(f(x) - f_min) < eps 26 | 27 | def test_DFP(self, eps=1e-8): 28 | M = DFP() 29 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 30 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 31 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 32 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 33 | 34 | def test_BFGS(self, eps=1e-8): 35 | M = BFGS() 36 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 37 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 38 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 39 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 40 | 41 | def test_limited_memory_BFGS(self, eps=1e-4): 42 | for m in range(1, 4): 43 | M = LimitedMemoryBFGS(m) 44 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 45 | self.run_on_branin(max_steps=7, x=np.ones(2)*-5, M=M, eps=eps) 46 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 47 | with np.errstate(over="ignore", invalid="ignore"): 48 | self.run_on(wheeler, max_steps=6, x=np.ones(2)*5, M=M, eps=eps) 49 | 50 | def run_on(self, f, max_steps, x, M, eps): 51 | f_min, x_min = f.global_min() 52 | M.initialize(f, f.grad, x) 53 | for _ in range(max_steps): 54 | x = M.step(f, f.grad, x) 55 | assert np.abs(f(x) - f_min) < eps 56 | assert np.all(np.abs(x - x_min) < eps) 57 | 58 | def run_on_branin(self, max_steps, x, M, eps): 59 | f_min, x_min = branin.global_min() 60 | M.initialize(branin, branin.grad, x) 61 | for _ in range(max_steps): 62 | x = M.step(branin, branin.grad, x) 63 | assert np.abs(branin(x) - f_min[0]) < eps 64 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 65 | -------------------------------------------------------------------------------- /src/examples/ch08_examples.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from scipy.stats import norm, multivariate_normal 7 | 8 | from ch08 import cross_entropy_method 9 | from TestFunctions import ackley 10 | 11 | 12 | def example_8_2(): 13 | """ 14 | Example 8.2: Exploring the effect of distribution variance and temperature 15 | on the performance of simulated annealing. 
The blue regions indicate the 16 | 5% to 95% and 25% to 75% empirical Gaussian quantiles of the objective 17 | function value. 18 | """ 19 | f = ackley 20 | x0 = np.array([15.0, 15.0]) 21 | n_trials = 500 22 | k_max = 100 23 | iterations = np.arange(k_max + 1) 24 | 25 | _, ax = plt.subplots(3, 3, figsize=(10, 10), sharex=True, sharey=True) 26 | for p, sigma in enumerate([1.0, 5.0, 25.0]): 27 | T = multivariate_normal(np.zeros(2), sigma * np.eye(2)) 28 | for q, t1 in enumerate([1.0, 10.0, 25.0]): 29 | def t(k, t1=t1): return t1 / k 30 | traj = np.zeros((n_trials, k_max + 1)) 31 | 32 | # Run Trials 33 | for j in range(n_trials): 34 | # Simulated Annealing 35 | x = x0.copy() 36 | y = f(x) 37 | traj[j, 0] = y 38 | 39 | x_best, y_best = x, y 40 | for k in range(1, k_max + 1): 41 | x_prime = x + T.rvs() 42 | y_prime = f(x_prime) 43 | delta_y = y_prime - y 44 | if (delta_y <= 0) or (np.random.rand() < np.exp(-delta_y / t(k))): 45 | x, y = x_prime, y_prime 46 | if y_prime < y_best: 47 | x_best, y_best = x_prime, y_prime 48 | traj[j, k] = y 49 | 50 | # Plot the results 51 | traj_means = np.mean(traj, axis=0) 52 | traj_stds = np.std(traj, axis=0) 53 | quantiles = np.zeros((4, k_max + 1)) 54 | for j in range(k_max + 1): 55 | quantiles[:, j] = norm(traj_means[j], traj_stds[j]).ppf(q=[0.05, 0.25, 0.75, 0.95]) 56 | 57 | ax[p, q].fill_between( 58 | iterations, 59 | quantiles[3, :], 60 | quantiles[0, :], 61 | color="tab:blue", 62 | alpha=0.15 63 | ) 64 | ax[p, q].fill_between( 65 | iterations, 66 | quantiles[2, :], 67 | quantiles[1, :], 68 | color="tab:blue", 69 | alpha=0.50 70 | ) 71 | ax[p, q].plot(iterations, traj_means, color="tab:blue") 72 | ax[p, q].set_ylim((-5, 30)) 73 | ax[p, q].set_xlim((0, k_max)) 74 | ax[p, q].set_title("$\sigma = $" + str(int(sigma)) + ", $t^{(1)} = $" + str(int(t1))) 75 | for j in range(3): 76 | ax[2, j].set_xlabel("iteration") 77 | ax[j, 0].set_ylabel("$y$") 78 | plt.suptitle("Example 8.2") 79 | plt.show() 80 | 81 | 82 | def example_8_3(): 83 | """ 84 | Example 8.3: An example of using the cross-entropy method. 85 | 86 | We can use `scipy.stats` classes to represent, sample from, and fit proposal 87 | distributions. The parameter vector `theta` is replaced by a distribution `P`. 88 | Calling `P.rvs(m)` will produce a m x n matrix corresponding to m samples of 89 | n-dimensional samples from `P`, and calling `fit` will fit a new distribution 90 | of the given input type. 
91 | """ 92 | np.random.seed(0) 93 | def f(x): return np.linalg.norm(x) 94 | mu = np.array([0.5, 1.5]) 95 | Sigma = np.array([[1.0, 0.2], [0.2, 2.0]]) 96 | P = multivariate_normal(mu, Sigma) 97 | k_max = 10 98 | P = cross_entropy_method(f, P, k_max) 99 | print("P.mu =", P.mean) 100 | -------------------------------------------------------------------------------- /src/convenience.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import cm, ticker 5 | from matplotlib.patches import Ellipse 6 | import matplotlib.transforms as transforms 7 | 8 | 9 | VIRIDIS_REV = cm.viridis.reversed() 10 | 11 | 12 | def normalize(x: np.ndarray, 13 | ord: int | float | str = 2, 14 | axis: int | tuple[int, int] = None, 15 | keepdims: bool = False) -> np.ndarray: 16 | nmlzd_x = np.divide(x, np.linalg.norm(x, ord, axis, keepdims)) 17 | nmlzd_x = np.where(np.abs(nmlzd_x) < 1e-16, 0, nmlzd_x) 18 | return nmlzd_x 19 | 20 | 21 | def plot_surface(fig, f, xlim, ylim, zlim, xstride, ystride, subplot_coords=None): 22 | X, Y, Z = _make_3d_data(f, xlim, ylim, xstride, ystride) 23 | if subplot_coords is not None: 24 | ax = fig.add_subplot(*subplot_coords, projection='3d') 25 | else: 26 | ax = fig.add_subplot(projection='3d') 27 | ax.plot_surface(X, Y, Z, cmap=VIRIDIS_REV) 28 | ax.set_zlim(*zlim) # Customize the z-axis 29 | ax.set_xlabel('$x_1$') 30 | ax.set_ylabel('$x_2$') 31 | return ax 32 | 33 | 34 | def plot_contour(fig, f, xlim, ylim, xstride, ystride, levels=None, filled=False, clabel=False, subplot_coords=None): 35 | X, Y, Z = _make_3d_data(f, xlim, ylim, xstride, ystride) 36 | if subplot_coords is not None: 37 | ax = fig.add_subplot(*subplot_coords) 38 | else: 39 | ax = fig.add_subplot() 40 | if filled: 41 | if levels is not None: 42 | CS = ax.contourf(X, Y, Z, levels=levels, cmap=VIRIDIS_REV, zorder=1) 43 | else: 44 | CS = ax.contourf(X, Y, Z, locator=ticker.LogLocator(), cmap=VIRIDIS_REV, zorder=1) 45 | else: 46 | if levels is not None: 47 | CS = ax.contour(X, Y, Z, levels=levels, cmap=VIRIDIS_REV, zorder=1) 48 | else: 49 | CS = ax.contour(X, Y, Z, locator=ticker.LogLocator(), cmap=VIRIDIS_REV, zorder=1) 50 | if clabel: 51 | ax.clabel(CS, inline=True, fontsize=10) 52 | ax.set_aspect('equal') 53 | ax.set_xlabel('$x_1$') 54 | ax.set_ylabel('$x_2$') 55 | return ax 56 | 57 | 58 | def _make_3d_data(f, xlim, ylim, xstride, ystride): 59 | X = np.arange(xlim[0], xlim[1], xstride) 60 | Y = np.arange(ylim[0], ylim[1], ystride) 61 | X, Y = np.meshgrid(X, Y) 62 | Z = f(np.array([X, Y])) 63 | return X, Y, Z 64 | 65 | 66 | def confidence_ellipse(mean, cov, ax, n_std=3.0, facecolor='none', **kwargs): 67 | """ 68 | Create a plot of the covariance confidence ellipse of *x* and *y*. 69 | 70 | Parameters 71 | ---------- 72 | mean: array-like, shape (2, ) 73 | Mean 74 | 75 | cov : array-like, shape (2, 2) 76 | Covariance matrix 77 | 78 | ax : matplotlib.axes.Axes 79 | The axes object to draw the ellipse into. 80 | 81 | n_std : float 82 | The number of standard deviations to determine the ellipse's radiuses. 83 | 84 | **kwargs 85 | Forwarded to `~matplotlib.patches.Ellipse` 86 | 87 | Returns 88 | ------- 89 | matplotlib.patches.Ellipse 90 | """ 91 | pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1]) 92 | # Using a special case to obtain the eigenvalues of this 93 | # two-dimensional dataset. 
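    # (For the correlation matrix [[1, p], [p, 1]] the eigenvalues are 1 + p
    # and 1 - p, so the unit ellipse below has radii sqrt(1 + p) and
    # sqrt(1 - p) along axes rotated 45 degrees; the affine transform further
    # down rescales those axes by the x and y standard deviations and
    # re-centers the ellipse at the mean.)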
94 | ell_radius_x = np.sqrt(1 + pearson) 95 | ell_radius_y = np.sqrt(1 - pearson) 96 | ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2, 97 | facecolor=facecolor, **kwargs) 98 | 99 | # Calculating the standard deviation of x from 100 | # the squareroot of the variance and multiplying 101 | # with the given number of standard deviations. 102 | scale_x = np.sqrt(cov[0, 0]) * n_std 103 | mean_x = mean[0] 104 | 105 | # calculating the standard deviation of y ... 106 | scale_y = np.sqrt(cov[1, 1]) * n_std 107 | mean_y = mean[1] 108 | 109 | transf = transforms.Affine2D() \ 110 | .rotate_deg(45) \ 111 | .scale(scale_x, scale_y) \ 112 | .translate(mean_x, mean_y) 113 | 114 | ellipse.set_transform(transf + ax.transData) 115 | return ax.add_patch(ellipse) 116 | -------------------------------------------------------------------------------- /src/ch18.py: -------------------------------------------------------------------------------- 1 | """Chapter 18: Uncertainty Propagation""" 2 | 3 | import numdifftools as nd 4 | import numpy as np 5 | 6 | from itertools import product 7 | from numpy.polynomial import Polynomial 8 | from scipy import integrate 9 | from scipy.special import factorial 10 | from typing import Callable 11 | 12 | from ch15 import K, GaussianProcess 13 | 14 | 15 | def taylor_approx(f: Callable[[np.ndarray], float], 16 | mu: np.ndarray, 17 | v: np.ndarray, 18 | secondorder: bool = False) -> tuple[float, float]: 19 | """ 20 | A method for automatically computing the Taylor approximation of the mean 21 | and variance of objective function `f` at design point `x` with noise mean 22 | vector `mu` and variance vector `v`. The Boolean parameter `secondorder` 23 | controls whether the first- or second-order approximation is compared.s 24 | """ 25 | mu_hat = f(mu) 26 | grad = nd.Gradient(f)(mu) 27 | v_hat = np.do(grad**2, v) 28 | if secondorder: 29 | H = nd.Hessian(f)(mu) 30 | mu_hat += np.dot(np.diag(H), v) / 2 31 | v_hat += np.dot(v, (H**2) @ v) / 2 32 | return (mu_hat, v_hat) 33 | 34 | 35 | def legendre(i: int) -> Polynomial: 36 | """ 37 | Method for constructing Legendre polynomial orthogonal basis functions, 38 | where `i` indicates the construction of b_i. 39 | """ # TODO - Test to make sure constructs correct polynomial 40 | n = i - 1 41 | p = Polynomial([-1, 0, 1])**n 42 | p = p.deriv(n) 43 | return p / ((2**n)*factorial(n)) 44 | 45 | 46 | def laguerre(i: int) -> Polynomial: 47 | """ 48 | Method for constructing Laguerre polynomial orthogonal basis functions, 49 | where `i` indicates the construction of b_i. 50 | """ # TODO - Test to make sure constructs correct polynomial 51 | p = Polynomial([1]) 52 | for _ in range(i - 1): 53 | p = (p.deriv() - p).integ() + 1 54 | return p 55 | 56 | 57 | def hermite(i: int) -> Polynomial: 58 | """ 59 | Method for constructing Hermite polynomial orthogonal basis functions, 60 | where `i` indicates the construction of b_i. 
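    For reference, this recurrence produces the probabilists' Hermite
    polynomials: e.g., hermite(3) evaluates to x**2 - 1 and hermite(4) to
    x**3 - 3*x.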
61 | """ # TODO - Test to make sure constructs correct polynomial 62 | p = Polynomial([1]) 63 | x = Polynomial([0, 1]) 64 | for _ in range(i - 1): 65 | p = x*p - p.deriv() 66 | return p 67 | 68 | 69 | def orthogonal_recurrence(bs: list[Polynomial], 70 | p: Callable[[float], float], 71 | dom: tuple[float, float], 72 | eps: float = 1e-6) -> Polynomial: 73 | """ 74 | The Stieltjes algorithm for constructing the next polynomial basis function 75 | b_{i + 1} according to the orthogonal recurrence relation, where `bs` contains 76 | {b_1, ..., b_i}, `p` is the probability distribution, and `dom` is a tuple 77 | containing a lower and upper bound for z. The optional parameter `eps` 78 | controls the absolute tolerance of the numerical integration. We make use of 79 | the `numpy.polynomials.Polynomial` class. 80 | """ 81 | i = len(bs) 82 | c1 = integrate.quad(lambda z: z*(bs[i](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 83 | c2 = integrate.quad(lambda z: (bs[i](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 84 | alpha = c1 / c2 85 | if i > 1: 86 | c3 = integrate.quad(lambda z: (bs[i - 1](z)**2)*p(z), dom[0], dom[1], epsabs=eps)[0] 87 | beta = c2 / c3 88 | return Polynomial([-alpha, 1])*bs[i] - beta*bs[i - 1] 89 | return Polynomial([-alpha, 1])*bs[i] 90 | 91 | 92 | def polynomial_chaos_bases(bases1d: list[Callable[[float], float]]) -> list[Callable[[float], float]]: 93 | """ 94 | A method for constructing multivariate basis functions where `bases1d` contains 95 | lists of univariate orthogonal basis functions for each random variable. 96 | """ 97 | bases = [] 98 | for a in product(*bases1d): 99 | bases.append(lambda z: np.prod([b(z[i]) for (i, b) in enumerate(a)])) 100 | return bases 101 | 102 | 103 | def bayesian_monte_carlo(GP: GaussianProcess, 104 | w: np.ndarray, 105 | mu_z: np.ndarray, 106 | Sigma_z: np.ndarray) -> tuple[float, float]: 107 | """ 108 | A method for obtaining the Bayesian Monte Carlo estimate for the expected 109 | value of a function under a Gaussian process `GP` with a Gaussian kernel 110 | with weights `w`, where the variables are drawn from a normal distribution 111 | with mean `mu_z` and covariance `Sigma_z`. 112 | """ 113 | W = np.diag(w**2) 114 | invK = np.linalg.inv(K(GP.X, GP.X, GP.k)) 115 | q = np.exp(-(np.dot(GP.X - mu_z, np.linalg.inv(W + Sigma_z @ (GP.X - mu_z)))) / 2) # TODO - Need to check/test dimensions 116 | q *= np.linalg.det((1/W) @ Sigma_z + np.eye(len(w)))**(-0.5) 117 | mu = np.dot(q, invK @ GP.y) 118 | v = np.linalg.det(2 * (1/W) @ Sigma_z + np.eye(len(w)))**(-0.5) - np.dot(q, invK @ q)[0] 119 | return (mu, v) 120 | -------------------------------------------------------------------------------- /src/ch06.py: -------------------------------------------------------------------------------- 1 | """Chapter 6: Second-Order Methods""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | from ch04 import line_search 8 | from ch05 import DescentMethod 9 | 10 | 11 | def newtons_method(grad_f: Callable[[np.ndarray], np.ndarray], 12 | H: Callable[[np.ndarray], np.ndarray], 13 | x: np.ndarray, 14 | eps: float, 15 | k_max: int) -> np.ndarray: 16 | """ 17 | Newton's method, which takes the gradient of the function `grad_f`, 18 | the Hessian of the objective function `H`, an initial point `x`, a step size 19 | tolerance `eps`, and a maximum number of iterations `k_max`. 
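    A minimal usage sketch (the quadratic objective and starting point are
    illustrative, not from the book):

        grad_f = lambda x: np.array([2*x[0], 8*x[1]])
        H = lambda x: np.array([[2.0, 0.0], [0.0, 8.0]])
        x_min = newtons_method(grad_f, H, np.array([3.0, -2.0]), eps=1e-6, k_max=10)
        # converges to the minimizer [0., 0.] after one step on this quadratic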
20 | """ 21 | k, Delta = 0, np.full(len(x), np.inf) 22 | while (np.linalg.norm(Delta) > eps) and (k < k_max): 23 | Delta = np.linalg.solve(H(x), grad_f(x)) 24 | x -= Delta 25 | k += 1 26 | return x 27 | 28 | 29 | def secant_method(f_prime: Callable[[float], float], x0: float, x1: float, eps: float): 30 | """ 31 | The secant method for univariate function minimization. The inputs are the 32 | first derivative `f_prime` of the target function, two initial points `x0` 33 | and `x1`, and the desired tolerance `eps`. The final x-coordinate is 34 | returned. 35 | """ 36 | g0 = f_prime(x0) 37 | delta = np.inf 38 | while np.abs(delta) > eps: 39 | g1 = f_prime(x1) 40 | delta = ((x1 - x0) / (g1 - g0)) * g1 41 | x0, x1, g0 = x1, x1 - delta, g1 42 | return x1 43 | 44 | 45 | class DFP(DescentMethod): 46 | """The Davidon-Fletcher-Powell descent method""" 47 | def __init__(self, Q: np.ndarray = None): 48 | self.Q = Q # approximate inverse Hessian 49 | 50 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 51 | m = len(x) 52 | self.Q = np.eye(m) 53 | 54 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 55 | g = grad_f(x) 56 | x_prime = line_search(f, x, -self.Q @ g) 57 | g_prime = grad_f(x_prime) 58 | delta = x_prime - x 59 | gamma = g_prime - g 60 | self.Q -= self.Q_update(delta, gamma, self.Q @ gamma) 61 | return x_prime 62 | 63 | def Q_update(self, delta: np.ndarray, gamma: np.ndarray, Q_gamma: np.ndarray) -> np.ndarray: 64 | return (np.outer(Q_gamma, Q_gamma) / np.dot(Q_gamma, gamma)) - (np.outer(delta, delta) / np.dot(delta, gamma)) 65 | 66 | 67 | class BFGS(DFP): 68 | """ 69 | The Broyden-Fletcher-Goldfarb-Shanno descent method 70 | 71 | NOTE: BFGS is the same as DFP, except for the `Q` update rule. 72 | """ 73 | def __init__(self, Q: np.ndarray = None): 74 | super().__init__(Q) 75 | 76 | def Q_update(self, delta: np.ndarray, gamma: np.ndarray, Q_gamma: np.ndarray) -> np.ndarray: 77 | outer_dQg = np.outer(delta, Q_gamma) 78 | dot_dg = np.dot(delta, gamma) 79 | return ((outer_dQg + outer_dQg.T) / dot_dg)\ 80 | - ((1 + (np.dot(Q_gamma, gamma) / dot_dg)) * (np.outer(delta, delta) / dot_dg)) 81 | 82 | 83 | class LimitedMemoryBFGS(DescentMethod): 84 | """ 85 | The Limited-memory BFGS descent method, which avoids storing the approximate 86 | inverse Hessian. The parameter `m` determines the history size. It also 87 | stores the step differences `deltas`, the gradient changes `gammas`, and 88 | storage vectors `qs`. 
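    A usage sketch mirroring the pattern in the test suite (`f`, `grad_f`, `x`,
    and `k_max` are placeholders for the user's problem):

        M = LimitedMemoryBFGS(m=3)
        M.initialize(f, grad_f, x)
        for _ in range(k_max):
            x = M.step(f, grad_f, x)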
89 | """ 90 | def __init__(self, m: int, deltas: list[np.ndarray] = None, gammas: list[np.ndarray] = None, qs: np.ndarray = None): 91 | self.m = m # history size 92 | self.deltas = deltas # step differences 93 | self.gammas = gammas # gradient changes 94 | self.qs = qs # storage vectors 95 | 96 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 97 | self.deltas = [] 98 | self.gammas = [] 99 | self.qs = [] 100 | 101 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 102 | g = grad_f(x) 103 | m = len(self.deltas) 104 | if m > 0: 105 | q = g.copy() 106 | for i in range(m - 1, -1, -1): 107 | self.qs[i] = q.copy() 108 | q -= (np.dot(self.deltas[i], q) / np.dot(self.gammas[i], self.deltas[i])) * self.gammas[i] 109 | z = (self.gammas[-1] * self.deltas[-1] * q) / np.dot(self.gammas[-1], self.gammas[-1]) 110 | for i in range(m): 111 | z += self.deltas[i] * ((np.dot(self.deltas[i], self.qs[i]) - np.dot(self.gammas[i], z)) / np.dot(self.gammas[i], self.deltas[i])) 112 | x_prime = line_search(f, x, -z) 113 | else: 114 | x_prime = line_search(f, x, -g) 115 | g_prime = grad_f(x_prime) 116 | self.deltas.append(x_prime - x); self.gammas.append(g_prime - g); self.qs.append(np.zeros(len(x))) 117 | while len(self.deltas) > self.m: 118 | self.deltas.pop(0); self.gammas.pop(0); self.qs.pop(0) 119 | return x_prime 120 | -------------------------------------------------------------------------------- /src/tests/ch08/test_stochastic_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import cvxpy as cp 4 | import numpy as np 5 | import warnings 6 | 7 | from scipy.stats import norm, multivariate_normal 8 | 9 | from ch05 import GradientDescent, Adam, HyperNesterovMomentum 10 | from ch08 import * 11 | from TestFunctions import ackley, booth, branin, rosenbrock, wheeler 12 | 13 | 14 | class TestStochasticMethods(): 15 | def test_noisy_descent(self, eps=1e-8): 16 | np.random.seed(42) 17 | def sigma(k): return 1/(k**3) 18 | 19 | M = NoisyDescent(GradientDescent(alpha=0.001), sigma) 20 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 21 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 22 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 23 | 24 | M = NoisyDescent(Adam(alpha=0.001, gamma_v=0.9, gamma_s=0.999, eps=1e-8), sigma) 25 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 26 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-4) 27 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-5) 28 | 29 | M = NoisyDescent(HyperNesterovMomentum(alpha_0=0.01, mu=0.000001, beta=0.9), sigma) 30 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 31 | 32 | def run_on(self, f, max_steps, x, M, eps): 33 | f_min, x_min = f.global_min() 34 | M.initialize(f, f.grad, x) 35 | for _ in range(max_steps): 36 | x = M.step(f, f.grad, x) 37 | assert np.abs(f(x) - f_min) < eps 38 | assert np.all(np.abs(x - x_min) < eps) 39 | 40 | def run_on_branin(self, max_steps, x, M, eps): 41 | f_min, x_min = branin.global_min() 42 | M.initialize(branin, branin.grad, x) 43 | for _ in range(max_steps): 44 | x = M.step(branin, branin.grad, x) 45 | assert np.abs(branin(x) - f_min[0]) < eps 46 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in 
x_min.T]) 47 | 48 | def test_rand_positive_spanning_set(self): 49 | warnings.simplefilter(action='ignore', category=FutureWarning) 50 | for alpha in [1.0, 0.25, 0.25/4, 0.25/16]: 51 | for n in [2, 3, 5, 10, 100]: 52 | D = rand_positive_spanning_set(alpha, n).T 53 | assert np.linalg.matrix_rank(D) == n # full row rank 54 | 55 | x = cp.Variable(n + 1) 56 | constraints = [D @ x == -D @ np.ones(n + 1), x >= 0] 57 | problem = cp.Problem(cp.Minimize(0), constraints) 58 | problem.solve() 59 | assert problem.status == "optimal" # Dx = -D1, x >= 0 is feasible 60 | 61 | def test_mesh_adaptive_direct_search(self): 62 | f_min, x_min = wheeler.global_min() 63 | x = np.array([0.7, 0.9]) 64 | x = mesh_adaptive_direct_search(wheeler, x, eps=1e-8) 65 | assert np.abs(wheeler(x) - f_min) < 1e-6 66 | assert np.all(np.abs(x - x_min) < 1e-3) 67 | 68 | def test_simulated_annealing(self): 69 | np.random.seed(42) 70 | x0 = 0.5 71 | def f(x): return np.sin(5*(x + np.pi/3 + np.pi/10)) + 2*np.sin(x + np.pi/4 + np.pi/10) 72 | def t(k, gamma=0.5, t1=1.0): return (gamma**(k - 1)) * t1 73 | x_best, y_best = x0, f(x0) 74 | for _ in range(100): 75 | x = simulated_annealing(f, x=x0, T=norm(0, 1.5), t=t, k_max=20) 76 | if f(x) < y_best: 77 | x_best, y_best = x, f(x) 78 | assert np.abs(y_best - (-2.937)) < 1e-2 79 | 80 | x0 = np.array([10.0, 10.0]) 81 | def t(k, gamma=0.75, t1=10.0): return (gamma**(k - 1)) * t1 82 | T = multivariate_normal(np.zeros(2), 25*np.eye(2)) 83 | x_best, y_best = x0, ackley(x0) 84 | for _ in range(1000): 85 | x = simulated_annealing(ackley, x=x0, T=T, t=t, k_max=100) 86 | if ackley(x) < y_best: 87 | x_best, y_best = x, ackley(x) 88 | assert y_best < 0.15 89 | 90 | def test_adaptive_simulated_annealing(self): 91 | pass 92 | 93 | def test_cross_entropy_method(self, eps=1e-5): 94 | f_min, x_min = branin.global_min() 95 | P = multivariate_normal(np.array([3.0, 7.5]), 5*np.eye(2)) 96 | try_again = True 97 | while try_again: 98 | try: 99 | P = cross_entropy_method(branin, P, k_max=100) 100 | try_again = False 101 | except Exception as e: 102 | print(e) 103 | x = P.mean 104 | assert np.abs(branin(x) - f_min[0]) < eps 105 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 106 | 107 | f_min, x_min = booth.global_min() 108 | P = multivariate_normal(np.array([-0.0, -0.0]), 10*np.eye(2)) 109 | try_again = True 110 | while try_again: 111 | try: 112 | P = cross_entropy_method(booth, P, k_max=10) 113 | try_again = False 114 | except Exception as e: 115 | print(e) 116 | x = P.mean 117 | assert np.abs(booth(x) - f_min) < eps 118 | assert np.all(np.abs(x - x_min) < eps) 119 | 120 | def test_natural_evolution_strategies(self): 121 | pass 122 | 123 | def test_covariance_matrix_adaptation(self): 124 | pass 125 | 126 | -------------------------------------------------------------------------------- /src/exercises/ch06_exercises.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("../") 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from ch05 import GradientDescent, ConjugateGradientDescent 7 | from ch06 import newtons_method 8 | 9 | 10 | def exercise_6_3(x0: float): 11 | """Exercise 6.3: Applying Newton's Method to f(x) = x^2""" 12 | def f(x): return x**2 13 | def grad_f(x): return 2*x 14 | def H(x): return 2 15 | x = x0 16 | 17 | # Single Iteration of Newton's Method (for univariate function) 18 | Delta = grad_f(x) / H(x) 19 | x -= Delta 20 | 21 | print("After 1 iteration, x = ", x) 22 | print("Gradient at x: 
", grad_f(x)) 23 | print("=> Only 1 step of Newton's Method is needed to minimize f(x) = x^2.") 24 | 25 | 26 | def exercise_6_4(): 27 | """ 28 | Exercise 6.4: Applying Newton's Method, Gradient Descent, and the 29 | Conjugate Gradient Method to f(x) = (1/2)x'Hx. 30 | """ 31 | def H(x): return np.array([[1.0, 0.0], [0.0, 1000.0]]) 32 | def f(x): return 0.5 * np.dot(x, H(x) @ x) 33 | def grad_f(x): return H(x) @ x 34 | x0 = np.array([1.0, 1.0]) 35 | 36 | # Newton's Method 37 | print("Newton's Method:") 38 | x_nm = newtons_method(grad_f, H, x0.copy(), eps=1e-5, k_max=1) 39 | print("After 1 iteration of Newton's Method, x = ", x_nm) 40 | print("Gradient at x: ", grad_f(x_nm)) 41 | print("=> Newton's Method converges to the minimum after only 1 iteration.\n") 42 | 43 | # Gradient Descent 44 | print("Gradient Descent: (w/ unnormalized gradient)") 45 | M = GradientDescent(alpha=1) 46 | M.initialize(f, grad_f, x0.copy()) 47 | x_gd = M.step(f, grad_f, x0.copy()) 48 | print("After 1 iteration of Gradient Descent, x = ", x_gd) 49 | print("Gradient at x: ", grad_f(x_gd)) 50 | x_gd = M.step(f, grad_f, x_gd) 51 | print("After 2 iterations of Gradient Descent, x = ", x_gd) 52 | print("Gradient at x: ", x_gd) 53 | print("=> Gradient Descent does not converge after 2 iterations.\n") 54 | 55 | # Conjugate Gradient Method 56 | print("Conjugate Gradient Method") 57 | M = ConjugateGradientDescent() 58 | M.initialize(f, grad_f, x0.copy()) 59 | x_cg = M.step(f, grad_f, x0.copy()) 60 | print("After 1 iteration of Conjugate Gradient, x = ", x_cg) 61 | print("Gradient at x: ", grad_f(x_cg)) 62 | x_cg = M.step(f, grad_f, x_cg) 63 | print("After 2 iterations of Conjugate Gradient, x = ", x_cg) 64 | print("Gradient at x: ", x_cg) 65 | print("=> Gradient Descent converges after 2 iterations.\n") 66 | 67 | 68 | def exercise_6_5(): 69 | """Exercise 6.5: Comparison of Newton's Method vs. Secant Method""" 70 | def f(x): return x**2 + x**4 71 | def deriv(x): return 2*x + 4*(x**3) 72 | def deriv2(x): return 2 + 12*(x**2) 73 | n_iter = 10 74 | 75 | # Initialize Newton's Method 76 | x = -3 77 | newton_x = [x] 78 | newton_f = [f(x)] 79 | newton_deriv = [deriv(x)] 80 | 81 | # Initialize Secant Method 82 | x0, x1 = -4, -3 83 | g0 = deriv(x0) 84 | secant_x = [x1] 85 | secant_f = [f(x1)] 86 | secant_deriv = [deriv(x1)] 87 | 88 | for _ in range(n_iter): 89 | # Newton's Method (for univariate function) 90 | Delta_nm = deriv(x) / deriv2(x) 91 | x -= Delta_nm 92 | newton_x.append(x) 93 | newton_deriv.append(0) 94 | newton_f.append(f(x)) 95 | newton_x.append(x) 96 | newton_deriv.append(deriv(x)) 97 | 98 | # Secant Method 99 | g1 = deriv(x1) 100 | Delta_sm = ((x1 - x0) / (g1 - g0)) * g1 101 | x0, x1, g0 = x1, x1 - Delta_sm, g1 102 | secant_x.append(x1) 103 | secant_deriv.append(0) 104 | secant_f.append(f(x1)) 105 | secant_x.append(x1) 106 | secant_deriv.append(deriv(x1)) 107 | 108 | # Plots the results 109 | fig, ax = plt.subplots(1, 2, figsize=(11, 4)) 110 | 111 | # f(x_k) vs iterations, k 112 | iters = np.arange(len(newton_f)) 113 | ax[0].plot(iters, newton_f, color="tab:blue") 114 | ax[0].plot(iters, secant_f, color="tab:red") 115 | ax[0].set_yscale("log") 116 | ax[0].set_xlabel("iterations, $k$") 117 | ax[0].set_ylabel("$f(x_k)$") 118 | 119 | # f_prime vs. 
x 120 | t = np.linspace(newton_x[0] - 0.5, newton_x[-1] + 0.5, 1000) 121 | f_prime = [deriv(t_i) for t_i in t] 122 | ax[1].plot(newton_x, newton_deriv, color="tab:blue", label="Newton") 123 | ax[1].plot(secant_x, secant_deriv, color="tab:red", label="secant") 124 | ax[1].hlines([0], xmin=newton_x[0] - 0.5, xmax=newton_x[-1] + 0.5, colors=['black'], linewidth=0.5) 125 | ax[1].plot(t, f_prime, color="black", linewidth=0.5) 126 | ax[1].set_xlabel("$x_k$") 127 | ax[1].set_ylabel("$f'(x_k)$") 128 | ax[1].legend(loc='upper left', bbox_to_anchor=(1.05, 1.0)) 129 | 130 | fig.tight_layout() 131 | plt.show() 132 | 133 | 134 | def exercise_6_9(): 135 | """Exercise 6.9: Newton's Method for f(x) = (x1 + 1)^2 + (x2 + 3)^2 + 4""" 136 | def f(x): return (x[0] + 1)**2 + (x[1] + 3)**2 + 4 137 | def grad_f(x): return np.array([2*(x[0] + 1), 2*(x[1] + 3)]) 138 | def H(x): return np.array([[2, 0], [0, 2]]) 139 | 140 | x = np.zeros(2) 141 | x_prime = newtons_method(grad_f, H, x, eps=1e-5, k_max=1) 142 | print("After 1 step of Newton's Method, x = ", x_prime) 143 | print("Gradient at x: ", grad_f(x_prime)) 144 | print("=> Newton's Method converges to the minimum after only 1 iteration.") 145 | -------------------------------------------------------------------------------- /src/ch12.py: -------------------------------------------------------------------------------- 1 | """Chapter 12: Multiobjective Optimization""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | from ch09 import SelectionMethod, CrossoverMethod, MutationMethod 8 | 9 | 10 | def dominates(y: np.ndarray, y_prime: np.ndarray) -> bool: 11 | """ 12 | A method for checking whether x dominates x_prime, where `y` is the vector 13 | of objective values for f(x) and `y_prime` is the vector of objective values 14 | for f(x_prime). 15 | """ 16 | return np.all(y <= y_prime) and np.any(y < y_prime) 17 | 18 | 19 | def naive_pareto(xs: np.ndarray, ys: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 20 | """ 21 | A method for generating a Pareto frontier using randomly sampled design 22 | ponts `xs` and their multiobjective values `ys`. Both the Pareto-optimal 23 | design points and their objective values are returned. 24 | """ 25 | pareto_xs, pareto_ys = [], [] 26 | for (x, y) in zip(xs, ys): 27 | if not np.any([dominates(y_prime, y) for y_prime in ys]): 28 | pareto_xs.append(x) 29 | pareto_ys.append(y) 30 | return (np.array(pareto_xs), np.array(pareto_ys)) 31 | 32 | 33 | def weight_pareto(f1: Callable[[np.ndarray], float], 34 | f2: Callable[[np.ndarray], float], 35 | optimize: Callable[[Callable[[np.ndarray], float]], np.ndarray], 36 | npts: int) -> np.ndarray: 37 | """ 38 | The weighted sum method for generating a Pareto frontier, which takes 39 | objective functions `f1` and `f2` and number of Pareto points `npts`. 40 | """ 41 | return np.array([optimize(lambda x: w1 * f1(x) + (1 - w1) * f2(x)) for w1 in np.linspace(0, stop=1, num=npts)]) 42 | 43 | 44 | def vector_evaluated_genetic_algorithm(f: Callable[[np.ndarray], np.ndarray], 45 | population: np.ndarray, 46 | k_max: int, 47 | S: SelectionMethod, 48 | C: CrossoverMethod, 49 | M: MutationMethod) -> np.ndarray: 50 | """ 51 | The vector-evaluated genetic algorithm which takes a vector-valued objective 52 | function `f`, an initial population, number of iterations `k_max`, a 53 | `SelectionMethod` `S`, a `CrossoverMethod` `C`, and a `MutationMethod` `M`. 54 | The resulting population is returned. 
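    Each of the m objectives selects its own subpopulation of m_pop // m
    parents; the subpopulations are then shuffled together before crossover
    and mutation, so every objective influences the next generation.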
55 | """ 56 | m = len(f(population[0])) 57 | m_pop = len(population) 58 | m_subpop = m_pop // m 59 | for _ in range(k_max): 60 | ys = np.apply_along_axis(f, 1, population) 61 | parents = np.apply_along_axis(lambda y: S.select(y)[:m_subpop], 0, ys) 62 | 63 | p = np.random.permutation(2*m_pop) 64 | def p_ind(i): return parents[(p[i] - 1) % m_pop][(p[i] - 1) // m_pop] 65 | parents = np.array([[p_ind(i), p_ind(i + 1)] for i in range(0, 2*m_pop, 2)]) 66 | children = np.array([C.crossover(population[p[0]], population[p[1]]) for p in parents]) 67 | population = np.array([M.mutate(c) for c in children]) 68 | return population 69 | 70 | 71 | def get_non_domination_levels(ys: np.ndarray) -> np.ndarray: 72 | """ 73 | A function for getting the nondomination levels of an array of 74 | multiobjective function evaluations `ys`. 75 | """ 76 | L, m = 0, len(ys) 77 | levels = np.zeros(m).astype(int) 78 | while np.min(levels) == 0: 79 | L += 1 80 | for (i, y) in enumerate(ys): 81 | if (levels[i] == 0) and\ 82 | not np.any([(levels[i] == 0 or levels[i] == L) & dominates(ys[i], y) for i in range(m)]): 83 | levels[i] = L 84 | return levels 85 | 86 | 87 | def discard_closest_pair(xs: np.ndarray, ys: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 88 | """ 89 | This method is used to remove one individual from a filter that is above 90 | capacity. The method takes the filter's list of design points `xs` and 91 | associated objective function values `ys`. 92 | """ 93 | index, min_dist = 0, np.inf 94 | for (i, y) in enumerate(ys): 95 | for (j, y_prime) in enumerate(ys[i:]): 96 | dist = np.linalg.norm(y - y_prime) 97 | if dist < min_dist: 98 | index, min_dist = np.random.choices([i, j]), dist 99 | xs = np.delete(xs, index, axis=0) 100 | ys = np.delete(ys, index, axis=0) 101 | return (xs, ys) 102 | 103 | 104 | def update_pareto_filter(filter_xs: np.ndarray, 105 | filter_ys: np.ndarray, 106 | xs: np.ndarray, 107 | ys: np.ndarray, 108 | capacity: int = None) -> tuple[np.ndarray, np.ndarray]: 109 | """ 110 | A method for updating a Pareto filter with design points `filter_xs`, 111 | corresponding objective function values `filter_ys`, a population with 112 | design points `xs` and objective values `ys`, and filter capacity `capactity` 113 | which defaults to the population size. 
114 | """ 115 | capacity = len(xs) if capacity is None else capacity 116 | for (x, y) in zip(xs, ys): 117 | if not np.any([dominates(y_prime, y) for y_prime in filter_ys]): 118 | filter_xs = np.append(filter_xs, x) 119 | filter_ys = np.append(filter_ys, y) 120 | filter_xs, filter_ys = naive_pareto(filter_xs, filter_ys) 121 | while len(filter_xs) > capacity: 122 | filter_xs, filter_ys = discard_closest_pair(filter_xs, filter_ys) 123 | return (filter_xs, filter_ys) 124 | -------------------------------------------------------------------------------- /src/tests/ch03/test_bracketing_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import pytest 4 | 5 | from ch03 import * 6 | 7 | 8 | class TestBracketingMethods(): 9 | @pytest.fixture(autouse=True) 10 | def run_before(self): 11 | self.f = lambda x: 2*(x**4) + 5*(x**3) - 3*x 12 | self.f_prime = lambda x: 8*(x**3) + 15*(x**2) - 3 13 | self.x_local_min = 0.40550 14 | self.x_global_min = -1.75296 15 | 16 | def test_bracket_minimum(self): 17 | a, b = bracket_minimum(self.f, x=0.0) 18 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 19 | 20 | a, b = bracket_minimum(self.f, x=-1.0) 21 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 22 | 23 | def test_fibonacci_search(self): 24 | a, b = fibonacci_search(self.f, a=-5, b=5, n=10) 25 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 26 | 27 | a, b = fibonacci_search(self.f, a=-5, b=0, n=10) 28 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 29 | 30 | a, b = fibonacci_search(self.f, a=0, b=5, n=10) 31 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 32 | 33 | def test_golden_section_search(self, eps=1e-5): 34 | a, b = golden_section_search(self.f, a=-5, b=5, n=10) 35 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 36 | 37 | a, b = -5, 5 38 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 39 | a, b = golden_section_search(self.f, a, b, n) 40 | assert np.abs(self.x_global_min - a) < eps 41 | assert np.abs(self.x_global_min - b) < eps 42 | 43 | a, b = golden_section_search(self.f, a=-5, b=0, n=10) 44 | assert (a <= self.x_global_min) and (self.x_global_min <= b) 45 | 46 | a, b = -5, 0 47 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 48 | a, b = golden_section_search(self.f, a, b, n) 49 | assert np.abs(self.x_global_min - a) < eps 50 | assert np.abs(self.x_global_min - b) < eps 51 | 52 | a, b = golden_section_search(self.f, a=0, b=5, n=10) 53 | assert (a <= self.x_local_min) and (self.x_local_min <= b) 54 | 55 | a, b = 0, 5 56 | n = np.ceil((b - a)/(eps*np.log(PHI))).astype(int) 57 | a, b = golden_section_search(self.f, a, b, n) 58 | assert np.abs(self.x_local_min - a) < eps 59 | assert np.abs(self.x_local_min - b) < eps 60 | 61 | def test_quadratic_fit_search(self, eps=1e-5): 62 | a, b, c = quadratic_fit_search(self.f, a=-5, b=0, c=5, n=1000) 63 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= c + eps) 64 | assert np.abs(self.x_global_min - b) <= eps 65 | 66 | a, b, c = quadratic_fit_search(self.f, a=-10, b=-5, c=0, n=1000) 67 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= c + eps) 68 | assert np.abs(self.x_global_min - b) <= eps 69 | 70 | a, b, c = quadratic_fit_search(self.f, a=0.1, b=5, c=10, n=1000) 71 | assert (a - eps <= self.x_local_min) and (self.x_local_min <= c + eps) 72 | assert np.abs(self.x_local_min - b) <= eps 73 | 74 | def 
test_shubert_piyavskii(self, eps=1e-5): 75 | P_min, intervals = shubert_piyavskii(self.f, a=-2, b=1, l=20, eps=eps) 76 | assert np.abs(P_min[0] - self.x_global_min) <= eps 77 | assert len(intervals) > 0 78 | assert self.in_an_interval(self.x_global_min, intervals) 79 | 80 | P_min, intervals = shubert_piyavskii(self.f, a=0, b=1, l=20, eps=eps) 81 | assert np.abs(P_min[0] - self.x_local_min) <= eps 82 | assert len(intervals) > 0 83 | assert self.in_an_interval(self.x_local_min, intervals) 84 | 85 | P_min, intervals = shubert_piyavskii(lambda x: np.sin(x) - 0.5*x, a=-5, b=7, l=1.5, eps=eps) 86 | assert np.abs(P_min[0] - (5*np.pi/3)) <= eps 87 | assert len(intervals) > 0 88 | assert self.in_an_interval(5*np.pi/3, intervals) 89 | 90 | def in_an_interval(self, x_min: float, intervals: list[tuple[float, float]]) -> bool: 91 | in_interval = False 92 | for interval in intervals: 93 | in_interval = in_interval or (interval[0] <= x_min and x_min <= interval[1]) 94 | return in_interval 95 | 96 | def test_bisection(self, eps=1e-5): 97 | a, b = bisection(self.f_prime, a=-5, b=5, eps=eps/10) 98 | assert (np.abs(self.x_global_min - a) <= eps) or (np.abs(self.x_local_min - a) <= eps) 99 | assert (np.abs(self.x_global_min - b) <= eps) or (np.abs(self.x_local_min - b) <= eps) 100 | 101 | a, b = bisection(self.f_prime, a=-5, b=-0.5, eps=eps/10) 102 | assert (a - eps <= self.x_global_min) and (self.x_global_min <= b + eps) 103 | assert np.abs(self.x_global_min - a) <= eps 104 | assert np.abs(self.x_global_min - b) <= eps 105 | 106 | a, b = bisection(self.f_prime, a=0, b=5, eps=eps/10) 107 | assert (a - eps <= self.x_local_min) and (self.x_local_min <= b + eps) 108 | assert np.abs(self.x_local_min - a) <= eps 109 | assert np.abs(self.x_local_min - b) <= eps 110 | 111 | def test_bracket_sign_change(self): 112 | a, b = bracket_sign_change(self.f_prime, a=-5, b=0) 113 | assert self.f_prime(a) * self.f_prime(b) <= 0 114 | 115 | a, b = bracket_sign_change(self.f_prime, a=-5, b=5) 116 | assert self.f_prime(a) * self.f_prime(b) <= 0 117 | 118 | a, b = bracket_sign_change(self.f_prime, a=0, b=5) 119 | assert self.f_prime(a) * self.f_prime(b) <= 0 120 | -------------------------------------------------------------------------------- /src/tests/ch05/test_first_order_methods.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../../') 2 | 3 | import numpy as np 4 | 5 | from ch05 import * 6 | from TestFunctions import booth, branin, michalewicz, rosenbrock, wheeler 7 | 8 | class TestFirstOrderMethods(): 9 | def test_gradient_descent(self, eps: float = 1e-8): 10 | M = GradientDescent(alpha=0.001) 11 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 12 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 13 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 14 | 15 | def test_conjugate_gradient(self, eps: float = 1e-6): 16 | M = ConjugateGradientDescent() 17 | self.run_on(booth, max_steps=2, x=np.array([-5.0, 5.0]), M=M, eps=eps) 18 | self.run_on_branin(max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 19 | self.run_on(michalewicz, max_steps=5, x=np.ones(2), M=M, eps=1e-4) 20 | self.run_on(rosenbrock, max_steps=10, x=np.ones(2)*-5, M=M, eps=eps) 21 | self.run_on(wheeler, max_steps=10, x=np.zeros(2), M=M, eps=eps) 22 | 23 | def test_momentum(self, eps: float = 1e-8): 24 | M = Momentum(alpha=0.001, beta=0.9) 25 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), 
M=M, eps=eps) 26 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 27 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 28 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=eps) 29 | 30 | def test_nesterov_momentum(self, eps: float = 1e-8): 31 | M = NesterovMomentum(alpha=0.001, beta=0.9) 32 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 33 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 34 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 35 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=eps) 36 | 37 | def test_adagrad(self, eps: float = 1e-8): 38 | M = Adagrad(alpha=0.1, eps=1e-3) 39 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 40 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 41 | M = Adagrad(alpha=1.0, eps=1e-3) 42 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 43 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 44 | 45 | def test_rmsprop(self, eps: float = 1e-3): 46 | M = RMSProp(alpha=0.001, gamma=0.9, eps=1e-3) 47 | self.run_on(booth, max_steps=10000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 48 | self.run_on_branin(max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 49 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 50 | self.run_on(wheeler, max_steps=10000, x=np.zeros(2), M=M, eps=1e-2) 51 | 52 | def test_adadelta(self, eps: float = 1e-8): 53 | M = Adadelta(gamma_s=0.95, gamma_x=0.95, eps=1e-3) 54 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=1e-3) 55 | 56 | def test_adam(self, eps: float = 1e-8): 57 | M = Adam(alpha=0.001, gamma_v=0.9, gamma_s=0.999, eps=1e-8) 58 | self.run_on(booth, max_steps=100000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 59 | self.run_on_branin(max_steps=100000, x=np.ones(2)*-5, M=M, eps=1e-4) 60 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 61 | self.run_on(wheeler, max_steps=100000, x=np.zeros(2), M=M, eps=eps) 62 | self.run_on(michalewicz, max_steps=100000, x=np.ones(2), M=M, eps=1e-4) 63 | 64 | def test_hypergradient_descent(self, eps: float = 1e-8): 65 | M = HyperGradientDescent(alpha_0=0.00001, mu=0.00001) 66 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 67 | M = HyperGradientDescent(alpha_0=0.000001, mu=0.000001) 68 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 69 | M = HyperGradientDescent(alpha_0=0.0001, mu=0.0000000001) 70 | self.run_on(rosenbrock, max_steps=100000, x=np.ones(2)*-5, M=M, eps=eps) 71 | M = HyperGradientDescent(alpha_0=0.0001, mu=0.00001) 72 | self.run_on(wheeler, max_steps=100000, x=np.zeros(2), M=M, eps=eps) 73 | 74 | def test_hypernesterov_momentum(self, eps: float = 1e-8): 75 | M = HyperNesterovMomentum(alpha_0=0.000001, mu=0.000001, beta=0.9) 76 | self.run_on(booth, max_steps=1000, x=np.array([-5.0, 5.0]), M=M, eps=eps) 77 | M = HyperNesterovMomentum(alpha_0=0.0000001, mu=0.0000001, beta=0.9) 78 | self.run_on_branin(max_steps=1000, x=np.ones(2)*-5, M=M, eps=eps) 79 | M = HyperNesterovMomentum(alpha_0=0.0001, mu=0.0000000001, beta=0.9) 80 | self.run_on(rosenbrock, max_steps=10000, x=np.ones(2)*-5, M=M, eps=eps) 81 | M = HyperNesterovMomentum(alpha_0=0.01, mu=0.000001, beta=0.9) 82 | self.run_on(wheeler, max_steps=1000, x=np.zeros(2), M=M, eps=eps) 83 | 84 | def run_on(self, f, max_steps, x, M, eps): 85 | f_min, x_min = f.global_min() 86 | M.initialize(f, f.grad, x) 87 | for 
_ in range(max_steps): 88 | x = M.step(f, f.grad, x) 89 | assert np.abs(f(x) - f_min) < eps 90 | assert np.all(np.abs(x - x_min) < eps) 91 | 92 | def run_on_branin(self, max_steps, x, M, eps): 93 | f_min, x_min = branin.global_min() 94 | M.initialize(branin, branin.grad, x) 95 | for _ in range(max_steps): 96 | x = M.step(branin, branin.grad, x) 97 | assert np.abs(branin(x) - f_min[0]) < eps 98 | assert np.any([np.all(np.abs(x - x_min_i) < eps) for x_min_i in x_min.T]) 99 | -------------------------------------------------------------------------------- /src/ch16.py: -------------------------------------------------------------------------------- 1 | """Chapter 16: Surrogate Optimization""" 2 | 3 | import numpy as np 4 | 5 | from scipy.stats import norm 6 | from typing import Callable 7 | 8 | from ch15 import GaussianProcess 9 | 10 | 11 | def prob_of_improvement(y_min: float, mu: float, sigma: float) -> float: 12 | """ 13 | Computing the probability of improvement for a given best y value `y_min`, 14 | mean `mu`, and standard deviation `sigma`. 15 | """ 16 | return norm(mu, sigma).cdf(y_min) 17 | 18 | 19 | def expected_improvement(y_min: float, mu: float, sigma: float) -> float: 20 | """ 21 | Computing the expected improvment for a given best y value `y_min`, 22 | mean `mu`, and standard deviation `sigma`. 23 | """ 24 | p_imp = prob_of_improvement(y_min, mu, sigma) 25 | p_ymin = norm(mu, sigma).pdf(y_min) 26 | return (y_min - mu)*p_imp + (sigma**2)*p_ymin 27 | 28 | 29 | class SafeOpt(): 30 | """ 31 | The SafeOpt algorithm applied to an empty Gaussian process `GP`, a finite 32 | design space `X`, index of initial safe point `i`, objective function `f`. 33 | and safety threshold `y_max`. The optional parameters are the confidence 34 | scalar `beta` and the number of iterations `k_max`. A tuple containing the 35 | best safe upper bound and its index in `X` is returned. 36 | """ 37 | def __call__(self, 38 | GP: GaussianProcess, 39 | X: np.ndarray, 40 | i: int, 41 | f: Callable[[np.ndarray], float], 42 | y_max: float, 43 | beta: float = 3.0, 44 | k_max: int = 10) -> tuple[np.ndarray, int]: 45 | GP.append(X[i], f(X[i])) 46 | 47 | m = len(X) 48 | u, l = np.full(m, np.inf), np.full(m, -np.inf) 49 | S, M, E = np.full(m, False), np.full(m, False), np.full(m, False) 50 | 51 | for _ in range(k_max): 52 | u, l = self.update_confidence_intervals(GP, X, u, l, beta) 53 | S, M, E = self.compute_sets(GP, S, M, E, X, u, l, y_max, beta) 54 | i = self.get_new_query_point(M, E, u, l) 55 | if i == 0: 56 | break 57 | GP.push(X[i], f(X[i])) 58 | 59 | # return the best point 60 | u, l = self.update_confidence_intervals(GP, X, u, l, beta) 61 | S = (u <= y_max) 62 | if np.any(S): 63 | i_best = np.argmin(u[S]) 64 | u_best = u[S][i_best] 65 | i_best = np.where(i_best == np.cumsum(S))[0][0] 66 | return (u_best, i_best) 67 | return (None, 0) 68 | 69 | def update_confidence_intervals(self, 70 | GP: GaussianProcess, 71 | X: np.ndarray, 72 | u: np.ndarray, 73 | l: np.ndarray, 74 | beta: float) -> tuple[np.ndarray, np.ndarray]: 75 | """ 76 | A method for updating the lower and upper bounds used in SafeOpt, which 77 | takes the Gaussian process `GP`, the finite search space `X`, the upper- 78 | and lower-bound vectors `u` and `l`, and the confidence scalar `beta`. 
79 | """ 80 | mu_p, v_p = GP.predict(X) 81 | u = mu_p + np.sqrt(beta * v_p) 82 | l = mu_p - np.sqrt(beta * v_p) 83 | return (u, l) 84 | 85 | def compute_sets(self, 86 | GP: GaussianProcess, 87 | S: np.ndarray, 88 | M: np.ndarray, 89 | E: np.ndarray, 90 | X: np.ndarray, 91 | u: np.ndarray, 92 | l: np.ndarray, 93 | y_max: float, 94 | beta: float) -> tuple[np.ndarray, np.ndarray, np.ndarray]: 95 | """ 96 | A method for updating the safe `S`, minimizer `M`, and expander `E` sets 97 | used in SafeOpt. The sets are all Boolean vectors indicating whether the 98 | corresponding design point in `X` is in the set. The method also takes 99 | the Gaussian process `GP`, the upper and lower bounds `u` and `l`, 100 | respectively, the safety threshold `y_max`, and the confidence scalar 101 | `beta`. 102 | """ 103 | M.fill(False) 104 | E.fill(False) 105 | 106 | # safe set 107 | S = (u <= y_max) 108 | 109 | if np.any(S): 110 | # potential minimizers 111 | M[S] = (l[S] < np.min(u[S])) 112 | 113 | # maximum width (in M) 114 | w_max = np.max(u[M] - l[M]) 115 | 116 | # expanders - skip values in M or those with w <= w_max 117 | E = S & ~M # skip points in M 118 | if np.any(E): 119 | E[E] = (np.max(u[E] - l[E]) > w_max) 120 | for (i, e) in enumerate(E): 121 | if e and (u[i] - l[i] > w_max): 122 | GP.append(X[i], l[i]) 123 | mu_p, v_p = GP.predict(X[~S]) 124 | GP.pop() 125 | E[i] = np.any(mu_p + np.sqrt(beta * v_p) >= y_max) 126 | if E[i]: 127 | w_max = u[i] - l[i] 128 | 129 | return (S, M, E) 130 | 131 | def get_new_query_point(self, M: np.ndarray, E: np.ndarray, u: np.ndarray, l: np.ndarray) -> int: 132 | """ 133 | A method for obtaining the next query point in SafeOpt. The index of the 134 | point in `X` with the greatest width is returned. 135 | """ 136 | ME = M | E 137 | if np.any(ME): 138 | v = np.argmax(u[ME] - l[ME]) 139 | return np.where(v == np.cumsum(ME))[0][0] 140 | return 0 141 | -------------------------------------------------------------------------------- /src/ch11.py: -------------------------------------------------------------------------------- 1 | """Chapter 11: Linear Constrainted Optimization""" 2 | 3 | import numpy as np 4 | 5 | class LinearProgram(): 6 | """ 7 | A linear program in equality form: 8 | 9 | minimize c'x 10 | subject to: Ax = b 11 | x >= 0 12 | """ 13 | def __init__(self, A: np.ndarray, b: np.ndarray, c: np.ndarray): 14 | self.A = A 15 | self.b = b 16 | self.c = c 17 | 18 | def get_vertex(self, B: np.ndarray) -> np.ndarray: 19 | """A method for extracting the vertex associated with a partition `B` and an LP `self`""" 20 | b_inds = np.sort(B) 21 | AB = self.A[:, b_inds] 22 | xB = np.linalg.solve(AB, self.b) 23 | x = np.zeros(len(self.c)) 24 | x[b_inds] = xB 25 | return x 26 | 27 | def edge_transition(self, B: np.ndarray, q: int) -> tuple[int, float]: 28 | """ 29 | A method for computing the index `p` and the new coordinate value `x_q_prime` 30 | obtained by increasing index `q` of the vertex defined by the partition 31 | `B` in the equality-form linear program. 
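    The new value follows from the minimum-ratio test: letting d = A_B^{-1} a_q,
    where a_q is the column of the entering (nonbasic) variable,
    x_q' = min{ x_B[i] / d[i] : d[i] > 0 }, and `p` is the index attaining that
    minimum; x_q' is infinite when no d[i] > 0, i.e., the edge is unbounded.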
32 | """ 33 | A, b = self.A, self.b 34 | n = A.shape[1] 35 | b_inds = np.sort(B) 36 | n_inds = np.setdiff1d(np.arange(n), B) 37 | AB = A[:, b_inds] 38 | d, xB = np.linalg.solve(AB, A[:, n_inds[q]]), np.linalg.solve(AB, b) 39 | 40 | p, xq_prime = 0, np.inf 41 | for i in range(len(d)): 42 | if d[i] > 0: 43 | v = xB[i] / d[i] 44 | if v < xq_prime: 45 | p, xq_prime = i, v 46 | 47 | return (p, xq_prime) 48 | 49 | def step(self, B: np.ndarray) -> tuple[np.ndarray, bool]: 50 | """ 51 | A single iteration of the simplex algorithm in which the set `B` 52 | is moved from one vertex to a neighbor while maximally decreasing the 53 | objective function. The function takes a partition defined by `B`. 54 | """ 55 | A, b, c = self.A, self.b, self.c 56 | n = A.shape[1] 57 | b_inds = np.sort(B) 58 | n_inds = np.setdiff1d(np.arange(n), B) 59 | AB, AV = A[:, b_inds], A[:, n_inds] 60 | # xB = np.linalg.solve(AB, b) # TODO - never used? 61 | cB = c[b_inds] 62 | lam = np.linalg.solve(AB.T, cB) 63 | cV = c[n_inds] 64 | muV = cV - AV.T @ lam 65 | 66 | q, p, xq_prime, delta = 0.0, 0.0, np.inf, np.inf 67 | for i in range(len(muV)): 68 | if muV[i] < 0: 69 | pi, xi_prime = self.edge_transition(B, i) 70 | if muV[i] * xi_prime < delta: 71 | q, p, xq_prime, delta = i, pi, xi_prime, muV[i]*xi_prime 72 | if q == 0: 73 | return (B, True) # optimal point found 74 | 75 | if np.isinf(xq_prime): 76 | raise ValueError("unbounded") 77 | 78 | j = np.where(B == b_inds[p])[0][0] 79 | B[j] = n_inds[q] # swap indices 80 | return (B, False) # new vertex but not optimal 81 | 82 | def minimize_given_vertex_partition(self, B: np.ndarray) -> np.ndarray: 83 | """Minimizing a linear program given a vertex partition defined by `B`.""" 84 | done = False 85 | while not done: 86 | B, done = self.step(B) 87 | return B 88 | 89 | def minimize(self, return_idcs=False) -> np.ndarray: 90 | """ 91 | The simplex algorithm for solving linear programs in equality form 92 | when an initial partition is not known. 93 | """ 94 | A, b, c = self.A, self.b, self.c # TODO - c is not necessary? 95 | m, n = A.shape 96 | z = np.ones(m) 97 | Z = np.diag([1 if j >= 0 else -1 for j in b]) 98 | 99 | A_prime = np.hstack([A, Z]) 100 | b_prime = b 101 | c_prime = np.concatenate((np.zeros(n), z)) 102 | LP_init = LinearProgram(A_prime, b_prime, c_prime) 103 | B = np.arange(1, m + 1) + n 104 | B = LP_init.minimize_given_vertex_partition(B) 105 | 106 | if np.any(B > n): 107 | raise ValueError("infeasible") 108 | 109 | A_prime_prime = np.vstack([np.hstack([A, np.eye(m)]), 110 | np.hstack([np.zeros((m, n)), np.eye(m)])]) 111 | b_prime_prime = np.concatenate((b, np.zeros(m))) 112 | c_prime_prime = c_prime 113 | LP_opt = LinearProgram(A_prime_prime, b_prime_prime, c_prime_prime) 114 | B = LP_opt.minimize_given_vertex_partition(B) 115 | x_opt = LP_opt.get_vertex(B)[:n] 116 | if return_idcs: 117 | b_inds = np.sort(B) 118 | n_inds = np.setdiff1d(np.arange(n), B) 119 | return x_opt, b_inds, n_inds 120 | return x_opt 121 | 122 | def dual_certificate(self, x: np.ndarray, lam: np.ndarray, eps: float = 1e-6) -> bool: 123 | """ 124 | A method for checking whether a candidate solution given by design point 125 | `x` and dual point `lam` for the linear program is optimal. The 126 | parameter `eps` controls the tolerance for the equality constraint. 
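    A small illustrative check (this program is not from the book): for
    minimize x1 + 2*x2 subject to x1 + x2 = 2, x >= 0, the point x = (2, 0)
    with dual point lam = (1,) is optimal:

        LP = LinearProgram(np.array([[1.0, 1.0]]), np.array([2.0]), np.array([1.0, 2.0]))
        LP.dual_certificate(np.array([2.0, 0.0]), np.array([1.0]))  # True; c'x == b'lam == 2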
127 | """ 128 | A, b, c = self.A, self.b, self.c 129 | primal_feasible = np.all(x >= 0) and np.all(np.isclose(A @ x, b)) 130 | dual_feasible = np.all(A.T @ lam <= c) 131 | return primal_feasible and dual_feasible and np.isclose(np.dot(c, x), np.dot(b, lam), atol=eps) 132 | 133 | def minimize_lp_and_y(self) -> tuple[np.ndarray, float]: 134 | """ 135 | (From Chapter 19) Solves an LP and returns both the solutions and its 136 | value. An infeasible LP produces a `NaN` solution and an `np.inf` value. 137 | """ 138 | try: 139 | x = self.minimize() 140 | return (x, np.dot(x, self.c)) 141 | except ValueError: 142 | return (np.full(len(self.c), np.nan), np.inf) -------------------------------------------------------------------------------- /src/ch04.py: -------------------------------------------------------------------------------- 1 | """Chapter 4: Local Descent""" 2 | 3 | import cvxpy as cp 4 | import numpy as np 5 | import warnings 6 | 7 | from scipy.optimize import brent 8 | from typing import Callable 9 | 10 | from ch03 import bracket_minimum 11 | 12 | warnings.simplefilter(action='ignore', category=FutureWarning) 13 | 14 | 15 | def line_search(f: Callable[[np.ndarray], float], 16 | x: np.ndarray, 17 | d: np.ndarray, 18 | minimize: Callable[[Callable, float, float], float] = lambda f,a,b: brent(f, brack=(a, b)) 19 | ) -> np.ndarray: 20 | """ 21 | A method for conducting a line search, which finds the optimal step factor 22 | along a descent direction `d` from design point `x` to minimize function `f`. 23 | The `minimize` function can be implemented using a univariate optimization 24 | algorithm such as the Brent-Dekker method. 25 | """ 26 | def objective(alpha): return f(x + alpha*d) 27 | a, b = bracket_minimum(objective) 28 | alpha = minimize(objective, a, b) 29 | return x + alpha*d 30 | 31 | 32 | def backtracking_line_search(f: Callable[[np.ndarray], float], 33 | grad_f: Callable[[np.ndarray], np.ndarray], 34 | x: np.ndarray, 35 | d: np.ndarray, 36 | alpha: float, 37 | p: float = 0.5, 38 | beta: float = 1e-4) -> float: 39 | """ 40 | The backtracking line search algorithm, which takes objective function `f`, 41 | its gradient `grad_f`, the current design point `x`, a descent direction `d`, 42 | and the maximum step size `alpha`. We can optionally specify the reduction 43 | factor `p` and the first Wolfe condition parameter `beta`. 44 | """ 45 | y, g = f(x), grad_f(x) 46 | while f(x + alpha*d) > y + beta*alpha*np.dot(g, d): 47 | alpha *= p 48 | return alpha 49 | 50 | 51 | def strong_backtracking(f: Callable[[np.ndarray], float], 52 | grad_f: Callable[[np.ndarray], np.ndarray], 53 | x: np.ndarray, 54 | d: np.ndarray, 55 | alpha: float = 1.0, 56 | beta: float = 1e-4, 57 | sigma: float = 0.1) -> float: 58 | """ 59 | Strong backtracking approximate line search for satisfying the strong Wolfe 60 | conditions. It takes as input the objective function `f`, the gradient 61 | function `grad_f`, the design point `x` and direction `d` from which line 62 | search is conducted, an initial step size `alpha`, and the Wolfe condition 63 | parameters `beta` and `sigma`. The algorithm's bracket phase first brackets 64 | an interval containing a step size that satisfies the strong Wolfe conditions. 65 | It then reduces this bracketed interval in the zoom phase until a suitable 66 | step size is found. We interpolate with bisection, but other schemes can be 67 | used. 
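    A minimal usage sketch (the objective is illustrative, not from the book):

        f = lambda x: np.dot(x, x)
        grad_f = lambda x: 2*x
        alpha = strong_backtracking(f, grad_f, x=np.array([1.0, 2.0]), d=np.array([-1.0, -2.0]))
        # returns 1.0: the full step along -grad lands on the minimizer and
        # immediately satisfies the strong Wolfe conditions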
68 | """ 69 | y_0, g_0, y_prev, alpha_prev = f(x), np.dot(grad_f(x), d), np.nan, 0 70 | alpha_lo, alpha_hi = np.nan, np.nan 71 | 72 | # Bracket Phase 73 | while True: 74 | y = f(x + alpha*d) 75 | if (y > y_0 + beta*alpha*g_0) or ((not np.isnan(y_prev)) and (y >= y_prev)): 76 | alpha_lo, alpha_hi = alpha_prev, alpha 77 | break 78 | g = np.dot(grad_f(x + alpha*d), d) 79 | if abs(g) <= -sigma*g_0: 80 | return alpha 81 | elif g >= 0: 82 | alpha_lo, alpha_hi = alpha, alpha_prev 83 | break 84 | y_prev, alpha_prev, alpha = y, alpha, 2*alpha 85 | 86 | # Zoom Phase 87 | y_lo = f(x + alpha_lo*d) 88 | while True: 89 | alpha = (alpha_lo + alpha_hi) / 2 90 | y = f(x + alpha*d) 91 | if (y > y_0 + beta*alpha*g_0) or (y >= y_lo): 92 | alpha_hi = alpha 93 | else: 94 | g = np.dot(grad_f(x + alpha*d), d) 95 | if abs(g) <= -sigma*g_0: 96 | return alpha 97 | elif g*(alpha_hi - alpha_lo) >= 0: 98 | alpha_hi = alpha_lo 99 | alpha_lo = alpha 100 | 101 | 102 | def solve_trust_region_subproblem(grad_f: Callable[[np.ndarray], np.ndarray], 103 | H: Callable[[np.ndarray], np.ndarray], 104 | x0: np.ndarray, 105 | delta: float) -> tuple[np.ndarray, float]: 106 | """We have provided an example implementation of `solve_trust_region_subproblem` 107 | that uses a second-order Taylor approximation about `x0` with a circular trust region.""" 108 | x = cp.Variable(len(x0)) 109 | objective = cp.Minimize((grad_f(x0) @ (x - x0)) + (cp.quad_form(x - x0, H(x0)) / 2)) 110 | constraints = [cp.norm(x - x0) <= delta] 111 | problem = cp.Problem(objective, constraints) 112 | problem.solve() 113 | return (x.value, problem.value) 114 | 115 | 116 | def trust_region_descent(f: Callable[[np.ndarray], float], 117 | grad_f: Callable[[np.ndarray], np.ndarray], 118 | H: Callable[[np.ndarray], np.ndarray], 119 | x: np.ndarray, 120 | k_max: int, 121 | eta_1: float = 0.25, 122 | eta_2: float = 0.5, 123 | gamma_1: float = 0.5, 124 | gamma_2: float = 2.0, 125 | delta: float = 1.0, 126 | solve_trust_region_subproblem: Callable[[Callable, Callable, np.ndarray, float], tuple[np.ndarray, float]] = solve_trust_region_subproblem 127 | ) -> np.ndarray: 128 | """ 129 | The trust region descent method, where `f` is the objective function, 130 | `grad_f` produces the derivative, `H` produces the Hessian, `x` is an initial 131 | design point, and `k_max` is the number of iterations. The optional parameters 132 | `eta_1` and `eta_2` determine when the trust region radius `delta` is increased 133 | or decreased, and `gamma_` and `gamma_2` control the magnitude of the change. 134 | An implementation for `solve_trust_region_subproblem` must be provided that 135 | solves equation (4.10) in the texbook. 
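    A minimal usage sketch (a made-up convex quadratic; the default subproblem
    solver defined above requires cvxpy):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        H = lambda x: 2.0 * np.eye(len(x))
        x_best = trust_region_descent(f, grad_f, H, np.array([2.0, 2.0]), k_max=10)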
136 | """ 137 | y = f(x) 138 | for _ in range(k_max): 139 | x_prime, y_prime = solve_trust_region_subproblem(grad_f, H, x, delta) 140 | r = (y - f(x_prime)) / (y - y_prime) 141 | if r < eta_1: 142 | delta *= gamma_1 143 | else: 144 | x, y = x_prime, y_prime 145 | if r > eta_2: 146 | delta *= gamma_2 147 | return x 148 | -------------------------------------------------------------------------------- /src/ch03.py: -------------------------------------------------------------------------------- 1 | """Chapter 3: Bracketing""" 2 | 3 | import numpy as np 4 | 5 | from typing import Callable 6 | 7 | 8 | PHI = (1 + np.sqrt(5))/2 # golden ratio 9 | 10 | 11 | def bracket_minimum(f: Callable[[float], float], 12 | x: float = 0.0, 13 | s: float = 1e-2, 14 | k: float = 2.0) -> tuple[float, float]: 15 | """ 16 | An algorithm for bracketing an interval in which a local minimum must exist. 17 | It takes as input a univariate function `f` and starting position `x`, which 18 | defaults to 0.0. The starting step size `s` and the expansion factor `k` can 19 | be specified. It returns a tuple containing the new interval [a, b]. 20 | """ 21 | a, y_a = x, f(x) 22 | b, y_b = a + s, f(a + s) 23 | if y_b > y_a: 24 | a, b, = b, a 25 | y_a, y_b = y_b, y_a 26 | s = -s 27 | while True: 28 | c, y_c = b + s, f(b + s) 29 | if y_c > y_b: 30 | return (a, c) if a < c else (c, a) 31 | a, y_a, b, y_b = b, y_b, c, y_c 32 | s *= k 33 | 34 | 35 | def fibonacci_search(f: Callable[[float], float], 36 | a: float, 37 | b: float, 38 | n: int, 39 | eps: float = 0.01) -> tuple[float, float]: 40 | """ 41 | Fibonacci search to be run on univariate function `f`, with bracketing 42 | interval `[a, b]` for n > 1 function evaluations. It returns the new 43 | interval [a, b]. The optimal parameter `eps` controls the lowest-level 44 | interval. 45 | """ 46 | s = (1 - np.sqrt(5)) / (1 + np.sqrt(5)) 47 | p = 1 / ((PHI*(1 - (s**(n + 1)))) / (1 - (s**n))) 48 | d = p*b + (1 - p)*a 49 | y_d = f(d) 50 | for i in range(1, n): 51 | if i == n - 1: 52 | c = eps*a + (1 - eps)*d 53 | else: 54 | c = p*a + (1 - p)*b 55 | y_c = f(c) 56 | if y_c < y_d: 57 | b, d, y_d = d, c, y_c 58 | else: 59 | a, b = b, c 60 | p = 1 / ((PHI*(1 - (s**(n - i + 1)))) / (1 - (s**(n - i)))) 61 | return (a, b) if a < b else (b, a) 62 | 63 | 64 | def golden_section_search(f: Callable[[float], float], 65 | a: float, 66 | b: float, 67 | n: int) -> tuple[float, float]: 68 | """ 69 | Golden section search to be run on a univariate function `f`, with 70 | bracketing interval [a, b], for n > 1 function evaluations. It returns the 71 | new interval (a, b). Guaranteeing convergence to within `eps` requires 72 | n = (b - a)/(eps*ln(PHI)) iterations. 73 | """ 74 | p = PHI - 1 75 | d = p*b + (1 - p)*a 76 | y_d = f(d) 77 | for _ in range(1, n): 78 | c = p*a + (1 - p)*b 79 | y_c = f(c) 80 | if y_c < y_d: 81 | b, d, y_d = d, c, y_c 82 | else: 83 | a, b = b, c 84 | return (a, b) if a < b else (b, a) 85 | 86 | 87 | def quadratic_fit_search(f: Callable[[float], float], 88 | a: float, 89 | b: float, 90 | c: float, 91 | n: int) -> tuple[float, float, float]: 92 | """ 93 | Quadratic fit search to be run on univariate function `f`, with bracketing 94 | interval [a, c] with a < b < c. The method will run for `n` function 95 | evaluations. It returns the new bracketing values as a tuple, `(a, b, c)`. 
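    A minimal usage sketch (bracketing values chosen arbitrarily for
    illustration):

        a, b, c = quadratic_fit_search(lambda x: (x - 2.0)**2, 0.0, 1.0, 5.0, n=10)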
96 | """ 97 | y_a, y_b, y_c = f(a), f(b), f(c) 98 | for i in range(1, n - 2): 99 | x = 0.5 * (y_a*(b**2 - c**2) + y_b*(c**2 - a**2) + y_c*(a**2 - b**2)) /\ 100 | (y_a*(b - c) + y_b*(c - a) + y_c*(a - b)) 101 | y_x = f(x) 102 | if x > b: 103 | if y_x > y_b: 104 | c, y_c = x, y_x 105 | else: 106 | a, y_a, b, y_b = b, y_b, x, y_x 107 | elif x < b: 108 | if y_x > y_b: 109 | a, y_a = x, y_x 110 | else: 111 | c, y_c, b, y_b = b, y_b, x, y_x 112 | return (a, b, c) 113 | 114 | 115 | def shubert_piyavskii(f: Callable[[float], float], 116 | a: float, 117 | b: float, 118 | l: float, 119 | eps: float, 120 | delta: float = 0.01) -> tuple[np.ndarray, list[tuple[float, float]]]: 121 | """ 122 | The Shubert-Piyavskii method to be run on univariate function `f`, with 123 | bracketing interval `a` < `b` and Lipschitz constant `l`. The algorithm runs 124 | until the update is less than the tolerance `eps`. Both the best point and 125 | the set of uncertainty intervals are returned. The uncertainty intervals are 126 | returned as an array of `(a, b)` tuples. The parameter `delta` is a 127 | tolerance used to merge the uncertainty intervals. 128 | """ 129 | def _get_sp_intersection(A: np.ndarray, B: np.ndarray, l: float) -> np.ndarray: 130 | t = ((A[1] - B[1]) - l*(A[0] - B[0])) / (2*l) 131 | return np.array([A[0] + t, A[1] - t*l]) 132 | 133 | m = (a + b) / 2 134 | A, M, B = np.array([a, f(a)]), np.array([m, f(m)]), np.array([b, f(b)]) 135 | pts = np.array([A, _get_sp_intersection(A, M, l), 136 | M, _get_sp_intersection(M, B, l), 137 | B]) 138 | Delta = np.inf 139 | while Delta > eps: 140 | i = np.argmin(pts[:, 1]) 141 | P = np.array([pts[i, 0], f(pts[i, 0])]) 142 | Delta = P[1] - pts[i, 1] 143 | 144 | P_prev = _get_sp_intersection(pts[i - 1], P, l) 145 | P_next = _get_sp_intersection(P, pts[i + 1], l) 146 | 147 | pts = np.delete(pts, i, axis=0) 148 | pts = np.insert(pts, i, P_next, axis=0) 149 | pts = np.insert(pts, i, P, axis=0) 150 | pts = np.insert(pts, i, P_prev, axis=0) 151 | 152 | intervals = [] 153 | P_min = pts[2 * np.argmin(pts[::2, 1])] 154 | y_min = P_min[1] 155 | for i in range(1, len(pts), 2): 156 | if pts[i, 1] < y_min: 157 | dy = y_min - pts[i, 1] 158 | x_lo = np.maximum(a, pts[i, 0] - (dy/l)) 159 | x_hi = np.minimum(b, pts[i, 0] + (dy/l)) 160 | if (len(intervals) != 0) and (intervals[-1][1] + delta >= x_lo): 161 | intervals[-1] = (intervals[-1][0], x_hi) 162 | else: 163 | intervals.append((x_lo, x_hi)) 164 | 165 | return (P_min, intervals) 166 | 167 | 168 | def bisection(f_prime: Callable[[float], float], 169 | a: float, 170 | b: float, 171 | eps: float) -> tuple[float, float]: 172 | """ 173 | The bisection algorithm where `f_prime` is the derivative of the univariate 174 | function we seek to optimize. We have a < b that bracket a zero of `f_prime`. 175 | The interval width tolerance is `eps`. Calling `bisection` returns the new 176 | bracketed interval [a, b] as a tuple. 177 | """ 178 | a, b = (b, a) if a > b else (a, b) # ensure a < b 179 | 180 | y_a, y_b = f_prime(a), f_prime(b) 181 | b = a if y_a == 0 else b 182 | a = b if y_b == 0 else a 183 | 184 | while (b - a > eps): 185 | x = (a + b) / 2 186 | y = f_prime(x) 187 | if y == 0: 188 | a, b = x, x 189 | elif np.sign(y) == np.sign(y_a): 190 | a = x 191 | else: 192 | b = x 193 | 194 | return (a, b) 195 | 196 | 197 | def bracket_sign_change(f_prime: Callable[[float], float], 198 | a: float, 199 | b: float, 200 | k: float = 2.0) -> tuple[float, float]: 201 | """ 202 | An algorithm for finding an interval in which a sign change occurs. 
The 203 | inputs are the real-valued function `f_prime` defined on the real numbers, 204 | and starting interval [a, b]. It returns the new interval as a tuple by 205 | expanding the interval width until there is a sign change between the 206 | function evaluated at the interval bounds. The expansion default factor `k` 207 | defaults to 2.0. 208 | """ 209 | a, b = (b, a) if a > b else (a, b) # ensure a < b 210 | 211 | center, half_width = (b + a) / 2, (b - a) / 2 212 | while (f_prime(a) * f_prime(b) > 0): 213 | half_width *= k 214 | a = center - half_width 215 | b = center + half_width 216 | 217 | return (a, b) 218 | -------------------------------------------------------------------------------- /src/figures/ch07_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | from matplotlib.patches import Polygon 5 | import numpy as np 6 | 7 | from ch04 import line_search 8 | from ch07 import basis 9 | from TestFunctions import booth, wheeler 10 | from convenience import plot_contour 11 | 12 | 13 | def figure_7_1(n_steps: int = 6): 14 | """ 15 | Figure 7.1: Cyclic coordinate descent alternates between coordinate directions. 16 | """ 17 | x = np.array([10.0, -5.0]) # Starting point 18 | n = len(x) 19 | 20 | fig = plt.figure() 21 | plot_contour(fig, booth, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 22 | for _ in range(n_steps): 23 | for i in range(n): 24 | d = basis(i, n) 25 | x_next = line_search(booth, x, d) 26 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="black") 27 | x = x_next 28 | plt.title("Figure 7.1") 29 | plt.show() 30 | 31 | 32 | def figure_7_3(n_steps: int = 6): 33 | """ 34 | Figure 7.3: Adding the acceleration step to cyclic coordinate descent helps 35 | traverse valleys. Six steps are shown for both the original and accelerated 36 | versions. 37 | """ 38 | x = np.array([10.0, -5.0]) # Starting point (Original) 39 | x_accel = x.copy() # Starting point (Accelerated) 40 | n = len(x) 41 | 42 | fig = plt.figure() 43 | plot_contour(fig, booth, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 44 | for _ in range(n_steps): 45 | x_accel_prev = x_accel.copy() 46 | for i in range(n): 47 | d = basis(i, n) 48 | # Original 49 | x_next = line_search(booth, x, d) 50 | plt.plot([x[0], x_next[0]], [x[1], x_next[1]], c="tab:blue") 51 | x = x_next 52 | 53 | # Accelerated 54 | x_accel_next = line_search(booth, x_accel, d) 55 | plt.plot([x_accel[0], x_accel_next[0]], [x_accel[1], x_accel_next[1]], c="tab:red") 56 | x_accel = x_accel_next 57 | # Acceleration Step 58 | x_accel_next = line_search(booth, x_accel, x_accel - x_accel_prev) 59 | plt.plot([x_accel[0], x_accel_next[0]], [x_accel[1], x_accel_next[1]], c="tab:red") 60 | x_accel = x_accel_next 61 | plt.legend(labels=["original", "accelerated"], loc="lower left") 62 | plt.title("Figure 7.3") 63 | plt.show() 64 | 65 | 66 | def figure_7_4(): 67 | """ 68 | Figure 7.4: Powell's method starts the same as cyclic coordinate descent but 69 | iteratively learns conjugate directions. 
70 | """ 71 | x = np.array([10.0, -5.0]) # Starting point 72 | n = len(x) 73 | U = np.eye(n) 74 | 75 | fig = plt.figure() 76 | plot_contour(fig, wheeler, xlim=(-10.5, 10.5), ylim=(-10.1, 10.1), xstride=0.01, ystride=0.01, levels=[0, 1, 5, 10, 20, 50, 100, 200, 500, 1000]) 77 | for _ in range(2): 78 | x_prime = x.copy() 79 | for i in range(n): 80 | d = U[i] 81 | x_prime_next = line_search(booth, x_prime, d) 82 | plt.plot([x_prime[0], x_prime_next[0]], [x_prime[1], x_prime_next[1]], c="black") 83 | x_prime = x_prime_next 84 | for i in range(n - 1): 85 | U[i] = U[i + 1] 86 | U[n - 1] = d = x_prime - x 87 | x_prime_next = line_search(booth, x_prime, d) 88 | plt.plot([x_prime[0], x_prime_next[0]], [x_prime[1], x_prime_next[1]], c="black") 89 | x = x_prime_next 90 | plt.title("Figure 7.4") 91 | plt.show() 92 | 93 | 94 | def figure_7_5(n_steps: int = 4): 95 | """ 96 | Figure 7.5: The Hooke-Jeeves method, proceeding left to right. It begins 97 | with a large step size but then reduces it once it cannot improve by taking 98 | a step in any coordinate direction. 99 | """ 100 | alpha, gamma = 0.5, 0.5 101 | x = np.array([0.7, 0.9]) # Starting point 102 | y, n = wheeler(x), len(x) 103 | 104 | fig = plt.figure(figsize=(5*n_steps, 5)) 105 | for i in range(1, n_steps + 1): 106 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(1,n_steps,i)) 107 | cont_ax.scatter([x[0]], [x[1]], c='black', s=30.0) 108 | improved = False 109 | x_best, y_best = x, y 110 | for j in range(n): 111 | for sgn in [-1, 1]: 112 | x_prime = x + sgn*alpha*basis(j, n) 113 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2) 114 | y_prime = wheeler(x_prime) 115 | if y_prime < y_best: 116 | x_best, y_best, improved = x_prime, y_prime, True 117 | x, y = x_best, y_best 118 | if not improved: 119 | alpha *= gamma 120 | plt.suptitle("Figure 7.5", y=0.78) 121 | plt.subplots_adjust(wspace=0.25) 122 | plt.show() 123 | 124 | def figure_7_5_gps(n_steps: int = 4): 125 | """ 126 | Similar to Figure 7.5, but Generalized Pattern Search is used instead of 127 | the Hooke-Jeeves Method 128 | """ 129 | alpha, gamma = 0.5, 0.5 130 | D = np.array([[1, 0], [0, 1], [-1, -1]]) # positive spanning set 131 | x = np.array([0.7, 0.9]) # Starting point 132 | y = wheeler(x) 133 | 134 | fig = plt.figure(figsize=(5*n_steps, 5)) 135 | for i in range(1, n_steps + 1): 136 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(1,n_steps,i)) 137 | cont_ax.scatter([x[0]], [x[1]], c='black', s=30.0) 138 | improved = False 139 | for j, d in enumerate(D): 140 | x_prime = x + alpha * d 141 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2) 142 | y_prime = wheeler(x_prime) 143 | if y_prime < y: 144 | x, y, improved = x_prime, y_prime, True 145 | D = np.insert(np.delete(D, j, axis=0), 0, d, axis=0) 146 | break 147 | if not improved: 148 | alpha *= gamma 149 | plt.suptitle("Figure 7.5 (w/ Generalized Pattern Search)", y=0.78) 150 | plt.subplots_adjust(wspace=0.25) 151 | plt.show() 152 | 153 | def figure_7_11(): 154 | """ 155 | Figure 7.11: The Nelder-Mead method, proceeding left to right and top to bottom. 
156 | """ 157 | S = np.array([[0.7, 1.4], [0.7, 0.9], [0.4, 0.7]]) 158 | triangles = [S.copy()] 159 | f = wheeler 160 | alpha, beta, gamma = 1.0, 2.0, 0.5 161 | 162 | fig = plt.figure(figsize=(20, 15)) 163 | y_arr = np.apply_along_axis(f, 1, S) 164 | for j in range(1, 12 + 1): 165 | cont_ax = plot_contour(fig, wheeler, xlim=(-0.1, 3.0), ylim=(-0.1, 3.0), xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(3,4,j)) 166 | cont_ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 167 | cont_ax.set_xlabel(None) 168 | cont_ax.set_ylabel(None) 169 | for k, triangle in enumerate(triangles): 170 | cont_ax.add_patch(Polygon(triangle, fill=False, ec="black", alpha=0.75**(len(triangles) - k - 1))) 171 | p = np.argsort(y_arr) # sort lowest to highest 172 | S, y_arr = S[p], y_arr[p] 173 | xl, yl = S[0], y_arr[0] # lowest 174 | xh, yh = S[-1], y_arr[-1] # highest 175 | xs, ys = S[-2], y_arr[-2] # second-highest 176 | xm = np.mean(S[:-1], axis=0) # centroid 177 | xr = xm + alpha * (xm - xh) # reflection point 178 | yr = f(xr) 179 | 180 | if yr < yl: 181 | xe = xm + beta * (xr - xm) # expansion point 182 | ye = f(xe) 183 | S[-1], y_arr[-1] = (xe, ye) if ye < yr else (xr, yr) 184 | elif yr >= ys: 185 | if yr < yh: 186 | xh, yh, S[-1], y_arr[-1] = xr, yr, xr, yr 187 | xc = xm + gamma * (xh - xm) # contraction point 188 | yc = f(xc) 189 | if yc > yh: 190 | for i in range(1, len(y_arr)): 191 | S[i] = (S[i] + xl) / 2 192 | y_arr[i] = f(S[i]) 193 | else: 194 | S[-1], y_arr[-1] = xc, yc 195 | else: 196 | S[-1], y_arr[-1] = xr, yr 197 | triangles.append(S.copy()) 198 | plt.suptitle("Figure 7.11", fontsize=20, y=0.91) 199 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 200 | plt.show() 201 | 202 | def figure_7_20(): 203 | raise NotImplementedError # TODO 204 | -------------------------------------------------------------------------------- /src/ch13.py: -------------------------------------------------------------------------------- 1 | """Chapter 13: Sampling Plans""" 2 | 3 | import numpy as np 4 | 5 | from abc import abstractmethod 6 | from itertools import product 7 | from numpy import ndarray 8 | from primePy import primes 9 | from typing import Callable 10 | 11 | from ch03 import PHI 12 | 13 | # TODO - Rethink the classing in this chapter 14 | 15 | 16 | class SamplingPlan(): 17 | def __init__(self, *args): 18 | assert len(args) == 1 19 | self.X = args[0] # array of points in sampling plan 20 | 21 | def pairwise_distances(self, p: float = 2) -> np.ndarray: 22 | """ 23 | A function for obtaining the list of pairwise distances between points in 24 | sampling plan `self` using the L_p norm specified by `p`. 25 | """ 26 | m = len(self.X) 27 | return np.array([np.linalg.norm(self.X[i] - self.X[j], p) for i in range(m - 1) for j in range(i, m)]) 28 | 29 | def compare(self, other: 'SamplingPlan', p: float = 2) -> int: 30 | """ 31 | A function for comparing the degree to which two sampling plans `self` 32 | and `other` are space-filling using the L_p norm specified by `p`. 
33 | 34 | The function returns: * -1, if `self` is more space-filling than `other` 35 | * 1, if `self` is more space-filling than `other` 36 | * 0, if they are equivalent 37 | """ 38 | p_self = np.sort(self.pairwise_distances(p)) 39 | p_other = np.sort(other.pairwise_distances(p)) 40 | for (d_self, d_other) in zip(p_self, p_other): 41 | if d_self < d_other: 42 | return 1 43 | elif d_self > d_other: 44 | return -1 45 | return 0 46 | 47 | def phiq(self, q: float = 1, p: float = 2) -> float: 48 | """ 49 | An implementation of the Morris-Mitchell criterion which takes a list of 50 | design points `X`, the criterion parameter `q` > 0, and a norm parameter 51 | `p` >= 1. 52 | """ 53 | dists = self.pairwise_distances(p) 54 | return np.sum(dists**(-q))**(1/q) 55 | 56 | def copy(self) -> 'SamplingPlan': 57 | return SamplingPlan(np.copy(self.X)) 58 | 59 | def append(self, x: np.ndarray): 60 | self.X = np.append(self.X, x) 61 | 62 | def __contains__(self, x: np.ndarray) -> bool: 63 | return x in self.X 64 | 65 | def __iter__(self): 66 | for x in self.X: 67 | yield x 68 | 69 | def __getitem__(self, key): 70 | return self.X[key] 71 | 72 | def __setitem__(self, key, value): 73 | self.X[key] = value 74 | 75 | 76 | class FullFactorialPlan(SamplingPlan): 77 | """ 78 | A function for obtaining all sample locations for the full factorial grid. 79 | Here, `a` is a vector of variable lower bounds, `b` is a vector of variable 80 | upper bounds, and `m` is a vector of sample counts for each dimension. 81 | """ 82 | def __init__(self, a: np.ndarray, b: np.ndarray, m: np.ndarray): 83 | ranges = [np.linspace(a[i], stop=b[i], num=m[i]) for i in range(len(a))] 84 | X = np.array(list(product(*ranges))) 85 | super().__init__(X) 86 | 87 | 88 | class UniformProjectionPlan(SamplingPlan): 89 | """ 90 | A function for constructing a uniform projection plan for an `n`-dimensional 91 | hypercube with `m` samples per dimension. It returns a vector of index vectors. 92 | """ 93 | def __init__(self, m: int, n: int): 94 | perms = [np.random.permutation(m) for _ in range(n)] 95 | X = np.array([[perms[i][j] for i in range(n)] for j in range(m)]) 96 | super().__init__(X) 97 | 98 | def mutate(self): 99 | """ 100 | A function for mutating uniform projection plan `X`, while maintaining 101 | its uniform projection property. 102 | """ 103 | m, n = self.X.shape 104 | j = np.random.randint(n) 105 | i = np.random.permutation(m)[:2] 106 | self.X[i[0], j], self.X[i[1], j] = self.X[i[1], j], self.X[i[0], j] 107 | 108 | 109 | def d_max(A: SamplingPlan, B: SamplingPlan, p: float = 2) -> float: 110 | """ 111 | The set L_p distance metrics between two discrete sets, where `A` and `B` 112 | are lists of design points and `p` is the L_p norm parameter. 113 | """ 114 | def min_dist(a, B, p) -> float: 115 | return np.min([np.linalg.norm(a - b, p) for b in B]) 116 | return np.max([min_dist(a, B, p) for a in A]) 117 | 118 | 119 | def greedy_local_search(X: SamplingPlan, 120 | m: int, 121 | d: Callable[[SamplingPlan, SamplingPlan], float] = d_max) -> SamplingPlan: 122 | """ 123 | Greedy local search, for finding `m`-element sampling plans that minimize 124 | a distance metric `d` for discrete set `X`. 
125 | """ 126 | S = SamplingPlan(np.array([X[np.random.randint(m)]])) 127 | for _ in range(m - 1): 128 | j = np.argmin([np.inf if x in S else d(X, S.copy().append(x)) for x in X]) 129 | S.append(X[j]) 130 | return S 131 | 132 | 133 | def exchange_algorithm(X: SamplingPlan, 134 | m: int, 135 | d: Callable[[SamplingPlan], float] = d_max) -> SamplingPlan: 136 | """ 137 | The exchange algorithm for finding `m`-element sampling plans that minimize 138 | a distance metric `d` for discrete set `X`. 139 | """ 140 | S = SamplingPlan(X[np.random.permutation(m)]) 141 | delta, done = d(X, S), False 142 | while not done: 143 | best_pair = (0,0) 144 | for i in range(m): 145 | s = S[i] 146 | for (j, x) in enumerate(X): 147 | if x not in S: 148 | S[i] = x 149 | delta_prime = d(X, S) 150 | if delta_prime < delta: 151 | delta = delta_prime 152 | best_pair = (i,j) 153 | S[i] = s 154 | done = best_pair == (0,0) 155 | if not done: 156 | i,j = best_pair 157 | S[i] = X[j] 158 | return S 159 | 160 | 161 | def multistart_local_search(X: SamplingPlan, 162 | m: int, 163 | alg: Callable, 164 | k_max: int, 165 | d: Callable[[SamplingPlan, SamplingPlan], float] = d_max) -> SamplingPlan: 166 | """ 167 | Multistart local search runs a particular search algorithm multiple times 168 | and returns the best result. Here, `X` is the list of points, `m` is the size 169 | of the desired sampling plan, `alg` is either `exchange_algorithm` or 170 | `greedy_local_search`, `k_max` is the number of iterations to run, and `d` 171 | is the distance metric. 172 | """ 173 | assert alg.__name__ in ['exchange_algorithm', 'greedy_local_search'] 174 | sets = [alg(X, m, d) for _ in range(k_max)] 175 | return sets[np.argmin([d(X, S) for S in sets])] 176 | 177 | 178 | class FillingSet(SamplingPlan): 179 | def __init__(self, m: int, n: int, max_prime: int): 180 | bs = primes.upto(max(np.ceil(n*(np.log(n) + np.log(np.log(n)))), max_prime)) 181 | seqs = np.array([self._get_filling_set(m, b) for b in bs[:n]]) 182 | super().__init__(seqs.T) 183 | 184 | @abstractmethod 185 | def _get_filling_set(self, m: int, b: int) -> np.ndarray: 186 | pass 187 | 188 | 189 | class AdditiveRecurrenceFillingSet(FillingSet): 190 | """ 191 | Additive recurrence for constructing `m`-element filling sequences over 192 | `n`-dimensional hypercubes. The `primePy` package is used to generate 193 | the first `n` prime numbers, where the kth prime number is bounded by 194 | 195 | k(log(k) + loglog(k)) 196 | 197 | for k > 6, and `primes.upto(a)` returns all primes up to `a`. Note that 13 198 | is the sixth prime number. 199 | """ 200 | def __init__(self, m: int, n: int): 201 | super().__init__(m, n, max_prime=13) 202 | 203 | def _get_filling_set(self, m: int, b: int = None) -> np.ndarray: 204 | c = np.sqrt(b) if b is not None else PHI - 1 205 | X = np.random.rand(1) 206 | for _ in range(m - 1): 207 | X = np.append(X, (X[-1] + c) % 1) 208 | return X 209 | 210 | 211 | class HaltonFillingSet(FillingSet): 212 | """ 213 | Halton quasi-random `m`-element filling sequences over `n`-dimensional unit 214 | hypercubes, where `b` is the base. The bases `bs` must be coprime. 
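    A minimal usage sketch (illustrative sizes only):

        X = HaltonFillingSet(m=10, n=2)    # 10 quasi-random points in the unit square
        dists = X.pairwise_distances()     # space-filling metrics inherited from SamplingPlan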
215 | """ 216 | def __init__(self, m: int, n: int): 217 | super().__init__(m, n, max_prime=6) 218 | 219 | def _get_filling_set(self, m: int, b: int = 2) -> ndarray: 220 | return np.array([self.halton(i, b) for i in range(1, m + 1)]) 221 | 222 | def halton(self, i: int, b: int) -> float: 223 | result, f = 0.0, 1.0 224 | while i > 0: 225 | f = f / b 226 | result += f * (i % b) 227 | i = np.floor(i / b) 228 | return result 229 | -------------------------------------------------------------------------------- /src/figures/ch08_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from scipy.stats import norm, multivariate_normal 6 | 7 | from ch05 import GradientDescent 8 | from ch08 import NoisyDescent, rand_positive_spanning_set 9 | from TestFunctions import branin, wheeler 10 | from convenience import plot_contour, confidence_ellipse 11 | 12 | def figure_8_1(): 13 | """ 14 | Figure 8.1: Adding stochasticity to a descent method helps with traversing 15 | saddle points such as f(x) = x1^2 - x2^2 shown here. Due to the 16 | initialization, the steepest descent method converges to the saddle point 17 | where the gradient is zero. 18 | """ 19 | def f(x): return x[0]**2 - x[1]**2 20 | def grad_f(x): return np.array([2*x[0], -2*x[1]]) 21 | 22 | alpha = 0.1 23 | x_gd = np.array([2.0, 0.0]) 24 | x_sgd = x_gd.copy() 25 | GD = GradientDescent(alpha) 26 | SGD = NoisyDescent(GradientDescent(alpha), sigma=lambda k: 1/(k**3)) 27 | 28 | fig = plt.figure() 29 | lim = (-2.5, 2.5) 30 | plot_contour(fig, f, xlim=lim, ylim=lim, xstride=0.01, ystride=0.01, levels=[-5, -2, 0, 2, 5]) 31 | for _ in range(20): 32 | x_sgd_next = SGD.step(f, grad_f, x_sgd) 33 | plt.plot([x_sgd[0], x_sgd_next[0]], [x_sgd[1], x_sgd_next[1]], c="tab:red") 34 | x_sgd = x_sgd_next 35 | 36 | x_gd_next = GD.step(f, grad_f, x_gd) 37 | plt.plot([x_gd[0], x_gd_next[0]], [x_gd[1], x_gd_next[1]], c="tab:blue") 38 | x_gd = x_gd_next 39 | plt.xlim(lim) 40 | plt.ylim(lim) 41 | plt.legend(labels=["stochastic gradient descent", "steepest descent"]) 42 | plt.title("Figure 8.1") 43 | plt.show() 44 | 45 | 46 | def figure_8_2(): 47 | """ 48 | Figure 8.2: Mesh adaptive direct search proceeding left to right and top to bottom. 
49 | """ 50 | x = np.array([1.5, 1.5]) 51 | spanning_sets = [] 52 | alpha, y, n = 1.0, wheeler(x), len(x) 53 | 54 | fig = plt.figure(figsize=(20, 10)) 55 | lim = (-0.1, 3.0) 56 | for j in range(1, 8 + 1): 57 | # Set up contour plot 58 | cont_ax = plot_contour(fig, wheeler, xlim=lim, ylim=lim, xstride=0.01, ystride=0.01, levels=np.arange(-1.0, -0.0, 0.1), subplot_coords=(2,4,j)) 59 | cont_ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 60 | cont_ax.set_xlabel(None) 61 | cont_ax.set_ylabel(None) 62 | cont_ax.set_xlim(lim) 63 | cont_ax.set_ylim(lim) 64 | 65 | improved = False 66 | D = rand_positive_spanning_set(alpha, n) 67 | 68 | # Plot spanning sets 69 | spanning_sets.append((alpha, x.copy(), D.copy())) 70 | for (k, (alpha_tmp, x_tmp, spanning_set)) in enumerate(spanning_sets): 71 | cont_ax.scatter([x_tmp[0]], [x_tmp[1]], c='black', s=30.0, zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 72 | for d in spanning_set: 73 | x_prime = x_tmp + alpha_tmp * d 74 | cont_ax.scatter([x_prime[0]], [x_prime[1]], c='black', s=10.0, zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 75 | cont_ax.plot([x_tmp[0], x_prime[0]], [x_tmp[1], x_prime[1]], c='black', zorder=2, alpha=0.5**(len(spanning_sets) - k - 1)) 76 | 77 | # Mesh Adaptive Direct Search Algorithm 78 | for d in D: 79 | x_prime = x + alpha * d 80 | y_prime = wheeler(x_prime) 81 | if y_prime < y: 82 | x, y, improved = x_prime, y_prime, True 83 | x_prime = x + 3 * alpha * d 84 | y_prime = wheeler(x_prime) 85 | if y_prime < y: 86 | x, y = x_prime, y_prime 87 | break 88 | alpha = np.minimum(4 * alpha, 1.0) if improved else alpha / 4 89 | plt.suptitle("Figure 8.2", fontsize=15, y=0.86) 90 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 91 | plt.show() 92 | 93 | 94 | def figure_8_3(): 95 | """ 96 | Figure 8.3: Several annealing schedules commonly used in simulated annealing. 97 | The schedules have an initial temperature of 10. 98 | """ 99 | def logarithmic(k, t1): return t0 * np.log(2) / np.log(k + 1) 100 | def exponential(k, gamma, t1): return (gamma**(k - 1)) * t1 101 | def fast(k, t1): return t1 / k 102 | 103 | t0 = 10.0 104 | max_iters = 10000 105 | k = np.linspace(1, max_iters, 100000) 106 | 107 | plt.plot(k, [logarithmic(k[i], t0) for i in range(len(k))], color="tab:red", label="logarithmic") 108 | plt.plot(k, [exponential(k[i], 0.25, t0) for i in range(len(k))], color="tab:blue", alpha=1.0, label="exponential, $\gamma = 1/4$") 109 | plt.plot(k, [exponential(k[i], 0.5, t0) for i in range(len(k))], color="tab:blue", alpha=0.75, label="exponential, $\gamma = 1/2$") 110 | plt.plot(k, [exponential(k[i], 0.75, t0) for i in range(len(k))], color="tab:blue", alpha=0.5, label="exponential, $\gamma = 3/4$") 111 | plt.plot(k, [fast(k[i], t0) for i in range(len(k))], color="tab:green", label="fast") 112 | plt.xlim((1, max_iters)) 113 | plt.xscale('log') 114 | plt.xlabel("iteration") 115 | plt.ylabel("temperature") 116 | plt.title("Figure 8.3") 117 | plt.legend() 118 | plt.show() 119 | 120 | 121 | def figure_8_4(): 122 | """ 123 | Figure 8.4: The step multiplication factor as a function of acceptance for 124 | c = 2. 
125 | """ 126 | def factor(x, c): 127 | if x > 0.6: 128 | return 1 + c*((x - 0.6)/0.4) 129 | elif x < 0.4: 130 | return 1/(1 + c*((0.4 - x)/0.4)) 131 | return 1.0 132 | 133 | c = 2 134 | x = np.linspace(0.0, 1.0, 1000) 135 | plt.plot(x, [factor(x_i, c=c) for x_i in x]) 136 | plt.xlim((0, 1)) 137 | plt.ylim((0, 1 + c + 0.1)) 138 | plt.xticks([0.0, 0.4, 0.6, 1.0]) 139 | plt.yticks([1/(1 + c), 1.0, 1 + c], labels=["$\\frac{1}{1 + c}$", "$1$", "$1 + c$"]) 140 | plt.title("Figure 8.4") 141 | plt.show() 142 | 143 | 144 | def figure_8_5(sigma: float = 1.5, gamma: float = 0.5, t1: float = 1.0): 145 | """ 146 | Figure 8.5: Simulated annealing with an exponentially decaying temperature, 147 | where the histograms indicate the probability of simulated annealing being 148 | at a particular position at that iteration. 149 | """ 150 | def f(x): return (np.sin(5*(x + np.pi/3 + np.pi/10)) + 2*np.sin(x + np.pi/4 + np.pi/10) + 2.937)/(2*2.937) 151 | T = norm(0, sigma) 152 | def t(k, gamma=gamma, t1=t1): return (gamma**(k - 1)) * t1 153 | 154 | n_trials = 1000 155 | k_max = 8 156 | traj = np.zeros((n_trials, k_max)) 157 | traj[:, 0] = 0.5 158 | 159 | # Run trials 160 | for i in range(n_trials): 161 | x = 0.5 162 | y = f(x) 163 | x_best, y_best = x, y 164 | for k in range(1, k_max): 165 | x_prime = x + T.rvs() 166 | y_prime = f(x_prime) 167 | delta_y = y_prime - y 168 | if (delta_y <= 0) or (np.random.rand() < np.exp(-delta_y / t(k))): 169 | x, y = x_prime, y_prime 170 | if y_prime < y_best: 171 | x_best, y_best = x_prime, y_prime 172 | traj[i, k] = x 173 | 174 | # Plot the results 175 | xlim = (0.0, 6.5) 176 | x = np.linspace(xlim[0], xlim[1], 1000) 177 | fig = plt.figure(figsize=(20, 5)) 178 | for k in range(k_max): 179 | ax = fig.add_subplot(2, 4, k + 1) 180 | ax.plot(x, f(x), c='black') 181 | ax.hist(traj[:, k], bins=np.linspace(xlim[0], xlim[1], 50), density=True, alpha=0.5) 182 | ax.set_xlim(xlim[0], xlim[1]) 183 | ax.set_ylim(0, 1.1) 184 | ax.tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 185 | if k in [0, 4]: 186 | ax.set_ylabel("$y$") 187 | if k in [4, 5, 6, 7]: 188 | ax.set_xlabel("$x$") 189 | plt.suptitle("Figure 8.5", y=0.93) 190 | plt.subplots_adjust(wspace=0.05, hspace=0.05) 191 | plt.show() 192 | 193 | 194 | def figure_8_6(): 195 | """ 196 | Figure 8.6: The cross-entropy method with `m = 40` applied to the Branin 197 | function (appendix B.3) using a multivariate Gaussian proposal distribution. 198 | The 10 elite samples in each iteration are in red. 
199 | """ 200 | k_max = 4 201 | P = multivariate_normal(np.array([3.0, 7.5]), 5*np.eye(2)) 202 | m = 40 203 | m_elite = 10 204 | f = branin 205 | 206 | fig = plt.figure(figsize=(20, 5)) 207 | xlim = (2*np.pi - 12, 2*np.pi + 12) 208 | ylim = (-3, 22) 209 | for i in range(1, k_max + 1): 210 | ax = plot_contour(fig, branin, xlim, ylim, 0.01, 0.01, levels=[0, 1, 2, 3, 5, 10, 20, 50, 100], filled=True, subplot_coords=(1,k_max,i)) 211 | confidence_ellipse(P.mean, P.cov, ax, n_std=1, edgecolor='white') 212 | confidence_ellipse(P.mean, P.cov, ax, n_std=2, edgecolor='white') 213 | confidence_ellipse(P.mean, P.cov, ax, n_std=3, edgecolor='white') 214 | 215 | samples = P.rvs(m) # return shape (m, n), where n is dimension of random variable 216 | ax.scatter(samples[:, 0], samples[:, 1], c='white', s=1.0) 217 | 218 | order = np.argsort(np.apply_along_axis(f, 1, samples)) 219 | elite_samples = samples[order[:m_elite]] 220 | ax.scatter(elite_samples[:, 0], elite_samples[:, 1], c='tab:red', s=1.0) 221 | P = P._dist(*P._dist.fit(elite_samples)) 222 | plt.suptitle("Figure 8.6", y=0.8) 223 | plt.show() 224 | 225 | # TODO - Figure 8.7 226 | # TODO - Figure 8.8 227 | # TODO - Figure 8.9 -------------------------------------------------------------------------------- /src/ch05.py: -------------------------------------------------------------------------------- 1 | """Chapter 5: First-Order Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import Callable 7 | 8 | from ch04 import line_search 9 | 10 | 11 | class DescentMethod(ABC): 12 | @abstractmethod 13 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 14 | pass 15 | 16 | @abstractmethod 17 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 18 | pass 19 | 20 | 21 | class GradientDescent(DescentMethod): 22 | """ 23 | The gradient descent method, which follows the direction of gradient descent 24 | with a fixed learning rate. The `step` function produces the next iterate 25 | whereas the `initialize` function does nothing. 26 | """ 27 | def __init__(self, alpha: float): 28 | self.alpha = alpha # learning rate 29 | 30 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 31 | pass 32 | 33 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 34 | g = grad_f(x) 35 | return x - self.alpha * g 36 | 37 | 38 | class ConjugateGradientDescent(DescentMethod): 39 | """ 40 | The conjugate gradient method with the Polak-Ribiere update, where `d` 41 | is the previous search direction and `g` is the previous gradient. 
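    A minimal usage sketch (the quadratic objective below is made up for
    illustration):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        M = ConjugateGradientDescent()
        x = np.array([1.0, 2.0])
        M.initialize(f, grad_f, x)
        for _ in range(2):
            x = M.step(f, grad_f, x)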
42 | """ 43 | def __init__(self, d: np.ndarray = None, g: np.ndarray = None): 44 | self.d = d # previous search direction 45 | self.g = g # previous gradient 46 | 47 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 48 | self.g = grad_f(x) 49 | self.d = -self.g 50 | 51 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 52 | g_prime = grad_f(x) 53 | beta = np.maximum(0, np.dot(g_prime, g_prime - self.g) / np.dot(self.g, self.g)) 54 | d_prime = -g_prime + beta*self.d 55 | x_prime = line_search(f, x, d_prime) 56 | self.d, self.g = d_prime, g_prime 57 | return x_prime 58 | 59 | 60 | class Momentum(GradientDescent): 61 | """The momentum method for accelerated descent.""" 62 | def __init__(self, alpha: float, beta: float, v: np.ndarray = None): 63 | super().__init__(alpha) # learning rate 64 | self.beta = beta # momentum decay 65 | self.v = v # momentum 66 | 67 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 68 | self.v = np.zeros(len(x)) 69 | 70 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 71 | g = grad_f(x) 72 | self.v = self.beta*self.v - self.alpha*g 73 | return x + self.v 74 | 75 | 76 | class NesterovMomentum(Momentum): 77 | """Nesterov's momentum method of accelerated descent.""" 78 | def __init__(self, alpha: float, beta: float, v: np.ndarray = None): 79 | super().__init__(alpha, beta, v) 80 | 81 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 82 | g = grad_f(x + self.beta*self.v) 83 | self.v = self.beta*self.v - self.alpha*g 84 | return x + self.v 85 | 86 | 87 | class Adagrad(GradientDescent): 88 | """The Adagrad accelerated descent method.""" 89 | def __init__(self, alpha: float, eps: float, s: np.ndarray = None): 90 | super().__init__(alpha) # learning rate 91 | self.eps = eps # small value 92 | self.s = s # sum of squared gradient 93 | 94 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 95 | self.s = np.zeros(len(x)) 96 | 97 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 98 | g = grad_f(x) 99 | self.s += g**2 100 | return x - self.alpha * (g / (np.sqrt(self.s) + self.eps)) 101 | 102 | 103 | class RMSProp(Adagrad): 104 | """The RMSProp accelerated descent method.""" 105 | def __init__(self, alpha: float, gamma: float, eps: float, s: np.ndarray = None): 106 | super().__init__(alpha, eps, s) 107 | self.gamma = gamma # decay 108 | 109 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 110 | g = grad_f(x) 111 | self.s = self.gamma*self.s + (1 - self.gamma)*(g**2) 112 | return x - self.alpha * (g / (np.sqrt(self.s) + self.eps)) 113 | 114 | 115 | class Adadelta(DescentMethod): 116 | """ 117 | The Adadelta accelerated descent method. The small constant `eps` is 118 | added to the numerator as well to prevent progress from entirely decaying to 119 | zero and to start off the first iteration where `delta_x = 0`. 
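    A minimal usage sketch (illustrative objective and hyperparameters):

        import numpy as np
        f = lambda x: float(np.dot(x, x))
        grad_f = lambda x: 2.0 * x
        M = Adadelta(gamma_s=0.9, gamma_x=0.9, eps=1e-8)
        x = np.array([1.0, 2.0])
        M.initialize(f, grad_f, x)
        for _ in range(100):
            x = M.step(f, grad_f, x)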
120 | """ 121 | def __init__(self, gamma_s: float, gamma_x: float, eps: float, s: np.ndarray = None, u: np.ndarray = None): 122 | self.gamma_s = gamma_s # gradient decay 123 | self.gamma_x = gamma_x # update decay 124 | self.eps = eps # small value 125 | self.s = s # sum of squared gradients 126 | self.u = u # sum of squared updates 127 | 128 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 129 | self.s = np.zeros(len(x)) 130 | self.u = np.zeros(len(x)) 131 | 132 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 133 | g = grad_f(x) 134 | self.s = self.gamma_s*self.s + (1 - self.gamma_s)*(g**2) 135 | delta_x = -((np.sqrt(self.u) + self.eps) / (np.sqrt(self.s) + self.eps)) * g 136 | self.u = self.gamma_x*self.u + (1 - self.gamma_x)*(delta_x**2) 137 | return x + delta_x 138 | 139 | 140 | class Adam(GradientDescent): 141 | """The Adam accelerated descent method.""" 142 | def __init__(self, alpha: float, gamma_v: float, gamma_s: float, eps: float, k: int = 0, v: np.ndarray = None, s: np.ndarray = None): 143 | super().__init__(alpha) # learning rate 144 | self.gamma_v = gamma_v # 1st moment decay 145 | self.gamma_s = gamma_s # 2nd moment decay 146 | self.eps = eps # small value 147 | self.k = k # step counter 148 | self.v = v # 1st moment estimate 149 | self.s = s # 2nd moment estimate 150 | 151 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 152 | self.k = 0 153 | self.v = np.zeros(len(x)) 154 | self.s = np.zeros(len(x)) 155 | 156 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 157 | g = grad_f(x) 158 | self.v = self.gamma_v*self.v + (1 - self.gamma_v)*g 159 | self.s = self.gamma_s*self.s + (1 - self.gamma_s)*(g**2) 160 | self.k += 1 161 | v_hat = self.v / (1 - (self.gamma_v**self.k)) 162 | s_hat = self.s / (1 - (self.gamma_s**self.k)) 163 | return x - self.alpha * (v_hat / (np.sqrt(s_hat) + self.eps)) 164 | 165 | 166 | class HyperGradientDescent(GradientDescent): 167 | """The hypergradient form of gradient descent.""" 168 | def __init__(self, alpha_0: float, mu: float, alpha: float = None, g_prev: np.ndarray = None): 169 | super().__init__(alpha) # current learning rate 170 | self.alpha_0 = alpha_0 # initial learning rate 171 | self.mu = mu # learning rate of the learning rate 172 | self.g_prev = g_prev # previous gradient 173 | 174 | def initialize(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 175 | self.alpha = self.alpha_0 176 | self.g_prev = np.zeros(len(x)) 177 | 178 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 179 | g = grad_f(x) 180 | self.alpha += self.mu * np.dot(g, self.g_prev) 181 | self.g_prev = g 182 | return x - self.alpha * g 183 | 184 | 185 | class HyperNesterovMomentum(NesterovMomentum): 186 | """The hypergradient form of the Nesterov momentum descent method.""" 187 | def __init__(self, alpha_0: float, mu: float, beta: float, v: np.ndarray = None, alpha: float = None, g_prev: np.ndarray = None): 188 | super().__init__(alpha, beta, v) # current learning rate, momentum decay, momentum 189 | self.alpha_0 = alpha_0 # initial learning rate 190 | self.mu = mu # learning rate of the learning rate 191 | self.g_prev = g_prev # previous gradient 192 | 193 | def initialize(self, f: 
Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray): 194 | self.alpha = self.alpha_0 195 | self.v = np.zeros(len(x)) 196 | self.g_prev = np.zeros(len(x)) 197 | 198 | def step(self, f: Callable[[np.ndarray], float], grad_f: Callable[[np.ndarray], float], x: np.ndarray) -> np.ndarray: 199 | g = grad_f(x) 200 | self.alpha += self.mu * np.dot(g, self.g_prev + self.beta*self.v) 201 | self.v = g + self.beta*self.v 202 | self.g_prev = g 203 | return x - self.alpha * (g + self.beta*self.v) # TODO - Ask Mykel if this is a typo 204 | -------------------------------------------------------------------------------- /src/figures/ch09_figures.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append('./src/'); sys.path.append('../') 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | 6 | from matplotlib import cm 7 | from scipy.stats import norm, cauchy 8 | 9 | from ch09 import rand_population_uniform, rand_population_normal, rand_population_cauchy,\ 10 | TruncationSelection, TournamentSelection, RouletteWheelSelection 11 | from convenience import normalize 12 | 13 | 14 | def figure_9_1(): 15 | """ 16 | Figure 9.1: A comparison of the normal distribution with standard deviation 17 | 1 and the Cauchy distribution with scale 1. Although `sigma` is sometimes 18 | used for the scale parameter in the Cauchy distribution, this should not be 19 | confused with the standard deviation since the standard deviation of the 20 | Cauchy distribution is undefined. The Cauchy distribution is heavy-tailed, 21 | allowing it to cover the design space more broadly. 22 | """ 23 | x = np.linspace(-6, 6, 1000) 24 | plt.plot(x, norm(loc=0, scale=1).pdf(x), c='tab:purple', label="Normal") 25 | plt.plot(x, cauchy(loc=0, scale=1).pdf(x), c='tab:blue', label="Cauchy") 26 | plt.xlabel("$x$") 27 | plt.ylabel("$p(x)$") 28 | plt.title("Figure 9.1") 29 | plt.xticks([-5, 0, 5]) 30 | plt.yticks([0.0, 0.2, 0.4]) 31 | plt.legend() 32 | plt.show() 33 | 34 | 35 | def figure_9_2(): 36 | """ 37 | Figure 9.2: Initial populations of size 1,000 sampled using a uniform 38 | hyperrectangle with a = [-2, -2], b = [2, 2], a zero-mean normal distribution 39 | with diagonal covariance Sigma = I, and Cauchy distributions centered at the 40 | origin with scale sigma = 1. 41 | """ 42 | m = 1000 43 | s = 10.0 44 | alpha = 0.55 45 | _, ax = plt.subplots(1, 3, figsize=(12, 4)) 46 | 47 | population = rand_population_uniform(m, a=np.array([-2.0, -2.0]), b=np.array([2.0, 2.0])) 48 | ax[0].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 49 | ax[0].set_title("Uniform") 50 | 51 | population = rand_population_normal(m, mu=np.zeros(2), Sigma=np.eye(2)) 52 | ax[1].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 53 | ax[1].set_title("Normal") 54 | 55 | population = rand_population_cauchy(m, mu=np.zeros(2), sigma=np.ones(2)) 56 | ax[2].scatter(population[:, 0], population[:, 1], s=s, alpha=alpha) 57 | ax[2].set_title("Cauchy") 58 | 59 | for i in range(3): 60 | ax[i].set_xlabel("$x_1$") 61 | ax[i].set_ylabel("$x_2$") 62 | ax[i].set_xlim(-4, 4) 63 | ax[i].set_ylim(-4, 4) 64 | ax[i].set_aspect('equal') 65 | plt.suptitle("Figure 9.2") 66 | plt.show() 67 | 68 | 69 | def figure_9_4(): 70 | """ 71 | Figure 9.4: Truncation selection with a population size `m = 7` and sample 72 | size `k = 3`. The height of a bar indicates its objective function value 73 | whereas its color indicates what individual it corresponds to. 
74 | """ 75 | x, y, m, ax, colors = selection_setup() 76 | 77 | # Truncation Selection (taken directly from ch09.py) 78 | k = 3 79 | p = np.argsort(y) 80 | new_colors = colors[p] 81 | new_colors[k:] = np.array([192, 192, 192, 255.0]) / 255.0 82 | ax[1].bar(x, y[p], width=1/(m - 1), color=new_colors, edgecolor='black') 83 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 84 | ax[1].spines[['right', 'top']].set_visible(False) 85 | ax[1].set_ylim(0.0, 1.3) 86 | ax[1].set_xlabel("individual") 87 | ax[1].set_ylabel("$y$") 88 | plt.suptitle("Figure 9.4") 89 | plt.show() 90 | 91 | 92 | def figure_9_5(): 93 | """ 94 | Figure 9.5: Tournament selection with a population size `m = 7` and a sample 95 | size `k = 3`, which is run separately for each parent. The height of a bar 96 | indicates its objective function value whereas its color indicates what 97 | individual it corresponds to. 98 | """ 99 | x, y, m, ax, colors = selection_setup() 100 | 101 | # Tournament Selection (taken directly from ch09.py) 102 | k = 3 103 | def getparent(): 104 | p = np.random.permutation(len(y)) 105 | return p[np.argmin(y[p[:k]])] 106 | p = [getparent() for _ in range(k)] 107 | new_colors = np.array([[192, 192, 192, 255.0] for _ in range(m)]) / 255.0 108 | new_colors[p, :] = colors[p, :] 109 | ax[1].bar(x, y, width=1/(m - 1), color=new_colors, edgecolor='black') 110 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 111 | ax[1].spines[['right', 'top']].set_visible(False) 112 | ax[1].set_ylim(0.0, 1.3) 113 | ax[1].set_xlabel("individual") 114 | ax[1].set_ylabel("$y$") 115 | plt.suptitle("Figure 9.5") 116 | plt.show() 117 | 118 | 119 | def figure_9_6(): 120 | """ 121 | Figure 9.6: Roulette wheel selection with a population size `m = 7`, which 122 | is run separately for each parent. The approach used causes the individual 123 | with the worst objective function value to have a zero likelihood of being 124 | selected. The height of a bar indicates its objective function value (left), 125 | or its likelihood (right), whereas its color indicates what individual it 126 | corresponds to. 
127 | """ 128 | x, y, m, ax, colors = selection_setup() 129 | 130 | # Roulette Wheel Selection (taken directly from ch09.py) 131 | y = np.max(y) - y 132 | p = normalize(y, ord=1) 133 | ax[1].bar(x, p, width=1/(m - 1), color=colors, edgecolor='black') 134 | ax[1].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 135 | ax[1].spines[['right', 'top']].set_visible(False) 136 | ax[1].set_ylim(0.0, 0.6) 137 | ax[1].set_xlabel("individual") 138 | ax[1].set_ylabel("likelihood") 139 | plt.suptitle("Figure 9.6") 140 | plt.show() 141 | 142 | 143 | def selection_setup(): 144 | m = 7 145 | y = np.array([1.0, 0.6, 0.2, 1.0, 0.9, 0.6, 1.1]) 146 | x = np.linspace(0.0, 1.0, m) 147 | colors = cm.viridis(x) 148 | 149 | _, ax = plt.subplots(1, 2, figsize=(6, 2)) 150 | ax[0].bar(x, y, width=1/(m - 1), color=colors, edgecolor='black') 151 | ax[0].tick_params(axis="both", which="both", bottom=False, left=False, labelbottom=False, labelleft=False) 152 | ax[0].spines[['right', 'top']].set_visible(False) 153 | ax[0].set_ylim(0.0, 1.3) 154 | ax[0].set_xlabel("individual") 155 | ax[0].set_ylabel("$y$") 156 | 157 | return x, y, m, ax, colors 158 | 159 | 160 | def figure_9_7(): 161 | """Figure 9.7: Single-point crossover""" 162 | a, b, x, color = crossover_setup() 163 | 164 | # Single-Point Crossover (taken directly from ch09.py) 165 | i = np.random.randint(len(a)) 166 | child = np.concatenate((a[:i], b[i:])) 167 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 168 | plt.xticks([i - 0.5], labels=["crossover point"]) 169 | plt.subplots_adjust(bottom=0.4) 170 | plt.title("Figure 9.7", y=0.9) 171 | plt.show() 172 | 173 | 174 | def figure_9_8(): 175 | """Figure 9.8: Two-point crossover""" 176 | a, b, x, color = crossover_setup() 177 | 178 | # Two-Point Crossover (taken directly from ch09.py) 179 | n = len(a) 180 | i, j = np.random.randint(n, size=2) 181 | if i > j: 182 | i, j = j, i 183 | child = np.concatenate((a[:i], b[i:j], a[j:])) 184 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 185 | plt.xticks([i - 0.5, j - 0.5], labels=["crossover point 1", "crossover point 2"]) 186 | plt.subplots_adjust(bottom=0.4) 187 | plt.title("Figure 9.8", y=0.9) 188 | plt.show() 189 | 190 | 191 | def figure_9_9(): 192 | """Figure 9.9: Uniform crossover""" 193 | a, b, x, color = crossover_setup() 194 | 195 | # Uniform Crossover (taken directly from ch09.py) 196 | child = np.copy(a) 197 | for i in range(len(a)): 198 | if np.random.rand() < 0.5: 199 | child[i] = b[i] 200 | plt.scatter(x, 0.0*x - 0.3, color=color(child)) 201 | plt.tick_params(axis="x", which="both", bottom=False) 202 | plt.xticks([]) 203 | plt.title("Figure 9.9", y=0.9) 204 | plt.show() 205 | 206 | 207 | def crossover_setup(): 208 | n = 45 209 | x = np.arange(n) 210 | a, b = np.zeros(n), np.ones(n) 211 | def color(x): return ['tab:red' if x_i == 1 else 'tab:blue' for x_i in x] 212 | 213 | plt.figure(figsize=(10, 2.0)) 214 | plt.scatter(x, 0.0*x, color=color(a)) 215 | plt.scatter(x, 0.0*x - 0.15, color=color(b)) 216 | plt.ylim(-0.5, 0.2) 217 | plt.yticks([0.0, -0.15, -0.3], labels=["parent A", "parent B", "child"]) 218 | plt.tick_params(axis="y", which="both", left=False) 219 | plt.gca().spines[['left', 'bottom', 'right', 'top']].set_visible(False) 220 | 221 | return a, b, x, color 222 | 223 | 224 | def figure_9_10(): 225 | """ 226 | Figure 9.10: Mutation for binary string chromosomes gives each bit a 227 | small probability of flipping. 
228 | """ 229 | n = 45 230 | lam = 1/n 231 | x = np.arange(n) 232 | before = np.zeros(n).astype(bool) 233 | after = np.array([~v if np.random.rand() < lam else v for v in before]) 234 | def color(x): return ['lightgreen' if x_i == 1 else 'tab:blue' for x_i in x] 235 | 236 | plt.figure(figsize=(10, 1.5)) 237 | plt.scatter(x, 0.0*x, color=color(before)) 238 | plt.scatter(x, 0.0*x - 0.15, color=color(after)) 239 | plt.ylim(-0.35, 0.2) 240 | plt.xticks([]) 241 | plt.yticks([0.0, -0.15], labels=["before mutation", "after mutation"]) 242 | plt.tick_params(axis="both", which="both", left=False, bottom=False) 243 | plt.gca().spines[['left', 'bottom', 'right', 'top']].set_visible(False) 244 | plt.title("Figure 9.10") 245 | plt.tight_layout() 246 | plt.show() 247 | 248 | 249 | # TODO - Figure 9.11 250 | # TODO - Figure 9.13 251 | # TODO - Figure 9.14 252 | # TODO - Figure 9.15 253 | # TODO - Figure 9.16 254 | -------------------------------------------------------------------------------- /src/ch14.py: -------------------------------------------------------------------------------- 1 | """Chapter 14: Surrogate Models""" 2 | 3 | import numpy as np 4 | 5 | from itertools import product 6 | from typing import Callable 7 | 8 | 9 | def design_matrix(X: np.ndarray) -> np.ndarray: 10 | """A method for constructing a design matrix from a list of design points `X`""" 11 | m = len(X) 12 | return np.hstack([np.ones((m, 1)), X]) 13 | 14 | 15 | def linear_regression(X: np.ndarray, y: np.ndarray) -> Callable[[np.ndarray], float | np.ndarray]: 16 | """ 17 | A method for fitting a surrogate model using linear regression to a list of 18 | design points `X` and a vector of objective function values `y`. 19 | """ 20 | theta = np.pinv(design_matrix(X)) @ y 21 | return lambda x: np.dot(x, theta[1:]) + theta[0] 22 | 23 | 24 | def regression(X: np.ndarray, 25 | y: np.ndarray, 26 | bases: list[Callable[[np.ndarray], float]], 27 | lam: float = 0.0) -> Callable[[np.ndarray], float | np.ndarray]: 28 | """ 29 | A method for fitting a surrogate model to a list of design points `X` and 30 | corresponding objective function values `y` using regression with basis 31 | functions contained in the `bases` list. 32 | 33 | `lam` is an optional smoothing term, for regression in the presence of noise. 34 | """ 35 | B = np.array([[b(x) for b in bases] for x in X]) 36 | theta = np.linalg.solve(B.T @ B + lam * np.eye(len(bases)), B.T @ y) 37 | return lambda x: np.sum([theta[i] * bases[i](x) for i in range(len(theta))], axis=-1) 38 | 39 | 40 | def polynomial_bases_1d(i: int, k: int) -> list[Callable[[np.ndarray], float]]: 41 | """ 42 | A method for constructing a list of polynomial basis functions up to a degree `k` 43 | for the `i`th component of a design point. 44 | """ 45 | return [lambda x: x[i]**p for p in range(k + 1)] 46 | 47 | 48 | def polynomial_bases(n: int, k: int) -> list[Callable[[np.ndarray], float]]: 49 | """ 50 | A method for constructing a list of `n`-dimensional polynomial bases for 51 | terms up to degree `k`. 
52 | """ 53 | bases = [polynomial_bases_1d(i, k) for i in range(n)] 54 | terms = [] 55 | for ks in product(*[range(k + 1) for i in range(n)]): 56 | if sum(ks) <= k: 57 | terms.append(lambda x, ks=ks: np.prod([b[j](x) for (j, b) in zip(ks, bases)])) 58 | return terms 59 | 60 | 61 | def sinusoidal_bases_1d(j: int, k: int, a: np.ndarray, b: np.ndarray) -> list[Callable[[np.ndarray], float]]: 62 | """ 63 | Produces a list of sinusoidal basis function up to degree `k` for the `i`th 64 | component of the design vector given lower bound `a` and upper bound `b`. 65 | """ 66 | T = b[j] - a[j] 67 | bases = [lambda x: 0.5] 68 | for i in range(1, k + 1): 69 | bases.append(lambda x: np.sin(2*np.pi*i*x[j]/T)) 70 | bases.append(lambda x: np.cos(2*np.pi*i*x[j]/T)) 71 | 72 | 73 | def sinusoidal_bases(k: int, a: np.ndarray, b: np.ndarray) -> list[Callable[[np.ndarray], float]]: 74 | """ 75 | Produces all sinusoidal base function combinations up to degree `k` for 76 | lower-bound vector `a` and upper-bound vector `b`. 77 | """ 78 | n = len(a) 79 | bases = [sinusoidal_bases_1d(i, k, a, b) for i in range(n)] 80 | terms = [] 81 | for ks in product(*[range(2*k + 1) for i in range(n)]): 82 | powers = [(k + 1) // 2 for k in ks] 83 | if sum(powers) <= k: 84 | terms.append(lambda x, ks=ks: np.prod([b[j][x] for (j, b) in zip(ks, bases)])) 85 | return terms 86 | 87 | 88 | def radial_bases(psi: Callable[[float], float], C: np.ndarray, p: float = 2) -> list[Callable[[np.ndarray], float]]: 89 | """ 90 | A method for obtaining a list of basis functions given a radial basis 91 | function `psi`, a list of centers `C`, and an L_p norm parameter p`. 92 | """ 93 | return [lambda x: psi(np.linalg.norm(x - c, p)) for c in C] 94 | 95 | 96 | class TrainTest(): 97 | """ 98 | A utility type for training a model and then validating it on a metric. 99 | Here, `train` and `test` are arrays of indices into the training data. 100 | """ 101 | def __init__(self, train: np.ndarray, test: np.ndarray): 102 | self.train = train 103 | self.test = test 104 | 105 | 106 | def train_and_validate(X: np.ndarray, 107 | y: np.ndarray, 108 | tt: TrainTest, 109 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 110 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 111 | """ 112 | A utility method for training a model and then validating it on a metric. 113 | Here, `X` is a list of design points, `y` is the vector of corresponding 114 | function evaluations, `tt` is a train-test partition, `fit` is a model 115 | fitting function, and `metric` evaluates a model on a test set to produce an 116 | estimate of generalization error. 117 | """ 118 | model = fit(X[tt.train], y[tt.train]) 119 | return metric(model, X[tt.test], y[tt.test]) 120 | 121 | 122 | def holdout_partition(m: int, h: int = None) -> TrainTest: 123 | """ 124 | A method for randomly partitioning `m` data samples into training and 125 | holdout sets, where `h` samples are assigned to the holdout set. 
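    A minimal usage sketch (random data, simple linear bases, and a
    mean-squared-error metric, all made up for illustration):

        import numpy as np
        X = np.random.rand(20, 2)
        y = X[:, 0] + 2.0*X[:, 1]
        bases = [lambda x: 1.0, lambda x: x[0], lambda x: x[1]]
        fit = lambda X, y: regression(X, y, bases)
        metric = lambda model, X, y: float(np.mean((np.array([model(x) for x in X]) - y)**2))
        err = train_and_validate(X, y, holdout_partition(len(X)), fit, metric)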
126 | """ 127 | h = m // 2 if h is None else h 128 | p = np.random.permutation(m) 129 | train = p[h:] 130 | holdout = p[:h] 131 | return TrainTest(train, holdout) 132 | 133 | 134 | def random_subsampling(X: np.ndarray, 135 | y: np.ndarray, 136 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 137 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float], 138 | h: int = None, 139 | k_max: int = 10) -> float: 140 | """ 141 | The random subsampling method used to obtain mean and standard deviation 142 | estimates for model generalization error using `k_max` runs of the holdout 143 | method. 144 | """ 145 | m = len(X) 146 | return np.mean([train_and_validate(X, y, holdout_partition(m, h), fit, metric) for _ in range(k_max)]) 147 | 148 | 149 | def k_fold_cross_validation_sets(m: int, k: int) -> list[TrainTest]: 150 | """ 151 | Constructs the sets needed for `k`-fold cross validation on `m` samples, 152 | with `k` <= `m`. 153 | """ 154 | perm = np.random.permutation(m) 155 | sets = [] 156 | for i in range(k): 157 | validate = perm[i:m:k] 158 | train = perm[np.setdiff1d(range(m), range(i, m, k))] 159 | sets.append(TrainTest(train, validate)) 160 | return sets 161 | 162 | 163 | def multiset_validation_estimate(X: np.ndarray, 164 | y: np.ndarray, 165 | sets: list[TrainTest], 166 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 167 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 168 | """ 169 | Computes the mean of the generalization error estimate by training and 170 | validating on the list of train-validate sets contained in `sets`. The other 171 | variables are the list of design points `X`, the corresponding objective 172 | function values `y`, a function `fit` that trains a surrogate model, and a 173 | function `metric` that evaluates a model on a data set. 174 | 175 | NOTE: Works for Cross-Validation sets and Bootstrap sets 176 | """ 177 | return np.mean([train_and_validate(X, y, tt, fit, metric) for tt in sets]) 178 | 179 | 180 | def bootstrap_sets(m: int, b: int) -> list[TrainTest]: 181 | """A method for obtaining `b` bootstrap samples, each for a data set of size `m`""" 182 | return [TrainTest(np.random.randint(m, size=m), np.arange(m)) for i in range(b)] 183 | 184 | 185 | def leave_one_out_bootstrap_estimate(X: np.ndarray, 186 | y: np.ndarray, 187 | sets: list[TrainTest], 188 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 189 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 190 | """ 191 | A method for computing the leave-one-out bootstrap generalization error 192 | estimate using the train-validate sets `sets`. The other variables are the 193 | list of design points `X`, the corresponding objective function values `y`, 194 | a function `fit` that trains a surrogate model, and a function `metric` that 195 | evaluates a model on a data set. 
196 | """ 197 | m, b = len(X), len(sets) 198 | error = 0.0 199 | models = [fit(X[tt.train], y[tt.train]) for tt in sets] 200 | for j in range(m): 201 | c = 0 202 | delta = 0.0 203 | for i in range(b): 204 | if j not in sets[i].train: 205 | c += 1 206 | delta += metric(models[i], np.array([X[j]]), np.array([y[j]])) 207 | error += delta / c 208 | return error / m 209 | 210 | 211 | def bootstrap_632_estimate(X: np.ndarray, 212 | y: np.ndarray, 213 | sets: list[TrainTest], 214 | fit: Callable[[np.ndarray, np.ndarray], Callable[[np.ndarray], float]], 215 | metric: Callable[[Callable[[np.ndarray], float], np.ndarray, np.ndarray], float]) -> float: 216 | """ 217 | A method for obtaining the 0.632 bootstrap estimate for data points `X`, 218 | objective function values `y`, fitting function `fit`, and 219 | metric function `metric`. 220 | """ 221 | eps_loob = leave_one_out_bootstrap_estimate(X, y, sets, fit, metric) 222 | eps_boot = multiset_validation_estimate(X, y, sets, fit, metric) 223 | return 0.632 * eps_loob + 0.368 * eps_boot 224 | -------------------------------------------------------------------------------- /src/ch19.py: -------------------------------------------------------------------------------- 1 | """Chapter 19: Discrete Optimization""" 2 | 3 | import networkx as nx 4 | import numpy as np 5 | 6 | from itertools import combinations 7 | from queue import PriorityQueue 8 | 9 | from ch11 import LinearProgram 10 | from convenience import normalize 11 | 12 | 13 | class MixedIntegerProgram(): 14 | """ 15 | A mixed integer linear program type that reflects the following equation: 16 | 17 | minimize c'x 18 | subject to: Ax = b 19 | x >= 0 20 | x_D \in Z^||D|| 21 | 22 | Here, `D` is the set of design indices constrained to be discrete. 23 | """ 24 | def __init__(self, A: np.ndarray, b: np.ndarray, c: np.ndarray, D: np.ndarray): 25 | self.A = A 26 | self.b = b 27 | self.c = c 28 | self.D = D 29 | 30 | def relax(self) -> LinearProgram: 31 | """A method for relaxing a mixed integer linear program into a linear program""" 32 | return LinearProgram(self.A, self.b, self.c) 33 | 34 | def round(self) -> np.ndarray: 35 | """ 36 | A method for solving a mixed integer linear program by rounding. 37 | The solution obtained by rounding may be suboptimal or infeasible. 
38 | """ 39 | x = self.relax().minimize() 40 | for i in self.D: 41 | x[i] = round(x[i]) 42 | return x 43 | 44 | def is_totally_unimodular(self) -> bool: 45 | """Method for determining whether a mixed integer program is totally unimodular""" 46 | return is_totally_unimodular(self.A) and\ 47 | np.all(isintegral(self.b)) and np.all(isintegral(self.c)) 48 | 49 | 50 | def isintegral(x: float | np.ndarray, eps=1e-10) -> bool | np.ndarray: 51 | """Returns true if the given value is integral""" 52 | return np.abs(np.round(x) - x) <= eps 53 | 54 | 55 | def is_totally_unimodular(A: np.ndarray) -> bool: 56 | """Method for determining whether matrices `A` are totally unimodular""" 57 | # all entries must be in [0, 1, -1] 58 | if np.any([a not in [0, -1, 1] for a in A]): 59 | return False 60 | # brute force check every subdeterminant 61 | r, c = A.shape 62 | for i in range(1, min(r, c) + 1): 63 | for a in combinations(range(r), i): 64 | for b in combinations(range(c), i): 65 | B = A[a, b] 66 | if np.linalg.det(B) not in [0, -1, 1]: # TODO Check this closer (for approximate values) 67 | return False 68 | return True 69 | 70 | 71 | def frac(x: float): 72 | """Returns the fractional part of a number""" 73 | return np.modf(x)[0] 74 | 75 | 76 | def cutting_plane(MIP: MixedIntegerProgram) -> np.ndarray: 77 | """ 78 | The cutting plane method solves a given mixed integer program `MIP` and 79 | returns an optimal design vector. An error is thrown if no feasible solution 80 | exists. 81 | """ 82 | LP = MIP.relax() 83 | x, b_inds, v_inds = LP.minimize(return_idcs=True) 84 | n_orig = len(x) 85 | D = np.copy(MIP.D) 86 | while not np.all(isintegral(x[D])): 87 | AB, AV = LP.A[:, b_inds], LP.A[:, v_inds] 88 | Abar = np.linalg.solve(AB, AV) 89 | b = 0 90 | for i in D: 91 | if not isintegral(x[i]): 92 | b += 1 93 | A2 = np.vstack([np.hstack([LP.A, np.zeros((len(LP.A), 1))]), 94 | np.zeros((1, LP.A.shape[1] + 1))]) 95 | A2[-1, -1] = 1 96 | A2[-1, v_inds] = np.floor(Abar[b,:]) - Abar[b,:] 97 | b2 = np.append(LP.b, -frac(x[i])) 98 | c2 = np.append(LP.c, 0) 99 | LP = LinearProgram(A2, b2, c2) 100 | x, b_inds, v_inds = LP.minimize(return_idcs=True) 101 | return x[:n_orig] 102 | 103 | 104 | def branch_and_bound(MIP: MixedIntegerProgram) -> np.ndarray: 105 | """ 106 | The branch and bound algorithm for solving a mixed integer program `MIP`. 107 | More sophisticated implementations will drop variables whose solutions are 108 | known in order to speed computation. 109 | 110 | The `PriorityQueue` type is provided by the Python `queue` library. 
111 | """ 112 | LP = MIP.relax() 113 | x, y = LP.minimize_lp_and_y() 114 | n = len(x) 115 | x_best, y_best, Q = np.copy(x), np.inf, PriorityQueue() 116 | Q.put((y, (LP, x, y))) 117 | while not Q.empty(): 118 | LP, x, y = Q.get() 119 | if np.any(np.isnan(x)) or np.all(isintegral(x[MIP.D])): 120 | if y < y_best: 121 | x_best, y_best = x[:n], y 122 | else: 123 | i = np.argmax(np.abs(x[MIP.D] - np.round(x[MIP.D]))) # TODO - Not convinced this gets the right index 124 | A, b, c = LP.A, LP.b, LP.c 125 | c2 = np.append(c, 0) 126 | for r in [1, -1]: # x_i <= floor(x_i), then x_i >= ceil(x_i) 127 | A2 = np.vstack([np.hstack([A, np.zeros((len(A), 1))]), 128 | np.array([[j == i for j in range(A.shape[1])] + [r]])]) 129 | b2 = np.append(b, np.floor(x[i]) if r == 1 else np.ceil(x[i])) 130 | LP2 = LinearProgram(A2, b2, c2) 131 | x2, y2 = LP2.minimize_lp_and_y() 132 | if y2 <= y_best: 133 | Q.put((y2, (LP2, x2, y2))) 134 | return x_best 135 | 136 | 137 | def padovan_topdown(n: int, P: dict[int, int] = dict()) -> int: 138 | """Computing the Padovan sequence using dynamic programming, with the top-down approach""" 139 | if n not in P: 140 | P[n] = 1 if n < 3 else padovan_topdown(n - 2, P) + padovan_topdown(n - 3, P) 141 | return P[n] 142 | 143 | 144 | def padovan_bottomup(n: int) -> int: 145 | """Computing the Padovan sequence using dynamic programming, with the bottom-up approach""" 146 | P = {0:1, 1:1, 2:1} 147 | for i in range(3, n + 1): 148 | P[i] = P[i - 2] + P[i - 3] 149 | return P[n] 150 | 151 | 152 | def knapsack(v: np.ndarray, w: np.ndarray, w_max: float) -> np.ndarray: 153 | """ 154 | A method for solving the 0-1 knapsack problem with item values `v`, 155 | integral item weights `w`, and integral capacity `w_max`. Recovering the 156 | design vector from the cached solutions requires additional iteration. 157 | """ 158 | n = len(v) 159 | y = {(0, j): 0.0 for j in range(w_max + 1)} 160 | for i in range(n): 161 | for j in range(w_max + 1): 162 | y[(i, j)] = y[(i - 1, j)] if w[i] > j else max(y[(i - 1, j)], y[(i - 1, j - w[i])] + v[i]) 163 | 164 | # recover solution 165 | x, j = np.full(n, False), w_max 166 | for i in range(n - 1, -1, -1): 167 | if (w[i] <= j) and (y[(i, j)] - y[(i - 1, j - w[i])] == v[i]): 168 | # the ith element is in the knapsack 169 | x[i] = True 170 | j -= w[i] 171 | return x 172 | 173 | class AntColonyOptimization(): 174 | """ 175 | Ant colony optimization, which takes a directed or undirected graph `G` 176 | from `networkx` and a dictionary of edge tuples ot path lengths `lengths`. 177 | Ants start at the first node in the graph. Optional parameters include the 178 | number of ants per iteration `m`, the number of iterations `k_max`, the 179 | pheromone exponent `alpha`, the prior exponent `beta`, the evaporation 180 | scalar `rho`, and a dictionary of prior edge weights `eta`. 
181 | """ 182 | def __call__(self, 183 | G: nx.Graph | nx.DiGraph, 184 | lengths: dict[tuple[int, int], float], 185 | m: int = 1000, 186 | k_max: int = 100, 187 | alpha: float = 1.0, 188 | beta: float = 5.0, 189 | rho: float = 0.5, 190 | eta: dict[tuple[int, int], float] = None) -> list[int]: 191 | tau = {e: 1.0 for e in G.edges} 192 | x_best, y_best = [], np.inf 193 | for k in range(k_max): 194 | A = self.edge_attractiveness(G, tau, eta, alpha, beta) 195 | for (e, v) in tau.items(): 196 | tau[e] = (1 - rho)*v 197 | for ant in range(m): 198 | x_best, y_best = self.run_ant(G, lengths, tau, A, x_best, y_best) 199 | return x_best 200 | 201 | def edge_attractiveness(self, 202 | graph: nx.Graph | nx.DiGraph, 203 | tau: dict[tuple[int, int], float], 204 | eta: dict[tuple[int, int], float], 205 | alpha: float = 1.0, 206 | beta: float = 5.0) -> dict[tuple[int, int], float]: 207 | """ 208 | A method for computing the edge attractiveness table given graph `graph`, 209 | pheromone levels `tau`, prior edge weights `eta`, pheromone exponent `alpha`, 210 | and prior exponent `beta`. 211 | """ 212 | A = dict() 213 | for src in graph: 214 | neighbors = graph.neighbors(src) 215 | for dst in neighbors: 216 | v = (tau[(src, dst)]**alpha) * (eta[(src, dst)]**beta) 217 | A[(src, dst)] = v 218 | return A 219 | 220 | def run_ant(self, 221 | G: nx.Graph | nx.DiGraph, 222 | lengths: dict[tuple[int, int], float], 223 | tau: dict[tuple[int, int], float], 224 | A: dict[tuple[int, int], float], 225 | x_best: list[int], 226 | y_best: float) -> tuple[list[int], float]: 227 | """ 228 | A method for simulating a single ant on a traveling salesman problem 229 | in which the ant starts at the first node and attempts to visit each node 230 | exactly once. Pheromone levels are increased at the end of a successful 231 | tour. The parameters are the graph `G`, edge lengths `lengths`, pheromone 232 | levels `tau`, edge attractiveness `A`, the best solution found thus far 233 | `x_best`, and its value `y_best`. 
234 | """ 235 | x = [1] 236 | while len(x) < len(G): 237 | src = x[-1] 238 | neighbors = np.setdiff1d(G.neighbors(src), x).tolist() 239 | if len(neighbors) == 0: # ant got stuck 240 | return (x_best, y_best) 241 | 242 | attractiveness = [A[(src, dst)] for dst in neighbors] 243 | x.append(neighbors[np.random.choice(len(neighbors), p=normalize(attractiveness, 1))]) 244 | 245 | l = np.sum([lengths[(x[i - 1], x[i])] for i in range(1, len(x))]) 246 | for i in range(1, len(x)): 247 | tau[(x[i - 1], x[i])] += 1/l 248 | if l < y_best: 249 | return (x, l) 250 | return (x_best, y_best) 251 | -------------------------------------------------------------------------------- /src/ch07.py: -------------------------------------------------------------------------------- 1 | """Chapter 7: Direct Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from collections import OrderedDict 7 | from queue import PriorityQueue 8 | from typing import Callable 9 | 10 | from ch04 import line_search 11 | 12 | 13 | def basis(i: int, n: int) -> np.ndarray: 14 | """A function for constructing the `i`th basis vector (zero-indexed) of length `n`""" 15 | return np.array([1.0 if k == i else 0.0 for k in range(n)]) 16 | 17 | 18 | def cyclic_coordinate_descent(f: Callable[[np.ndarray], float], 19 | x: np.ndarray, 20 | eps: float, 21 | with_acceleration: bool = False) -> np.ndarray: 22 | """ 23 | The cyclic coordinate descent method (with or without acceleration) takes as 24 | input the objective function `f` and a starting point `x`, and it runs until 25 | the step size over a full cycle is less than a given tolerance `eps`. 26 | """ 27 | delta, n = np.inf, len(x) 28 | while delta > eps: 29 | x_prev = x.copy() 30 | for i in range(n): 31 | d = basis(i, n) 32 | x = line_search(f, x, d) 33 | if with_acceleration: 34 | x = line_search(f, x, x - x_prev) # acceleration step 35 | delta = np.linalg.norm(x - x_prev) 36 | return x 37 | 38 | 39 | def powell(f: Callable[[np.ndarray], float], x: np.ndarray, eps: float) -> np.ndarray: 40 | """ 41 | Powell's method, which takes the objective function `f`, a starting point `x`, 42 | and a tolerance `eps`. 43 | """ 44 | n = len(x) 45 | U = np.eye(n) 46 | delta = np.inf 47 | while delta > eps: 48 | x_prime = x.copy() 49 | for i in range(n): 50 | d = U[i] 51 | x_prime = line_search(f, x_prime, d) 52 | for i in range(n - 1): 53 | U[i] = U[i + 1] 54 | U[n - 1] = d = x_prime - x 55 | x_prime = line_search(f, x_prime, d) 56 | delta = np.linalg.norm(x_prime - x) 57 | x = x_prime 58 | return x 59 | 60 | 61 | def hooke_jeeves(f: Callable[[np.ndarray], float], 62 | x: np.ndarray, 63 | alpha: float, 64 | eps: float, 65 | gamma: float = 0.5) -> np.ndarray: 66 | """ 67 | The Hooke-Jeeves method, which takes the target function `f`, a starting point 68 | `x`, a starting step size `alpha`, a tolerance `eps`, and a step decay `gamma`. 69 | The method runs until the step size is less than `eps` and the points sampled 70 | along the coordinate directions do not provide an improvement. 71 | 72 | Based on the implementation from A.F. Kaupe Jr, "Algorithm 178: Direct Search," 73 | Communications of the ACM, vol. 6, no. 6, pp. 313-314, 1963. 
74 | """ 75 | y, n = f(x), len(x) 76 | while alpha > eps: 77 | improved = False 78 | x_best, y_best = x, y 79 | for i in range(n): 80 | for sgn in [-1, 1]: 81 | x_prime = x + sgn*alpha*basis(i, n) 82 | y_prime = f(x_prime) 83 | if y_prime < y_best: 84 | x_best, y_best, improved = x_prime, y_prime, True 85 | x, y = x_best, y_best 86 | if not improved: 87 | alpha *= gamma 88 | return x 89 | 90 | 91 | def generalized_pattern_search(f: Callable[[np.ndarray], float], 92 | x: np.ndarray, 93 | alpha: float, 94 | D: np.ndarray, 95 | eps: float, 96 | gamma: float = 0.5) -> np.ndarray: 97 | """ 98 | Generalized pattern search, which takes the target function `f`, a starting 99 | point `x`, a starting step size `alpha`, a set of search directions `D`, a 100 | tolerance `eps`, and a step decay `gamma`. The method runs until the step 101 | size is less than `eps` and the points sampled along the coordinate directions 102 | do not provide an improvement. 103 | """ 104 | y = f(x) 105 | while alpha > eps: 106 | improved = False 107 | for i, d in enumerate(D): 108 | x_prime = x + alpha * d 109 | y_prime = f(x_prime) 110 | if y_prime < y: 111 | x, y, improved = x_prime, y_prime, True 112 | D = np.insert(np.delete(D, i, axis=0), 0, d, axis=0) 113 | break 114 | if not improved: 115 | alpha *= gamma 116 | return x 117 | 118 | 119 | def nelder_mead(f: Callable[[np.ndarray], float], 120 | S: np.ndarray, 121 | eps: float, 122 | alpha: float = 1.0, 123 | beta: float = 2.0, 124 | gamma: float = 0.5) -> np.ndarray: 125 | """ 126 | The Nelder-Mead simplex method, which takes the objective function `f`, a 127 | starting simplex `S` consisting of a list of vectors, and a tolerance `eps`. 128 | The Nelder-Mead parameters can be specified as well and default to recommended 129 | values. 130 | """ 131 | delta, y_arr = np.inf, np.apply_along_axis(f, 1, S) 132 | while delta > eps: 133 | p = np.argsort(y_arr) # sort lowest to highest 134 | S, y_arr = S[p], y_arr[p] 135 | xl, yl = S[0], y_arr[0] # lowest 136 | xh, yh = S[-1], y_arr[-1] # highest 137 | xs, ys = S[-2], y_arr[-2] # second-highest 138 | xm = np.mean(S[:-1], axis=0) # centroid 139 | xr = xm + alpha * (xm - xh) # reflection point 140 | yr = f(xr) 141 | 142 | if yr < yl: 143 | xe = xm + beta * (xr - xm) # expansion point 144 | ye = f(xe) 145 | S[-1], y_arr[-1] = (xe, ye) if ye < yr else (xr, yr) 146 | elif yr >= ys: 147 | if yr < yh: 148 | xh, yh, S[-1], y_arr[-1] = xr, yr, xr, yr 149 | xc = xm + gamma * (xh - xm) # contraction point 150 | yc = f(xc) 151 | if yc > yh: 152 | for i in range(1, len(y_arr)): 153 | S[i] = (S[i] + xl) / 2 154 | y_arr[i] = f(S[i]) 155 | else: 156 | S[-1], y_arr[-1] = xc, yc 157 | else: 158 | S[-1], y_arr[-1] = xr, yr 159 | 160 | delta = np.std(y_arr) 161 | return S[np.argmin(y_arr)] 162 | 163 | 164 | def direct(f: Callable[[np.ndarray], float], 165 | a: np.ndarray, 166 | b: np.ndarray, 167 | eps: float, 168 | k_max: int) -> np.ndarray: 169 | """ 170 | DIRECT, which takes the multidimensional objective function `f`, vector of 171 | lower bounds `a`, vector of upper bounds `b`, tolerance parameter `eps`, and 172 | number of iterations `k_max`. It returns the best coordinate. 
173 | """ 174 | g = reparametrize_to_unit_hypercube(f, a, b) 175 | intervals = Intervals() 176 | n = len(a) 177 | c = np.full(n, 0.5) 178 | interval = Interval(c, g(c), np.zeros(n)) 179 | intervals.add_interval(interval) 180 | c_best, y_best = np.copy(interval.c), interval.y 181 | 182 | for _ in range(k_max): 183 | S = intervals.get_opt_intervals(eps, y_best) # TODO - Why is y_best needed? 184 | to_add = [] 185 | for interval in S: 186 | to_add.extend(interval.divide(g)) 187 | intervals[interval.vertex_dist()].get() 188 | for interval in to_add: 189 | intervals.add_interval(interval) 190 | if interval.y < y_best: 191 | c_best, y_best = np.copy(interval.c), interval.y 192 | 193 | return rev_unit_hypercube_parametrization(c_best, a, b) 194 | 195 | 196 | def rev_unit_hypercube_parametrization(x: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray: 197 | return x * (b - a) + a 198 | 199 | 200 | def reparametrize_to_unit_hypercube(f: Callable[[np.ndarray], float], a: np.ndarray, b: np.ndarray) -> Callable[[np.ndarray], float]: 201 | """ 202 | A function that creates a function defined over the unit hypercube that 203 | is a reparametrized version of the function `f` defined over the 204 | hypercube with lower and upper bounds `a` and `b`. 205 | """ 206 | Delta = b - a 207 | return lambda x: f(x * Delta + a) 208 | 209 | 210 | class Interval(): 211 | """ 212 | `Interval` has three fields: the interval center `c`, the center point 213 | `y = f(c)`, and the number of divisions in each dimension `depths`. 214 | """ 215 | def __init__(self, c: np.ndarray, y: float, depths: np.ndarray): 216 | self.c = c 217 | self.y = y 218 | self.depths = depths 219 | 220 | def __lt__(self, other: 'Interval'): 221 | return self.y < other.y 222 | 223 | def min_depth(self): 224 | return np.min(self.depths) 225 | 226 | def vertex_dist(self): 227 | return np.linalg.norm(0.5 * (3.0**(-self.depths))) 228 | 229 | def divide(self, f: Callable[[np.ndarray], float]) -> list['Interval']: 230 | """The `divide` routine for dividing an interval, where `f` is the 231 | objective function and `self` is the interval to be divided. It 232 | returns a list of the resulting smaller intervals.""" 233 | c, d, n = self.c, self.min_depth(), len(self.c) 234 | dirs = np.where(self.depths == d)[0] 235 | cs = np.array([[c + (3.0**(-d-1)) * basis(i, n), 236 | c - (3.0**(-d-1)) * basis(i, n)] for i in dirs]) 237 | vs = np.apply_along_axis(f, 2, cs) 238 | minvals = np.min(vs, axis=1) 239 | minvals = minvals[0] if n == 1 else minvals 240 | 241 | intervals = [] 242 | depths = np.copy(self.depths) 243 | for j in np.argsort(minvals): 244 | depths[dirs[j]] += 1 245 | C, V = cs[j], vs[j] 246 | intervals.append(Interval(C[0], V[0], np.copy(depths))) 247 | intervals.append(Interval(C[1], V[1], np.copy(depths))) 248 | intervals.append(Interval(c, self.y, np.copy(depths))) 249 | return intervals 250 | 251 | class Intervals(OrderedDict[float, PriorityQueue[tuple[float, Interval]]]): 252 | """The data structure used in DIRECT""" 253 | def add_interval(self, interval: Interval): 254 | """Inserts a new `Interval` into the data structure.""" 255 | d = interval.vertex_dist() 256 | if d not in self.keys(): 257 | self[d] = PriorityQueue() 258 | self[d].put((interval.y, interval)) 259 | 260 | def get_opt_intervals(self, eps: float, y_best: float) -> list[Interval]: # TODO - y_best isn't used? 
261 | """A routine for obtaining the potentially optimal intervals, where `eps` 262 | is a tolerance parameter and `y_best` is the best function evaluation.""" 263 | stack = [] 264 | for (x, pq) in self.items(): 265 | if not pq.empty(): 266 | interval = pq.queue[0][1] 267 | y = interval.y 268 | 269 | while len(stack) > 1: 270 | interval1 = stack[-1] 271 | interval2 = stack[-2] 272 | x1, y1 = interval1.vertex_dist(), interval1.y 273 | x2, y2 = interval2.vertex_dist(), interval2.y 274 | l = (y2 - y) / (x2 - x) 275 | if (y1 <= l * (x1 - x) + y + eps): # TODO: and (y1 <= l * x1 + y_best - eps*np.abs(y_best)): 276 | break 277 | stack.pop() # remove previous interval 278 | 279 | if (len(stack) != 0) and (interval.y > stack[-1].y + eps): 280 | continue # skip new interval 281 | 282 | stack.append(interval) # add new interval 283 | return stack 284 | -------------------------------------------------------------------------------- /src/ch09.py: -------------------------------------------------------------------------------- 1 | """Chapter 9: Population Methods""" 2 | 3 | import numpy as np 4 | 5 | from abc import ABC, abstractmethod 6 | from typing import Callable 7 | from scipy.stats import cauchy, multivariate_normal, rv_continuous 8 | 9 | from convenience import normalize 10 | 11 | 12 | def rand_population_uniform(m: int, a: np.ndarray, b: np.ndarray) -> np.ndarray: 13 | """ 14 | A method for sampling an initial population of `m` design points over a 15 | uniform hyperrectangle with lower-bound vector `a` and upper-bound vector `b`. 16 | """ 17 | d = len(a) 18 | return a + np.random.rand(m, d) * (b - a) 19 | 20 | 21 | def rand_population_normal(m: int, mu: np.ndarray, Sigma: np.ndarray) -> np.ndarray: 22 | """ 23 | A method for sampling an initial population of `m` design points using a 24 | multivariate normal distribution with mean `mu` and covariance `Sigma`. 25 | """ 26 | D = multivariate_normal(mu, Sigma) 27 | return D.rvs(m) 28 | 29 | 30 | def rand_population_cauchy(m: int, mu: np.ndarray, sigma: np.ndarray) -> np.ndarray: 31 | """ 32 | A method for sampling an initial population of `m` design points using a 33 | Cauchy distribution with location `mu` and scale `sigma` for each dimension. 34 | The location and scale are analogous to the mean and standard deviation used 35 | in a normal distribution. 36 | """ 37 | n = len(mu) 38 | return np.array([[cauchy(mu[j], sigma[j]).rvs() for j in range(n)] for _ in range(m)]) 39 | 40 | 41 | def genetic_algorithm(f: Callable[[np.ndarray], float], 42 | population: np.ndarray, 43 | k_max: int, 44 | S: 'SelectionMethod', 45 | C: 'CrossoverMethod', 46 | M: 'MutationMethod') -> np.ndarray: 47 | """ 48 | The genetic algorithm, which takes an objective function `f`, an initial 49 | population `population`, number of iterations `k_max`, a `SelectionMethod` `S`, 50 | a `CrossoverMethod` `C`, and a `MutationMethod` `M`. 51 | """ 52 | for _ in range(k_max): 53 | parents = S.select(np.apply_along_axis(f, 1, population)) 54 | children = [C.crossover(population[p[0]], population[p[1]]) for p in parents] 55 | population = [M.mutate(child) for child in children] 56 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 57 | 58 | 59 | def rand_population_binary(m: int, n: int) -> np.ndarray: 60 | """ 61 | A method for sampling random starting populations of `m` bit-string 62 | chromosomes of length `n`. 
63 | """ 64 | return np.random.randint(2, size=(m, n), dtype=bool) 65 | 66 | 67 | class SelectionMethod(ABC): 68 | """ 69 | Several selection methods for genetic algorithms. Calling selection with a 70 | `SelectionMethod` and the list of objective function values `y` will produce 71 | a list of parental pairs. 72 | """ 73 | @abstractmethod 74 | def select(self, y: np.ndarray) -> np.ndarray: 75 | pass 76 | 77 | 78 | class TruncationSelection(SelectionMethod): 79 | def __init__(self, k: int): 80 | self.k = k # top k to keep 81 | 82 | def select(self, y: np.ndarray) -> np.ndarray: 83 | p = np.argsort(y) 84 | return np.array([p[np.random.choice(self.k, 2)] for _ in y]) 85 | 86 | 87 | class TournamentSelection(SelectionMethod): 88 | def __init__(self, k: int): 89 | self.k = k # top k to keep 90 | 91 | def select(self, y: np.ndarray) -> np.ndarray: 92 | def getparent(): 93 | p = np.random.permutation(len(y)) 94 | return p[np.argmin(y[p[:self.k]])] 95 | return np.array([[getparent(), getparent()] for _ in y]) 96 | 97 | 98 | class RouletteWheelSelection(SelectionMethod): 99 | def select(self, y: np.ndarray) -> np.ndarray: 100 | y = np.max(y) - y 101 | p = normalize(y, ord=1) 102 | return np.random.choice(len(y), size=(len(y), 2), p=p) 103 | 104 | 105 | class CrossoverMethod(ABC): 106 | """ 107 | Several crossover methods for genetic algorithms. Calling crossover with a 108 | `CrossoverMethod` and two parents `a` and `b` will produce a child 109 | chromosome that contains a mixture of the parents' genetic codes. 110 | """ 111 | @abstractmethod 112 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 113 | pass 114 | 115 | 116 | class SinglePointCrossover(CrossoverMethod): 117 | """Works for both binary string and real-valued chromosomes""" 118 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 119 | i = np.random.randint(len(a)) 120 | return np.concatenate((a[:i], b[i:])) 121 | 122 | 123 | class TwoPointCrossover(CrossoverMethod): 124 | """Works for both binary string and real-valued chromosomes""" 125 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 126 | n = len(a) 127 | i, j = np.random.randint(n, size=2) 128 | if i > j: 129 | i, j = j, i 130 | return np.concatenate((a[:i], b[i:j], a[j:])) 131 | 132 | 133 | class UniformCrossover(CrossoverMethod): 134 | """Works for both binary string and real-valued chromosomes""" 135 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 136 | child = np.copy(a) 137 | for i in range(len(a)): 138 | if np.random.rand() < 0.5: 139 | child[i] = b[i] 140 | return child 141 | 142 | 143 | class InterpolationCrossover(CrossoverMethod): 144 | """ 145 | A crossover method for real-valued chromosomes which performs linear 146 | interpolation between the parents. 147 | """ 148 | def __init__(self, lam: float): 149 | self.lam = lam # interpolation parameter 150 | 151 | def crossover(self, a: np.ndarray, b: np.ndarray) -> np.ndarray: 152 | return (1 - self.lam) * a + self.lam * b 153 | 154 | 155 | class MutationMethod(ABC): 156 | @abstractmethod 157 | def mutate(self, child: np.ndarray) -> np.ndarray: 158 | pass 159 | 160 | 161 | class BitwiseMutation(MutationMethod): 162 | """ 163 | The bitwise mutation method for binary string chromosomes. 164 | Here, `lam` is the mutation rate. 
165 | """ 166 | def __init__(self, lam: float): 167 | self.lam = lam # mutation rate 168 | 169 | def mutate(self, child: np.ndarray) -> np.ndarray: 170 | return np.array([~v if np.random.rand() < self.lam else v for v in child]) 171 | 172 | 173 | class GaussianMutation(MutationMethod): 174 | """ 175 | The Gaussian mutation method for real-valued chromosomes. 176 | Here, `sigma` is the standard deviation. 177 | """ 178 | def __init__(self, sigma: float): 179 | self.sigma = sigma # standard deviation 180 | 181 | def mutate(self, child: np.ndarray) -> np.ndarray: 182 | return child + np.random.randn(len(child)) * self.sigma 183 | 184 | 185 | def differential_evolution(f: Callable[[np.ndarray], float], 186 | population: np.ndarray, 187 | k_max: int, 188 | p: float = 0.5, 189 | w: float = 1.0) -> np.ndarray: 190 | """ 191 | Differential evolution, which takes an objective function `f`, a population 192 | `population`, a number of iterations `k_max`, a crossover probability `p`, 193 | and a differential weight `w`. The best individual is returned. 194 | """ 195 | m, n = population.shape 196 | for _ in range(k_max): 197 | for (k, x) in enumerate(population): 198 | a, b, c = np.random.choice(population, 199 | p=normalize(np.array([j != k for j in range(m)]), ord=1), 200 | size=3, replace=False) 201 | z = a + w * (b - c) 202 | j = np.random.randint(len(n)) 203 | x_prime = np.array([z[i] if ((i == j) or (np.random.rand() < p)) else x[i] for i in range(n)]) 204 | if f(x_prime) < f(x): 205 | x = x_prime 206 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 207 | 208 | 209 | class Particle(): 210 | """ 211 | Each particle in particle swarm optimization has a position `x` and velocity 212 | `v` in design space and keeps track of the best design point found so far, 213 | `x_best`. 214 | """ 215 | def __init__(self, x: np.ndarray, v: np.ndarray, x_best: np.ndarray): 216 | self.x = x 217 | self.v = v 218 | self.x_best = x_best 219 | 220 | 221 | def particle_swarm_optimization(f: Callable[[np.ndarray], float], 222 | population: list[Particle], 223 | k_max: int, 224 | w: float = 1.0, 225 | c1: float = 1.0, 226 | c2: float = 1.0) -> list[Particle]: 227 | """ 228 | Particle swarm optimization, which takes an objective function `f`, a list 229 | of particles `population`, a number of iterations `k_max`, an inertia `w`, 230 | an momentum coefficients `c1` and `c2`. 231 | 232 | The default values are those used by R. Eberhart and J. Kennedy, "A New 233 | Optimizer Using Particle Swarm Theory," in International Symposium on Micro 234 | Machine and Human Science, 1995. 
235 | """ 236 | n = len(population[0].x) 237 | x_best, y_best = np.copy(population[0].x_best), np.inf 238 | for P in population: 239 | y = f(P.x) 240 | if y < y_best: 241 | x_best, y_best = P.x, y 242 | for _ in range(k_max): 243 | for P in population: 244 | r1, r2 = np.random.rand(n), np.random.rand(n) 245 | P.x += P.v 246 | P.v = w*P.v + c1*r1*(P.x_best - P.x) + c2*r2*(x_best - P.x) 247 | y = f(P.x) 248 | if y < y_best: 249 | x_best, y_best = P.x, y 250 | if y < f(P.x_best): 251 | P.x_best = P.x 252 | return population 253 | 254 | 255 | def firefly(f: Callable[[np.ndarray], float], 256 | population: np.ndarray, 257 | k_max: int, 258 | beta: float = 1.0, 259 | alpha: float = 0.1, 260 | brightness: Callable[[float], float] = lambda r: np.exp(-(r**2))) -> np.ndarray: 261 | """ 262 | The firefly algorithm, which takes an objective function `f`, a population 263 | `population` consisting of design points, a number of iterations `k_max`, 264 | a source intensity `beta`, a random walk step size `alpha`, and an intensity 265 | function `brightness`. The best design point is returned. 266 | """ 267 | m = len(population[0]) 268 | N = multivariate_normal(np.zeros(m), np.eye(m)) 269 | for _ in range(k_max): 270 | for a in population: 271 | for b in population: 272 | if f(b) < f(a): 273 | r = np.linalg.norm(b - a) 274 | a += beta * brightness(r) * (b - a) + alpha * N.rvs() 275 | return population[np.argmin(np.apply_along_axis(f, 1, population))] 276 | 277 | 278 | class Nest(): 279 | def __init__(self, x: np.ndarray, y: float): 280 | self.x = x # position 281 | self.y = y # value, f(x) 282 | 283 | 284 | def cuckoo_search(f: Callable[[np.ndarray], float], 285 | population: list[Nest], 286 | k_max: int, 287 | p_a: float = 0.1, 288 | C: rv_continuous = cauchy(0, 1)) -> list[Nest]: 289 | """ 290 | Cuckoo search, which takes an objective function `f`, an initial set of 291 | nests `population`, a number of iterations `k_max`, percent of nests to 292 | abandon `p_a`, and flight distribution `C`. The flight distribution is 293 | typically a centered Cauchy distribution. 294 | """ 295 | m, n = len(population), len(population[0].x) 296 | a = round(m*p_a) 297 | for _ in range(k_max): 298 | i, j = np.random.randint(m, size=2) 299 | x = population[j].x + C.rvs(n) 300 | y = f(x) 301 | if y < population[i].y: 302 | population[i].x = x 303 | population[i].y = y 304 | 305 | p = np.argsort([-nest.y for nest in population]) 306 | for i in range(len(a)): 307 | j = np.random.randint(m - a) + a 308 | population[p[i]] = Nest(population[p[j]].x + C.rvs(n), f(population[p[i]].x)) 309 | return population 310 | --------------------------------------------------------------------------------