├── README.md └── UnconstrainedOptimization ├── ConjugateGradient.py ├── CoordinateDescent.py ├── GradientDescent.py ├── NewtonMethod.py ├── QuasiNewtonBFGS.py ├── QuasiNewtonDFP.py └── figures ├── BFGSconvex2D.png ├── BFGSconvex3D.png ├── DFPconvex2D.png ├── DFPconvex3D.png ├── DFProsenbrock2D.png ├── DFProsenbrock3D.png ├── GDconvex2D.png ├── GDconvex3D.png ├── GDrosenbrock2D.png ├── GDrosenbrock3D.png ├── Newtonconvex2D.png ├── Newtonconvex3D.png ├── Newtonrosenbrock2D.png ├── Newtonrosenbrock3D.png ├── cordinatedescent2D.png └── cordinatedescent3D.png /README.md: -------------------------------------------------------------------------------- 1 | # 最优化方法python实现 2 | 3 | ​ 在学习机器学习的过程中,因为没有学习过最优化方法方面的知识,在看《统计学习方法》的时候有不少地方比较吃力,于是在 bilibili 上找了有关最优化方法的课程学习。上海财经大学 崔雪婷 老师发布在 bilibili 上的 [《最优化理论与方法》](https://space.bilibili.com/507629580/channel/detail?cid=120046) 是我觉得讲得特别棒的课程,对于常见优化算法的优化过程、证明、性质、适用条件等等都讲解非常详细,即使对于数学基础薄弱、没有相关知识背景的我来说,只要认真跟着老师推导,也能理解得很透彻。非常推荐入门学习最优化理论和最优化方法的同学学习。 4 | 5 | ​ 本仓库是根据老师课上学到的知识,用python实现常见的最优化方法,目前已经学习完了常见的无约束优化算法,于是尝试了用这些方法尝试解简单的无约束优化问题。后续学习完约束优化以及其他知识会尝试更新仓库。 6 | 7 | ​ 水平有限,代码并不漂亮,欢迎批评、交流。 8 | 9 | -------------------------------------------------------------------------------- /UnconstrainedOptimization/ConjugateGradient.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示拟共轭梯度法解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*y**2-2x*y-6 5 | """ 6 | import numpy as np 7 | 8 | from matplotlib import pyplot as plt 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | def cvx_function(X): 13 | """ 14 | 凸函数函数值的计算 15 | """ 16 | z1 = X[0]**2 17 | z2 = X[1]**2 18 | z = z1 + 3 * z2 - 2 * X[0] * X[1] - 6 19 | return z 20 | 21 | 22 | def cvx_fucntion_gradient(x): 23 | """ 24 | 计算凸函数的梯度 25 | """ 26 | grad_x = 2 * x[0] - 2 * x[1] 27 | grad_y = 6 * x[1] - 2 * x[0] 28 | return np.array([grad_x, grad_y]) 29 | 30 | 31 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 32 | """ 33 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 34 | """ 35 | x = np.arange(x_1, x_2, delta) 36 | y = np.arange(y_1, y_2, delta) 37 | X, Y = np.meshgrid(x, y) 38 | Z = f([X, Y]) 39 | return X, Y, Z 40 | 41 | 42 | def plot_2D_figure(X, Y, Z, x, y, filepath): 43 | """ 44 | 画二维图 45 | """ 46 | plt.figure() 47 | plt.contourf(X, Y, Z, 10) 48 | plt.colorbar(orientation='horizontal', shrink=0.8) 49 | plt.plot(x, y, c='r') 50 | plt.savefig(filepath) 51 | plt.show() 52 | 53 | 54 | def plot_3D_figure(X, Y, Z, x, y, z, filepath): 55 | """ 56 | 画三维图 57 | """ 58 | fig = plt.figure() 59 | ax = Axes3D(fig) 60 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 61 | ax.plot3D(x, y, z, c='r', linewidth=2) 62 | plt.colorbar(p, shrink=0.8) 63 | plt.savefig(filepath) 64 | plt.show() 65 | 66 | 67 | def generate_points(x_start, f, grad, epsilon=1e-5, steps=10): 68 | X, x_old = x_start, x_start 69 | Z = f(x_start) 70 | Q = np.mat([[2, -2], [-2, 6]]) 71 | grad_old = np.mat(grad(x_old)) 72 | d = - grad_old # 初始化共轭方向为初始梯度方向 73 | for i in range(1, steps): 74 | if np.sqrt(np.sum(grad_old.T * grad_old)) < epsilon: # 判断是否收敛到极小值点 75 | X = np.concatenate((X, x_old), axis=1) 76 | z_new = f(np.array(x_old)) 77 | Z = np.concatenate((Z, z_new)) 78 | print("Convergence at step: ", i) 79 | print("Final varaible values: ", [x_old[0], x_old[1]]) 80 | print("Final f(x,y):", z_new) 81 | break 82 | alpha = (grad_old.T * grad_old) / (d.T * Q * d) # 计算步长 83 | x_new = x_old + np.array(alpha)[0][0] * np.array(d) # 计算下一步的点 84 | grad_new = np.mat(grad(x_new)) # 计算新的点的梯度 85 | beta = (grad_new.T * grad_new) / (grad_old.T * grad_old) # 计算beta 86 | d = -1.0 * grad_new + np.array(beta)[0][0] * d # 根据梯度和上一步共轭方向构造新的共轭方向 87 | X = np.concatenate((X, x_new), axis=1) 88 | z_new = f(np.array(x_new)) 89 | Z = np.concatenate((Z, z_new)) 90 | grad_old = grad_new 91 | x_old = x_new 92 | return X[0], X[1], Z 93 | 94 | 95 | if __name__ == "__main__": 96 | x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 97 | x_start = np.array([[3.4], [3.5]]) 98 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, cvx_function) 99 | x, y, z = generate_points(x_start, cvx_function, cvx_fucntion_gradient) 100 | plot_2D_figure(X, Y, Z, x, y, './figures/Conjugate2D.png') 101 | plot_3D_figure(X, Y, Z, x, y, z, './figures/Conjugate3D.png') 102 | -------------------------------------------------------------------------------- /UnconstrainedOptimization/CoordinateDescent.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示坐标下降法解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*y**2-2*x*y-6 5 | """ 6 | import numpy as np 7 | 8 | from matplotlib import pyplot as plt 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | def function(X, Y): 13 | """ 14 | 函数值的计算 15 | """ 16 | z1 = X**2 17 | z2 = Y**2 18 | z = z1 + 3 * z2 - 2 * X * Y - 6 19 | return z 20 | 21 | 22 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 23 | """ 24 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 25 | :param x_1: x最小值 26 | :param x_2: x最大值 27 | :param y_1: y最小值 28 | :param y_2: y最大值 29 | :param delta: 网格中各点间隔 30 | :param f: 函数 31 | :return: 网格坐标以及网格中各点函数值 32 | """ 33 | x = np.arange(x_1, x_2, delta) 34 | y = np.arange(y_1, y_2, delta) 35 | X, Y = np.meshgrid(x, y) 36 | Z = f(X, Y) 37 | return X, Y, Z 38 | 39 | 40 | def plot_2D_figure(X, Y, Z, x, y): 41 | """ 42 | 画二维图 43 | :param X: 网格x坐标 44 | :param Y: 网格y坐标 45 | :param Z: 网格坐标各个点函数值 46 | :param x:优化点列x坐标变化 47 | :param y:优化点列y坐标变化 48 | """ 49 | plt.figure() 50 | plt.contourf(X, Y, Z, 10) 51 | plt.colorbar(orientation='horizontal', shrink=0.8) 52 | plt.plot(x, y, c='r') 53 | plt.savefig('./figures/cordinatedescent2D.png') 54 | plt.show() 55 | 56 | 57 | def plot_3D_figure(X, Y, Z, x, y, z): 58 | """ 59 | 画三维图 60 | :param X:网格x坐标 61 | :param Y: 网格y坐标 62 | :param Z: 网格中各点函数值 63 | :param x: 优化点列x坐标变化 64 | :param y: 优化点点列y坐标变化 65 | :param z: 优化点列函数值变化 66 | """ 67 | fig = plt.figure() 68 | ax = Axes3D(fig) 69 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 70 | ax.plot3D(x, y, z, c='r', linewidth=2) 71 | plt.colorbar(p, shrink=0.8) 72 | plt.savefig('./figures/cordinatedescent3D.png') 73 | plt.show() 74 | pass 75 | 76 | 77 | def generate_points(x_start, y_start, f=function, steps=200): 78 | """ 79 | 根据坐标下降法,生成优化过程中的点列 80 | :param x_start: 起始点x坐标 81 | :param y_start: 起始点y坐标 82 | :param f: 函数 83 | :param steps: 迭代的步数 84 | :return: 优化点列中各点x,y坐标以及对应的函数值 85 | """ 86 | X, Y, Z = [x_start], [y_start], [f(x_start, y_start)] 87 | for i in range(1, steps): 88 | if i % 2 == 0: 89 | X.append(Y[i-1]) 90 | Y.append(Y[i-1]) 91 | Z.append(f(X[i], Y[i])) 92 | else: 93 | Y.append(X[i-1]/3) 94 | X.append(X[i-1]) 95 | Z.append(f(X[i], Y[i])) 96 | return X, Y, Z 97 | 98 | 99 | if __name__ == "__main__": 100 | x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 101 | x_start, y_start = 3.4, 3.5 102 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, function) 103 | x, y, z = generate_points(x_start, y_start) 104 | plot_2D_figure(X, Y, Z, x, y) 105 | plot_3D_figure(X, Y, Z, x, y, z) -------------------------------------------------------------------------------- /UnconstrainedOptimization/GradientDescent.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示坐标下降法解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*x**26x*y-6 5 | """ 6 | import numpy as np 7 | 8 | from matplotlib import pyplot as plt 9 | from mpl_toolkits.mplot3d import Axes3D 10 | 11 | 12 | def cvx_function(X, Y): 13 | """ 14 | 凸函数函数值的计算 15 | """ 16 | z1 = X**2 17 | z2 = Y**2 18 | z = z1 + 3 * z2 - 2 * X * Y - 6 19 | return z 20 | 21 | 22 | def cvx_fucntion_gradient(x, y): 23 | """ 24 | 计算凸函数的梯度 25 | :param X: 26 | :param Y: 27 | :return: 28 | """ 29 | grad_x = 2 * x - 2 * y 30 | grad_y = 6 * y - 2 * x 31 | return grad_x, grad_y 32 | 33 | 34 | def rosenbrock(X, Y): 35 | """ 36 | 非凸函数rosenbrock函数值计算 37 | """ 38 | z1 = (1.0 - X)**2 39 | z2 = 100 * (Y - X*X)**2 40 | z = z1 + z2 41 | return z 42 | 43 | 44 | def rosenbrock_gradient(x, y): 45 | """ 46 | 计算非凸函数rosenbrock的梯度 47 | """ 48 | grad_x = 400 * x * x * x + 2 * x - 400 * x * y - 2 49 | grad_y = 200 * (y - x**2) 50 | return grad_x, grad_y 51 | 52 | 53 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 54 | """ 55 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 56 | :param x_1: x最小值 57 | :param x_2: x最大值 58 | :param y_1: y最小值 59 | :param y_2: y最大值 60 | :param delta: 网格中各点间隔 61 | :param f: 函数 62 | :return: 网格坐标以及网格中各点函数值 63 | """ 64 | x = np.arange(x_1, x_2, delta) 65 | y = np.arange(y_1, y_2, delta) 66 | X, Y = np.meshgrid(x, y) 67 | Z = f(X, Y) 68 | return X, Y, Z 69 | 70 | 71 | def plot_2D_figure(X, Y, Z, x, y, filepath): 72 | """ 73 | 画二维图 74 | :param X: 网格x坐标 75 | :param Y: 网格y坐标 76 | :param Z: 网格坐标各个点函数值 77 | :param x:优化点列x坐标变化 78 | :param y:优化点列y坐标变化 79 | """ 80 | plt.figure() 81 | plt.contourf(X, Y, Z, 10) 82 | plt.colorbar(orientation='horizontal', shrink=0.8) 83 | plt.plot(x, y, c='r') 84 | plt.savefig(filepath) 85 | plt.show() 86 | 87 | 88 | def plot_3D_figure(X, Y, Z, x, y, z, filepath): 89 | """ 90 | 画三维图 91 | :param X:网格x坐标 92 | :param Y: 网格y坐标 93 | :param Z: 网格中各点函数值 94 | :param x: 优化点列x坐标变化 95 | :param y: 优化点点列y坐标变化 96 | :param z: 优化点列函数值变化 97 | """ 98 | fig = plt.figure() 99 | ax = Axes3D(fig) 100 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 101 | ax.plot3D(x, y, z, c='r', linewidth=2) 102 | plt.colorbar(p, shrink=0.8) 103 | plt.savefig(filepath) 104 | plt.show() 105 | pass 106 | 107 | 108 | def generate_points(x_start, y_start, f, grad, alpha, steps): 109 | """ 110 | 根据坐标下降法,生成优化过程中的点列 111 | :param grad: 梯度计算函数 112 | :param x_start: 起始点x坐标 113 | :param y_start: 起始点y坐标 114 | :param f: 函数 115 | :param alpha: 学习率 116 | :param steps: 迭代的步数 117 | :return: 优化点列中各点x,y坐标以及对应的函数值 118 | """ 119 | X, Y, Z = [x_start], [y_start], [f(x_start, y_start)] 120 | for i in range(1, steps): 121 | grad_x, grad_y = grad(X[i-1], Y[i-1]) 122 | x_new, y_new = X[i-1] - alpha * grad_x, Y[i-1] - alpha * grad_y 123 | Z.append(f(x_new, y_new)) 124 | X.append(x_new) 125 | Y.append(y_new) 126 | return X, Y, Z 127 | 128 | 129 | if __name__ == "__main__": 130 | # x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 131 | # x_start, y_start = 3.4, 3.5 132 | # X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, cvx_function) 133 | # x, y, z = generate_points(x_start, y_start, cvx_function, cvx_fucntion_gradient, 0.05, 100) 134 | # plot_2D_figure(X, Y, Z, x, y, './figures/GDconvex2D.png') 135 | # plot_3D_figure(X, Y, Z, x, y, z, './figures/GDconvex3D.png') 136 | x_1, x_2, y_1, y_2, delta = -2.0, 2.0, -2.0, 2.0, 0.025 137 | x_start, y_start = 0.0, 0.0 138 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, rosenbrock) 139 | x, y, z = generate_points(x_start, y_start, rosenbrock, rosenbrock_gradient, 0.001, 10000) 140 | plot_2D_figure(X, Y, Z, x, y, './figures/GDrosenbrock2D.png') 141 | plot_3D_figure(X, Y, Z, x, y, z, './figures/GDrosenbrock3D.png') -------------------------------------------------------------------------------- /UnconstrainedOptimization/NewtonMethod.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示牛顿法解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*y**2-2x*y-6 5 | 以及牛顿法解决一个非凸函数优化的例子: 6 | 优化函数:(著名的rosenbrock函数) 7 | f(x,y)=(1-x)**2 + 100 * (y-x**2)**2 8 | """ 9 | import numpy as np 10 | 11 | from matplotlib import pyplot as plt 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | 15 | def cvx_function(X): 16 | """ 17 | 凸函数函数值的计算 18 | """ 19 | z1 = X[0]**2 20 | z2 = X[1]**2 21 | z = z1 + 3 * z2 - 2 * X[0] * X[1] - 6 22 | return z 23 | 24 | 25 | def cvx_fucntion_gradient(x): 26 | """ 27 | 计算凸函数的梯度 28 | """ 29 | grad_x = 2 * x[0] - 2 * x[1] 30 | grad_y = 6 * x[1] - 2 * x[0] 31 | return np.array([grad_x, grad_y]) 32 | 33 | 34 | def cvx_hessian_inverse(x): 35 | """ 36 | 计算凸函数的hessian矩阵的逆矩阵,牛顿法的更新方向需要其与梯度的乘积 37 | """ 38 | hessian_matrix = np.mat([[2, -2], [-2, 6]]) 39 | hessian_inverse = np.linalg.pinv(hessian_matrix) 40 | return np.array(hessian_inverse) 41 | 42 | 43 | def rosenbrock(X): 44 | """ 45 | 非凸函数rosenbrock函数值计算 46 | """ 47 | z1 = (1.0 - X[0])**2 48 | z2 = 100 * (X[1] - X[0]*X[0])**2 49 | z = z1 + z2 50 | return z 51 | 52 | 53 | def rosenbrock_gradient(x): 54 | """ 55 | 计算非凸函数rosenbrock的梯度 56 | """ 57 | grad_x = 400 * x[0] * x[0] * x[0] + 2 * x[0] - 400 * x[0] * x[1] - 2 58 | grad_y = 200 * (x[1] - x[0]**2) 59 | return np.array([grad_x, grad_y]) 60 | 61 | 62 | def rosenbrock_hessain_inverse(x): 63 | grad_xx = 1200 * x[0] * x[0] + 2 - 400 * x[1] 64 | grad_xy = - 400 * x[1] 65 | grad_yx = - 400 * x[1] 66 | grad_yy = 200 67 | hessian_matrix = np.mat([[grad_xx, grad_xy], [grad_yx, grad_yy]]) 68 | hessian_inverse = np.linalg.pinv(hessian_matrix) 69 | return np.array(hessian_inverse) 70 | 71 | 72 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 73 | """ 74 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 75 | """ 76 | x = np.arange(x_1, x_2, delta) 77 | y = np.arange(y_1, y_2, delta) 78 | X, Y = np.meshgrid(x, y) 79 | Z = f([X, Y]) 80 | return X, Y, Z 81 | 82 | 83 | def plot_2D_figure(X, Y, Z, x, y, filepath): 84 | """ 85 | 画二维图 86 | """ 87 | plt.figure() 88 | plt.contourf(X, Y, Z, 10) 89 | plt.colorbar(orientation='horizontal', shrink=0.8) 90 | plt.plot(x, y, c='r') 91 | plt.savefig(filepath) 92 | plt.show() 93 | 94 | 95 | def plot_3D_figure(X, Y, Z, x, y, z, filepath): 96 | """ 97 | 画三维图 98 | """ 99 | fig = plt.figure() 100 | ax = Axes3D(fig) 101 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 102 | ax.plot3D(x, y, z, c='r', linewidth=2) 103 | plt.colorbar(p, shrink=0.8) 104 | plt.savefig(filepath) 105 | plt.show() 106 | 107 | 108 | def generate_points(x_start, f, grad, hessian_inverse, epsilon=1e-10, steps=200): 109 | """ 110 | 根据牛顿法生成优化点列的过程 111 | :param x_start: 起始点的坐标 112 | :param f: 需要优化的函数 113 | :param grad: 计算f函数的梯度函数 114 | :param hessian_inverse: 计算f函数hessian矩阵的逆矩阵的函数 115 | :param epsilon: 迭代停止的条件,当当前点的梯度的模小于epsilon时,迭代停止 116 | :param steps: 最大的迭代步数 117 | :return: 优化过程生成点列的x坐标序列,y坐标序列,以及每一个点对应的函数值 118 | """ 119 | X = x_start 120 | Z = f(x_start) 121 | print(Z) 122 | for i in range(1, steps): 123 | current_grad = grad(X[:, i-1]) 124 | if np.sqrt(np.sum(current_grad**2)) < epsilon: 125 | print("Convergence at step: ", i) 126 | break 127 | current_hessain_inverse = hessian_inverse(X[:, i-1]) 128 | x_new = X[:, i-1].reshape(2, 1) - np.dot(current_hessain_inverse, current_grad) 129 | z_new = f(x_new) 130 | print(z_new) 131 | X = np.concatenate((X, x_new), axis=1) 132 | Z = np.concatenate((Z, z_new)) 133 | return X[0], X[1], Z 134 | 135 | 136 | if __name__ == "__main__": 137 | # x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 138 | # x_start= np.array([[3.4], [3.5]]) 139 | # X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, cvx_function) 140 | # x, y, z = generate_points(x_start, cvx_function, cvx_fucntion_gradient, cvx_hessian_inverse) 141 | # plot_2D_figure(X, Y, Z, x, y, './figures/Newtonconvex2D.png') 142 | # plot_3D_figure(X, Y, Z, x, y, z, './figures/Newtonconvex3D.png') 143 | x_1, x_2, y_1, y_2, delta = -2.0, 2.0, -2.0, 2.0, 0.025 144 | x_start = np.array([[0.0], [0.0]]) 145 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, rosenbrock) 146 | x, y, z = generate_points(x_start, rosenbrock, rosenbrock_gradient, rosenbrock_hessain_inverse) 147 | plot_2D_figure(X, Y, Z, x, y, './figures/Newtonrosenbrock2D.png') 148 | plot_3D_figure(X, Y, Z, x, y, z, './figures/Newtonrosenbrock3D.png') -------------------------------------------------------------------------------- /UnconstrainedOptimization/QuasiNewtonBFGS.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示拟牛顿法DFP解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*y**2-2x*y-6 5 | 以及牛顿法解决一个非凸函数优化的例子: 6 | 优化函数:(著名的rosenbrock函数) 7 | f(x,y)=(1-x)**2 + 100 * (y-x**2)**2 8 | """ 9 | import numpy as np 10 | 11 | from matplotlib import pyplot as plt 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | 15 | def cvx_function(X): 16 | """ 17 | 凸函数函数值的计算 18 | """ 19 | z1 = X[0]**2 20 | z2 = X[1]**2 21 | z = z1 + 3 * z2 - 2 * X[0] * X[1] - 6 22 | return z 23 | 24 | 25 | def cvx_fucntion_gradient(x): 26 | """ 27 | 计算凸函数的梯度 28 | """ 29 | grad_x = 2 * x[0] - 2 * x[1] 30 | grad_y = 6 * x[1] - 2 * x[0] 31 | return np.array([grad_x, grad_y]) 32 | 33 | 34 | def rosenbrock(X): 35 | """ 36 | 非凸函数rosenbrock函数值计算 37 | """ 38 | z1 = (1.0 - X[0])**2 39 | z2 = 100 * (X[1] - X[0]*X[0])**2 40 | z = z1 + z2 41 | return z 42 | 43 | 44 | def rosenbrock_gradient(x): 45 | """ 46 | 计算非凸函数rosenbrock的梯度 47 | """ 48 | grad_x = 400 * x[0] * x[0] * x[0] + 2 * x[0] - 400 * x[0] * x[1] - 2 49 | grad_y = 200 * (x[1] - x[0]**2) 50 | return np.array([grad_x, grad_y]) 51 | 52 | 53 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 54 | """ 55 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 56 | """ 57 | x = np.arange(x_1, x_2, delta) 58 | y = np.arange(y_1, y_2, delta) 59 | X, Y = np.meshgrid(x, y) 60 | Z = f([X, Y]) 61 | return X, Y, Z 62 | 63 | 64 | def plot_2D_figure(X, Y, Z, x, y, filepath): 65 | """ 66 | 画二维图 67 | """ 68 | plt.figure() 69 | plt.contourf(X, Y, Z, 10) 70 | plt.colorbar(orientation='horizontal', shrink=0.8) 71 | plt.plot(x, y, c='r') 72 | plt.savefig(filepath) 73 | plt.show() 74 | 75 | 76 | def plot_3D_figure(X, Y, Z, x, y, z, filepath): 77 | """ 78 | 画三维图 79 | """ 80 | fig = plt.figure() 81 | ax = Axes3D(fig) 82 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 83 | ax.plot3D(x, y, z, c='r', linewidth=2) 84 | plt.colorbar(p, shrink=0.8) 85 | plt.savefig(filepath) 86 | plt.show() 87 | 88 | 89 | def generate_points(x_start, f, grad, epsilon=1e-5, steps=100000): 90 | """ 91 | 根据拟牛顿法(DFP)生成优化点列的过程 92 | :param x_start: 起始点的坐标 93 | :param f: 需要优化的函数 94 | :param grad: 计算f函数的梯度函数 95 | :param epsilon: 迭代停止的条件,当当前点的梯度的模小于epsilon时,迭代停止 96 | :param steps: 最大的迭代步数 97 | :return: 优化过程生成点列的x坐标序列,y坐标序列,以及每一个点对应的函数值 98 | """ 99 | X, x_old = x_start, x_start 100 | Z = f(x_start) 101 | H_old = np.mat(np.eye(2, dtype=np.float32) * 0.01) 102 | grad_old = np.mat(grad(x_old)) 103 | I = np.mat(np.eye(2, dtype=np.float32)) 104 | for i in range(1, steps): 105 | x_new = x_old - np.array(H_old * grad_old) 106 | grad_new = np.mat(grad(np.array(x_new))) 107 | if np.sqrt(np.sum(grad_new.T * grad_new)) < epsilon: 108 | X = np.concatenate((X, x_new), axis=1) 109 | z_new = f(np.array(x_new)) 110 | Z = np.concatenate((Z, z_new)) 111 | print("Convergence at step: ", i) 112 | print("Final varaible values: ", [x_new[0], x_new[1]]) 113 | print("Final f(x,y):", z_new) 114 | break 115 | y = np.mat(grad_new - grad_old) 116 | s = np.mat(x_new - x_old) 117 | H_new = (I - (s * y.T) / (s.T * y)) * H_old * (I - (s * y.T) / (s.T * y)) + (s * s.T) / (s.T * y) 118 | X = np.concatenate((X, x_new), axis=1) 119 | z_new = f(np.array(x_new)) 120 | Z = np.concatenate((Z, z_new)) 121 | H_old = H_new 122 | grad_old = grad_new 123 | x_old = x_new 124 | return X[0], X[1], Z 125 | 126 | 127 | if __name__ == "__main__": 128 | x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 129 | x_start = np.array([[3.4], [3.5]]) 130 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, cvx_function) 131 | x, y, z = generate_points(x_start, cvx_function, cvx_fucntion_gradient) 132 | plot_2D_figure(X, Y, Z, x, y, './figures/BFGSconvex2D.png') 133 | plot_3D_figure(X, Y, Z, x, y, z, './figures/BFGSconvex3D.png') 134 | # x_1, x_2, y_1, y_2, delta = -2.0, 2.0, -2.0, 2.0, 0.025 135 | # x_start = np.array([[0.0], [0.0]]) 136 | # X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, rosenbrock) 137 | # x, y, z = generate_points(x_start, rosenbrock, rosenbrock_gradient) 138 | # plot_2D_figure(X, Y, Z, x, y, './figures/BFGSrosenbrock2D.png') 139 | # plot_3D_figure(X, Y, Z, x, y, z, './figures/BFGSrosenbrock3D.png') -------------------------------------------------------------------------------- /UnconstrainedOptimization/QuasiNewtonDFP.py: -------------------------------------------------------------------------------- 1 | """ 2 | 演示拟牛顿法DFP解决一个凸函数的最优化问题的例子 3 | 优化函数: 4 | f(x,y)=x**2+3*y**2-2x*y-6 5 | 以及牛顿法解决一个非凸函数优化的例子: 6 | 优化函数:(著名的rosenbrock函数) 7 | f(x,y)=(1-x)**2 + 100 * (y-x**2)**2 8 | """ 9 | import numpy as np 10 | 11 | from matplotlib import pyplot as plt 12 | from mpl_toolkits.mplot3d import Axes3D 13 | 14 | 15 | def cvx_function(X): 16 | """ 17 | 凸函数函数值的计算 18 | """ 19 | z1 = X[0]**2 20 | z2 = X[1]**2 21 | z = z1 + 3 * z2 - 2 * X[0] * X[1] - 6 22 | return z 23 | 24 | 25 | def cvx_fucntion_gradient(x): 26 | """ 27 | 计算凸函数的梯度 28 | """ 29 | grad_x = 2 * x[0] - 2 * x[1] 30 | grad_y = 6 * x[1] - 2 * x[0] 31 | return np.array([grad_x, grad_y]) 32 | 33 | 34 | def rosenbrock(X): 35 | """ 36 | 非凸函数rosenbrock函数值计算 37 | """ 38 | z1 = (1.0 - X[0])**2 39 | z2 = 100 * (X[1] - X[0]*X[0])**2 40 | z = z1 + z2 41 | return z 42 | 43 | 44 | def rosenbrock_gradient(x): 45 | """ 46 | 计算非凸函数rosenbrock的梯度 47 | """ 48 | grad_x = 400 * x[0] * x[0] * x[0] + 2 * x[0] - 400 * x[0] * x[1] - 2 49 | grad_y = 200 * (x[1] - x[0]**2) 50 | return np.array([grad_x, grad_y]) 51 | 52 | 53 | def generate_grid(x_1, x_2, y_1, y_2, delta, f): 54 | """ 55 | 生成二维网格,并计算网格中各个点的值,用于后续画登高线三维图 56 | """ 57 | x = np.arange(x_1, x_2, delta) 58 | y = np.arange(y_1, y_2, delta) 59 | X, Y = np.meshgrid(x, y) 60 | Z = f([X, Y]) 61 | return X, Y, Z 62 | 63 | 64 | def plot_2D_figure(X, Y, Z, x, y, filepath): 65 | """ 66 | 画二维图 67 | """ 68 | plt.figure() 69 | plt.contourf(X, Y, Z, 10) 70 | plt.colorbar(orientation='horizontal', shrink=0.8) 71 | plt.plot(x, y, c='r') 72 | plt.savefig(filepath) 73 | plt.show() 74 | 75 | 76 | def plot_3D_figure(X, Y, Z, x, y, z, filepath): 77 | """ 78 | 画三维图 79 | """ 80 | fig = plt.figure() 81 | ax = Axes3D(fig) 82 | p = ax.plot_surface(X, Y, Z, rstride=4, cstride=4, cmap='jet', alpha=0.8) 83 | ax.plot3D(x, y, z, c='r', linewidth=2) 84 | plt.colorbar(p, shrink=0.8) 85 | plt.savefig(filepath) 86 | plt.show() 87 | 88 | 89 | def generate_points(x_start, f, grad, epsilon=1e-10, steps=100000): 90 | """ 91 | 根据拟牛顿法(DFP)生成优化点列的过程 92 | :param x_start: 起始点的坐标 93 | :param f: 需要优化的函数 94 | :param grad: 计算f函数的梯度函数 95 | :param epsilon: 迭代停止的条件,当当前点的梯度的模小于epsilon时,迭代停止 96 | :param steps: 最大的迭代步数 97 | :return: 优化过程生成点列的x坐标序列,y坐标序列,以及每一个点对应的函数值 98 | """ 99 | X, x_old = x_start, x_start 100 | Z = f(x_start) 101 | H_old = np.mat(np.eye(2, dtype=np.float32) * 0.01) 102 | grad_old = np.mat(grad(x_old)) 103 | for i in range(1, steps): 104 | x_new = x_old - np.array(H_old * grad_old) 105 | grad_new = np.mat(grad(np.array(x_new))) 106 | if np.sqrt(np.sum(grad_new.T * grad_new)) < epsilon: 107 | X = np.concatenate((X, x_new), axis=1) 108 | z_new = f(np.array(x_new)) 109 | Z = np.concatenate((Z, z_new)) 110 | print("Convergence at step: ", i) 111 | print("Final varaible values: ", [x_new[0], x_new[1]]) 112 | print("Final f(x,y):", z_new) 113 | break 114 | y = np.mat(grad_new - grad_old) 115 | s = np.mat(x_new - x_old) 116 | H_new = H_old - (H_old * y * y.T * H_old) / (y.T * H_old * y) + (s * s.T) / (y.T * s) 117 | X = np.concatenate((X, x_new), axis=1) 118 | z_new = f(np.array(x_new)) 119 | Z = np.concatenate((Z, z_new)) 120 | H_old = H_new 121 | grad_old = grad_new 122 | x_old = x_new 123 | return X[0], X[1], Z 124 | 125 | 126 | if __name__ == "__main__": 127 | x_1, x_2, y_1, y_2, delta = -4.0, 4.0, -4.0, 4.0, 0.025 128 | x_start = np.array([[3.4], [3.5]]) 129 | X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, cvx_function) 130 | x, y, z = generate_points(x_start, cvx_function, cvx_fucntion_gradient) 131 | plot_2D_figure(X, Y, Z, x, y, './figures/DFPconvex2D.png') 132 | plot_3D_figure(X, Y, Z, x, y, z, './figures/DFPconvex3D.png') 133 | # x_1, x_2, y_1, y_2, delta = -2.0, 2.0, -2.0, 2.0, 0.025 134 | # x_start = np.array([[0.0], [0.0]]) 135 | # X, Y, Z = generate_grid(x_1, x_2, y_1, y_2, delta, rosenbrock) 136 | # x, y, z = generate_points(x_start, rosenbrock, rosenbrock_gradient) 137 | # plot_2D_figure(X, Y, Z, x, y, './DFProsenbrock2D.png') 138 | # plot_3D_figure(X, Y, Z, x, y, z, './DFProsenbrock3D.png') -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/BFGSconvex2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/BFGSconvex2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/BFGSconvex3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/BFGSconvex3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/DFPconvex2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/DFPconvex2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/DFPconvex3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/DFPconvex3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/DFProsenbrock2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/DFProsenbrock2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/DFProsenbrock3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/DFProsenbrock3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/GDconvex2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/GDconvex2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/GDconvex3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/GDconvex3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/GDrosenbrock2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/GDrosenbrock2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/GDrosenbrock3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/GDrosenbrock3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/Newtonconvex2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/Newtonconvex2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/Newtonconvex3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/Newtonconvex3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/Newtonrosenbrock2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/Newtonrosenbrock2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/Newtonrosenbrock3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/Newtonrosenbrock3D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/cordinatedescent2D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/cordinatedescent2D.png -------------------------------------------------------------------------------- /UnconstrainedOptimization/figures/cordinatedescent3D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coding-raccoon/Optimization/1b17fe2a0508bfffe1fe8b656bd90d26b8a3906e/UnconstrainedOptimization/figures/cordinatedescent3D.png --------------------------------------------------------------------------------