├── .gitignore
├── p0.py
├── p1 gradient descent.py
├── p2 origin SGD.py
├── p3 minibatch SGD.py
├── p4 momentum SGD.py
├── p4 momentum.py
├── p5 Nesterov momentum.py
├── p6 adagrad.py
├── p7 adadelta.py
├── p8 adam.py
└── readme.md

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Created by https://www.gitignore.io/api/python,pycharm

### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries

# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml

# Gradle:
.idea/**/gradle.xml
.idea/**/libraries

# CMake
cmake-build-debug/

# Mongo Explorer plugin:
.idea/**/mongoSettings.xml

## File-based project format:
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721

# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr

# Sonarlint plugin
.idea/sonarlint

### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# dotenv
.env

# virtualenv
.venv
venv/
ENV/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# End of https://www.gitignore.io/api/python,pycharm

*.png
test.py
# keep the folder of experiment result figures (实验结果图)
!实验结果图

--------------------------------------------------------------------------------
/p0.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

x = np.array([30, 35, 37, 59, 70, 76, 88, 100]).astype(np.float32)
y = np.array([1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]).astype(np.float32)
plt.rcParams['font.sans-serif'] = ['SimHei']  # display Chinese labels correctly
plt.rcParams['axes.unicode_minus'] = False    # display minus signs correctly

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

print(x, y)

# guess a line y = a*x + b and measure its error against the data
a = 1
b = 0
x_ = np.array([0, 1])
y_ = a * x_ + b
yp = a * x + b
r = sum(np.square(np.round(yp - y, 4)))
print(r / 16)  # half mean squared error: SSE / (2 * n) with n = 8 samples

plt.scatter(x, y)
plt.xlabel(u"x")
plt.ylabel(u"y")
plt.plot(x_, y_, color='green')
plt.pause(3333)  # keep the window open
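For reference (my notation, not part of the repo): the min-max scaling used by p0 and every script below maps both coordinates into [0, 1],

    x_i' = \frac{x_i - x_{\min}}{x_{\max} - x_{\min}}, \qquad y_i' = \frac{y_i - y_{\min}}{y_{\max} - y_{\min}}

and the value p0 prints is the half mean squared error of the guessed line, \sum_i (y_i' - \hat{y}_i')^2 / (2n) with n = 8.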
--------------------------------------------------------------------------------
/p1 gradient descent.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with classic (full-batch) gradient descent
rate = 0.2  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
for step in range(1, 500):
    loss = 0
    all_da = 0
    all_db = 0
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    loss = loss / len(x)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b
    a = a - rate * all_da
    b = b - rate * all_db

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
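A summary of the update p1 implements (my notation, following the Ruder survey cited in the readme): with per-sample loss \ell_i = (y_i - (a x_i + b))^2 / 2, each step applies the summed (not averaged) gradients returned by da() and db(),

    a \leftarrow a - \eta \sum_i \frac{\partial \ell_i}{\partial a}, \qquad b \leftarrow b - \eta \sum_i \frac{\partial \ell_i}{\partial b}, \qquad \eta = 0.2

where \partial \ell_i / \partial a = -(y_i - \hat{y}_i) x_i and \partial \ell_i / \partial b = -(y_i - \hat{y}_i).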
--------------------------------------------------------------------------------
/p2 origin SGD.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection
import random

# A minimal linear regression fitted with plain SGD (one random sample per step)
rate = 0.2  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

def shuffle_data(x, y):
    # shuffle x and y in unison so that the pairs (x[i], y[i]) stay aligned
    seed = random.random()
    random.seed(seed)
    random.shuffle(x)
    random.seed(seed)
    random.shuffle(y)

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
step = 1
while step <= 500:
    loss = 0
    all_da = 0
    all_db = 0
    shuffle_data(x, y)
    # the loop overwrites loss and gradient on every pass, so only the last
    # sample survives; since the data were just shuffled, this amounts to
    # using one random sample per step, i.e. plain SGD
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = da(y[i], y_p, x[i])
        all_db = db(y[i], y_p)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # update parameters
    a = a - rate * all_da
    b = b - rate * all_db

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
    step = step + 1
plt.show()
plt.pause(99999999999)  # keep the final window open
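The difference from p1, stated as an equation (my summary): instead of the full-batch gradient, each step uses a single randomly chosen sample i,

    \theta \leftarrow \theta - \eta \, \nabla_\theta \ell_i(\theta)

which is why the plotted loss curve is much noisier than in p1.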
--------------------------------------------------------------------------------
/p3 minibatch SGD.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection
import random

# A minimal linear regression fitted with minibatch SGD
rate = 0.2  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

def shuffle_data(x, y):
    # shuffle x and y in unison so that the pairs (x[i], y[i]) stay aligned
    seed = random.random()
    random.seed(seed)
    random.shuffle(x)
    random.seed(seed)
    random.shuffle(y)

def get_batch_data(x, y, batch=3):
    # shuffle, then take the first `batch` pairs as a random minibatch
    shuffle_data(x, y)
    x_new = x[0:batch]
    y_new = y[0:batch]
    return [x_new, y_new]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
for step in range(1, 200):
    loss = 0
    all_da = 0
    all_db = 0
    shuffle_data(x, y)  # get_batch_data shuffles again; the extra shuffle is harmless
    [x_new, y_new] = get_batch_data(x, y, batch=4)
    for i in range(0, len(x_new)):
        y_p = a * x_new[i] + b
        loss = loss + (y_new[i] - y_p) * (y_new[i] - y_p) / 2
        all_da = all_da + da(y_new[i], y_p, x_new[i])
        all_db = all_db + db(y_new[i], y_p)
    loss = loss / len(x_new)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b
    a = a - rate * all_da
    b = b - rate * all_db

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
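Minibatch SGD in the same notation (my summary): each step draws a random batch B (|B| = 4 here) and applies the summed batch gradient,

    \theta \leftarrow \theta - \eta \sum_{i \in B} \nabla_\theta \ell_i(\theta)

a compromise between the stable but expensive full batch of p1 and the noisy single sample of p2.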
--------------------------------------------------------------------------------
/p4 momentum SGD.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection
import random

# A minimal linear regression fitted with minibatch SGD plus momentum
rate = 0.1  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

def shuffle_data(x, y):
    # shuffle x and y in unison so that the pairs (x[i], y[i]) stay aligned
    seed = random.random()
    random.seed(seed)
    random.shuffle(x)
    random.seed(seed)
    random.shuffle(y)

def get_batch_data(x, y, batch=3):
    # shuffle, then take the first `batch` pairs as a random minibatch
    shuffle_data(x, y)
    x_new = x[0:batch]
    y_new = y[0:batch]
    return [x_new, y_new]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle('learning rate: %.2f  method: momentum SGD' % (rate), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
va = 0  # velocity for a
vb = 0  # velocity for b
gamma = 0.9  # momentum coefficient
for step in range(1, 100):
    loss = 0
    all_da = 0
    all_db = 0
    shuffle_data(x, y)  # get_batch_data shuffles again; the extra shuffle is harmless
    [x_new, y_new] = get_batch_data(x, y, batch=4)
    for i in range(0, len(x_new)):
        y_p = a * x_new[i] + b
        loss = loss + (y_new[i] - y_p) * (y_new[i] - y_p) / 2
        all_da = all_da + da(y_new[i], y_p, x_new[i])
        all_db = all_db + db(y_new[i], y_p)
    loss = loss / len(x_new)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # momentum update: the velocity accumulates past gradients
    va = gamma * va + rate * all_da
    vb = gamma * vb + rate * all_db
    a = a - va
    b = b - vb

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
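The momentum update used here (my summary, γ = 0.9, g_t the current summed gradient):

    v_t = \gamma v_{t-1} + \eta g_t, \qquad \theta_t = \theta_{t-1} - v_t

This file applies it to minibatches of 4; the next file, p4 momentum.py, applies the identical rule to the full batch.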
--------------------------------------------------------------------------------
/p4 momentum.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with (full-batch) gradient descent plus momentum
rate = 0.1  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle('learning rate: %.2f  method: momentum' % (rate), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
va = 0  # velocity for a
vb = 0  # velocity for b
gamma = 0.9  # momentum coefficient
for step in range(1, 100):
    loss = 0
    all_da = 0
    all_db = 0
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    loss = loss / len(x)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # momentum update on the full-batch gradient
    va = gamma * va + rate * all_da
    vb = gamma * vb + rate * all_db
    a = a - va
    b = b - vb

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
--------------------------------------------------------------------------------
/p5 Nesterov momentum.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with momentum plus Nesterov look-ahead
rate = 0.01  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle('learning rate: %.2f  method: Nesterov momentum' % (rate), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
va = 0  # velocity for a
vb = 0  # velocity for b
gamma = 0.9  # momentum coefficient
for step in range(1, 100):
    loss = 0
    all_da = 0
    all_db = 0
    # look-ahead position: where momentum alone would carry the parameters
    a_ahead = a - gamma * va
    b_ahead = b - gamma * vb
    # -- compute loss and gradients at the look-ahead point
    for i in range(0, len(x)):
        y_p = a_ahead * x[i] + b_ahead
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    loss = loss / len(x)

    ### plotting
    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # -- parameter update with the look-ahead gradient
    va = gamma * va + rate * all_da
    vb = gamma * vb + rate * all_db
    a = a - va
    b = b - vb

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
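Nesterov momentum evaluates the gradient at the look-ahead point (a_ahead, b_ahead) rather than at the current parameters (my transcription of what the loop does):

    v_t = \gamma v_{t-1} + \eta \, \nabla_\theta J(\theta_{t-1} - \gamma v_{t-1}), \qquad \theta_t = \theta_{t-1} - v_t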
--------------------------------------------------------------------------------
/p6 adagrad.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with adagrad
rate = 0.2  # learning rate

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle('learning rate: %.2f  method: adagrad' % (rate), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
# accumulated squared gradients for [a, b]; must be a float array,
# an integer array would silently truncate the accumulated values
n = np.array([0.0, 0.0])
epsilon = 1e-8
for step in range(1, 500):
    loss = 0
    all_da = 0
    all_db = 0
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    loss = loss / len(x)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # -- parameter update: per-parameter learning rate shrinks as squared gradients accumulate
    n[0] = n[0] + np.square(all_da)
    n[1] = n[1] + np.square(all_db)
    rate_new = rate / (np.sqrt(n + epsilon))  # effective per-parameter learning rates
    print('rate_new a:', rate_new[0], ' b:', rate_new[1])
    a = a - (rate / (np.sqrt(n[0] + epsilon))) * all_da
    b = b - (rate / (np.sqrt(n[1] + epsilon))) * all_db

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
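Adagrad accumulates squared gradients per parameter and shrinks each learning rate accordingly (my summary, g_t the summed gradient at step t):

    n_t = n_{t-1} + g_t^2, \qquad \theta_t = \theta_{t-1} - \frac{\eta}{\sqrt{n_t + \epsilon}} \, g_t

Note this script places ε inside the square root; the form \sqrt{n_t} + \epsilon is also common, and with ε = 10⁻⁸ the difference is negligible.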
--------------------------------------------------------------------------------
/p7 adadelta.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with adadelta
epsilon = 1e-2
gamma = 0.9

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle(' method: adadelta ε=%.4f, γ=%.2f' % (epsilon, gamma), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
theta = np.array([0, 0]).astype(np.float32)  # the update applied to [a, b] at each iteration

apple = np.array([0, 0]).astype(np.float32)  # running average of squared gradients, E[g^2]
pear = np.array([0, 0]).astype(np.float32)   # running average of squared updates, E[dtheta^2]
# iterate
for step in range(1, 201):
    loss = 0
    all_da = 0
    all_db = 0
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    all_d = np.array([all_da, all_db])  # gradient vector for [a, b]
    loss = loss / len(x)

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    # -- parameter update
    apple = gamma * apple + (1 - gamma) * (all_d ** 2)  # fold in this step's squared gradient
    rms_apple = np.sqrt(apple + epsilon)

    pear = gamma * pear + (1 - gamma) * (theta ** 2)  # fold in the previous step's squared update (theta still holds it here)
    rms_pear = np.sqrt(pear + epsilon)

    theta = -(rms_pear / rms_apple) * all_d
    [a, b] = [a, b] + theta

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss, " rms_pear: ", rms_pear, " rms_apple: ", rms_apple)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
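In the standard Adadelta notation (my mapping of the script's names), apple is the running average E[g²] and pear is E[Δθ²], which lags one step because it is updated from the previous theta:

    E[g^2]_t = \gamma E[g^2]_{t-1} + (1-\gamma) g_t^2
    \Delta\theta_t = -\frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}} \, g_t
    \theta_t = \theta_{t-1} + \Delta\theta_t

No global learning rate appears; ε (here 10⁻²) keeps the very first updates from being zero.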
--------------------------------------------------------------------------------
/p8 adam.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # needed for the 3D projection

# A minimal linear regression fitted with adam
# These are the recommended default hyperparameters and work for most problems.
# Note: adam shines on large-scale deep learning, so on this small convex
# problem its convergence is not particularly fast.
rate = 0.001
beta1 = 0.9
beta2 = 0.999
epsilon = 1e-8

def da(y, y_p, x):
    # d(loss)/da for one sample: -(y - y_p) * x
    return (y - y_p) * (-x)

def db(y, y_p):
    # d(loss)/db for one sample: -(y - y_p)
    return (y - y_p) * (-1)

def calc_loss(a, b, x, y):
    tmp = y - (a * x + b)
    tmp = tmp ** 2  # square every element of the array
    SSE = sum(tmp) / (2 * len(x))
    return SSE

def draw_hill(x, y):
    # evaluate the loss on a 100x100 grid of (a, b) for the surface/contour plots
    a = np.linspace(-20, 20, 100)
    b = np.linspace(-20, 20, 100)
    x = np.array(x)
    y = np.array(y)

    allSSE = np.zeros(shape=(len(a), len(b)))
    for ai in range(0, len(a)):
        for bi in range(0, len(b)):
            a0 = a[ai]
            b0 = b[bi]
            SSE = calc_loss(a=a0, b=b0, x=x, y=y)
            allSSE[ai][bi] = SSE

    a, b = np.meshgrid(a, b)

    return [a, b, allSSE]

# toy data
x = [30, 35, 37, 59, 70, 76, 88, 100]
y = [1100, 1423, 1377, 1800, 2304, 2588, 3495, 4839]

# min-max normalization
x_max = max(x)
x_min = min(x)
y_max = max(y)
y_min = min(y)

for i in range(0, len(x)):
    x[i] = (x[i] - x_min) / (x_max - x_min)
    y[i] = (y[i] - y_min) / (y_max - y_min)

[ha, hb, hallSSE] = draw_hill(x, y)
hallSSE = hallSSE.T  # important: transpose the loss grid; the matrix runs top-left to bottom-right, while the plot's origin is the bottom-left corner

# initial a, b
a = 10.0
b = -20.0
fig = plt.figure(1, figsize=(12, 8))
fig.suptitle(' method: adam ε=%.4f, learning rate=%.3f, beta1=%.2f, beta2=%.3f' % (epsilon, rate, beta1, beta2), fontsize=15)

# subplot 1: loss surface
ax = fig.add_subplot(2, 2, 1, projection='3d')
ax.set_top_view()
ax.plot_surface(ha, hb, hallSSE, rstride=2, cstride=2, cmap='rainbow')

# subplot 2: contour plot of the loss
plt.subplot(2, 2, 2)
plt.contourf(ha, hb, hallSSE, 15, alpha=0.5, cmap=plt.cm.hot)
C = plt.contour(ha, hb, hallSSE, 15, colors='black')
plt.clabel(C, inline=True)
plt.xlabel('a')
plt.ylabel('b')

plt.ion()  # interactive mode on

all_loss = []
all_step = []
last_a = a
last_b = b
m = 0.0  # first-moment (mean) estimate of the gradient
v = 0.0  # second-moment (uncentered variance) estimate of the gradient
for step in range(1, 500):
    loss = 0
    all_da = 0
    all_db = 0
    for i in range(0, len(x)):
        y_p = a * x[i] + b
        loss = loss + (y[i] - y_p) * (y[i] - y_p) / 2
        all_da = all_da + da(y[i], y_p, x[i])
        all_db = all_db + db(y[i], y_p)
    loss = loss / len(x)
    all_d = np.array([all_da, all_db]).astype(np.float32)  # gradient vector for [a, b]

    # plot the current loss point on subplot 1
    ax.scatter(a, b, loss, color='black')
    # plot the current position on subplot 2
    plt.subplot(2, 2, 2)
    plt.scatter(a, b, s=5, color='blue')
    plt.plot([last_a, a], [last_b, b], color='aqua')
    # subplot 3: data and the current regression line
    plt.subplot(2, 2, 3)
    plt.plot(x, y)
    plt.plot(x, y, 'o')
    x_ = np.linspace(0, 1, 2)
    y_draw = a * x_ + b
    plt.plot(x_, y_draw)
    # subplot 4: loss history
    all_loss.append(loss)
    all_step.append(step)
    plt.subplot(2, 2, 4)
    plt.plot(all_step, all_loss, color='orange')
    plt.xlabel("step")
    plt.ylabel("loss")

    last_a = a
    last_b = b

    m = beta1 * m + (1 - beta1) * all_d
    v = beta2 * v + (1 - beta2) * (all_d ** 2)

    # bias correction: the denominators use beta**step, so the correction
    # is strong for the first iterations and fades towards 1 as step grows
    m_ = m / (1 - beta1 ** step)
    v_ = v / (1 - beta2 ** step)

    theta = -(rate / (np.sqrt(v_) + epsilon)) * m_

    [a, b] = [a, b] + theta

    if step % 1 == 0:
        print("step: ", step, " loss: ", loss)
        plt.show()
        plt.pause(0.01)
plt.show()
plt.pause(99999999999)  # keep the final window open
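The Adam update as implemented, with Kingma and Ba's default hyperparameters (η = 0.001, β₁ = 0.9, β₂ = 0.999, ε = 10⁻⁸):

    m_t = \beta_1 m_{t-1} + (1-\beta_1) g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2) g_t^2
    \hat{m}_t = \frac{m_t}{1-\beta_1^t}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^t}
    \theta_t = \theta_{t-1} - \frac{\eta}{\sqrt{\hat{v}_t} + \epsilon} \, \hat{m}_t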
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Exercises on machine learning optimizers
* p1: plain gradient descent
* p2: original SGD (stochastic gradient descent)
* p3: minibatch SGD
* p4 momentum SGD: minibatch SGD with momentum
* p4 momentum: full-batch gradient descent with momentum
* p5: Nesterov momentum
* p6: adagrad
* p7: adadelta
* p8: adam

#### References
p1 follows https://zhuanlan.zhihu.com/p/27297638

p2~pn follow http://ruder.io/optimizing-gradient-descent/index.html

###### Per-file references

p5: http://cs231n.github.io/neural-networks-3/

p6: https://zhuanlan.zhihu.com/p/22252270

p7: https://arxiv.org/abs/1212.5701 (the original paper)

p8: http://www.ijiandao.com/2b/baijia/63540.html
--------------------------------------------------------------------------------