# encoding:utf-8
"""BP (back-propagation) neural network regression for stock-price prediction.

This module combines the two Python sources of the project listing:

* ``BPNN.py``          -- ``sigmoid``, ``sigmoid_prime`` and ``BPNNRegression``
* ``stock_predict.py`` -- the training / evaluation script, available here as
  ``run_stock_prediction`` and executed when the module is run as a script.

The listing also names ``README.md`` and ``data.xlsx``.  According to the
README, a BP neural network is used to predict the stock price; the network
functions follow https://github.com/tjaume/BPNeuralNetworks and are applied to
a stock-price data set.  The data file is referenced at
https://raw.githubusercontent.com/dukuanbing/bpnn-stock-price-prediction/d2bcaaaab341c5d560cd44ab82600aa314f1b377/data.xlsx
"""

import random

import numpy as np


def sigmoid(x):
    """Return the logistic activation ``1 / (1 + exp(-x))`` element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``exp`` (the naive form emits an overflow warning for them).
    """
    return np.exp(-np.logaddexp(0, -x))


def sigmoid_prime(x):
    """Return the derivative of ``sigmoid`` at ``x``: ``s * (1 - s)``."""
    s = sigmoid(x)  # evaluate the activation once and reuse it
    return s * (1 - s)


class BPNNRegression:
    """Fully connected feed-forward network for regression.

    Compared with the classification variant of the same network:

    1. the output layer is linear (no activation function is applied);
    2. the update terms of the output layer's weights and biases are
       computed accordingly (no multiplication by the activation derivative).

    Inputs and targets are column vectors: a sample ``x`` must have shape
    ``(sizes[0], 1)`` so that ``np.dot(w, x) + b`` is defined.
    """

    def __init__(self, sizes):
        """Create a network whose layer widths are given by ``sizes``.

        sizes: sequence of layer widths, input layer first, output layer last.
        """
        # Network structure
        self.num_layers = len(sizes)
        self.sizes = sizes

        # One bias per node of every layer except the input layer, drawn from
        # a standard normal distribution (np.random.randn).
        self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
        # One weight per connection between consecutive layers, also standard
        # normal; weights[i] has shape (sizes[i + 1], sizes[i]).
        self.weights = [np.random.randn(r, c)
                        for c, r in zip(sizes[:-1], sizes[1:])]

    def feed_forward(self, a):
        """Propagate the input column vector ``a`` and return the output."""
        last = len(self.biases) - 1
        for i, (b, w) in enumerate(zip(self.biases, self.weights)):
            z = np.dot(w, a) + b
            # The output layer is linear; every other layer uses the sigmoid.
            a = z if i == last else sigmoid(z)
        return a

    def MSGD(self, training_data, epochs, mini_batch_size, eta, error=0.01):
        """Train with mini-batch stochastic gradient descent.

        training_data: iterable of ``(x, y)`` pairs of column vectors.  It is
            copied before shuffling, so the caller's sequence keeps its order.
        epochs: maximum number of passes over the training data.
        mini_batch_size: number of samples per weight update (>= 1).
        eta: learning rate.
        error: training stops early once the summed error reported by
            ``evaluate`` drops below this value.

        Returns the error of the last evaluated epoch; when no epoch runs
        (``epochs <= 0``) the error of the unchanged network is returned.
        Raises ValueError if ``mini_batch_size`` is smaller than 1.
        """
        if mini_batch_size < 1:
            raise ValueError("mini_batch_size must be a positive integer")

        # Work on a private copy so shuffling never reorders the caller's data.
        data = list(training_data)
        n = len(data)
        err_epoch = None
        for j in range(epochs):
            # Randomise the sample order for this epoch
            random.shuffle(data)
            # Split the shuffled data into consecutive mini-batches
            mini_batchs = [data[k:k + mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            # One weight/bias update per mini-batch
            for mini_batch in mini_batchs:
                self.updata_WB_by_mini_batch(mini_batch, eta)

            # Report the error after this epoch
            err_epoch = self.evaluate(data)
            print("Epoch {0} Error {1}".format(j, err_epoch))
            if err_epoch < error:
                break

        if err_epoch is None:
            # No epoch was executed: report the error of the current network
            # instead of failing on an unassigned variable.
            err_epoch = self.evaluate(data)
        return err_epoch

    def updata_WB_by_mini_batch(self, mini_batch, eta):
        """Update weights and biases from one mini-batch.

        The method name (including its historical spelling) is kept so that
        existing callers continue to work.

        mini_batch: sequence of ``(x, y)`` pairs.
        eta: learning rate; the step actually applied is
            ``eta / len(mini_batch)`` times the summed gradients.

        An empty mini-batch leaves the network unchanged.
        """
        if len(mini_batch) == 0:
            # Nothing to learn from; also avoids dividing by zero below.
            return

        # Accumulators for the summed partial derivatives, shaped like the
        # biases and weights and initialised to zero.
        batch_par_b = [np.zeros(b.shape) for b in self.biases]
        batch_par_w = [np.zeros(w.shape) for w in self.weights]

        for x, y in mini_batch:
            # Per-sample gradients from back-propagation
            delta_b, delta_w = self.back_propagation(x, y)
            batch_par_b = [bb + dbb for bb, dbb in zip(batch_par_b, delta_b)]
            batch_par_w = [bw + dbw for bw, dbw in zip(batch_par_w, delta_w)]

        # Average over the mini-batch by scaling the learning rate
        step = eta / len(mini_batch)
        self.weights = [w - step * dw
                        for w, dw in zip(self.weights, batch_par_w)]
        self.biases = [b - step * db
                       for b, db in zip(self.biases, batch_par_b)]

    def back_propagation(self, x, y):
        """Return the per-sample gradients ``(delta_b, delta_w)``.

        x: input column vector, shape ``(sizes[0], 1)``.
        y: target column vector, shape ``(sizes[-1], 1)``.

        ``delta_b`` and ``delta_w`` are lists with one array per layer,
        shaped like ``self.biases`` and ``self.weights``.
        """
        delta_b = [np.zeros(b.shape) for b in self.biases]
        delta_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass, remembering every layer's weighted input (zs) and
        # output (activations); the input vector is the first activation.
        a = x
        activations = [x]
        zs = []
        last = len(self.biases) - 1
        for i, (b, w) in enumerate(zip(self.biases, self.weights)):
            z = np.dot(w, a) + b
            zs.append(z)
            # Linear output layer: its activation is the weighted input itself
            a = z if i == last else sigmoid(z)
            activations.append(a)

        # Output-layer delta.  Unlike the classification case it is not
        # multiplied by the derivative of the activation, because the output
        # layer has none.
        delta = self.cost_function(activations[-1], y)
        delta_b[-1] = delta
        delta_w[-1] = np.dot(delta, activations[-2].T)

        # Walk backwards through the hidden layers; -lev indexes from the end,
        # and the delta of layer -lev is derived from that of layer -lev + 1.
        for lev in range(2, self.num_layers):
            zp = sigmoid_prime(zs[-lev])
            delta = np.dot(self.weights[-lev + 1].T, delta) * zp
            delta_b[-lev] = delta
            delta_w[-lev] = np.dot(delta, activations[-lev - 1].T)
        return (delta_b, delta_w)

    def evaluate(self, train_data):
        """Return the summed half squared error over ``(x, y)`` pairs.

        The value is a sum, not a mean, so it grows with the number of
        samples.
        """
        return np.sum([0.5 * (self.feed_forward(x) - y) ** 2
                       for x, y in train_data])

    def predict(self, test_input):
        """Return a list with the network output for every input vector."""
        return [self.feed_forward(x) for x in test_input]

    def cost_function(self, output_a, y):
        """Return ``output_a - y``.

        This is the derivative of the half squared error
        ``0.5 * (output_a - y) ** 2`` with respect to ``output_a``, which is
        what back-propagation needs at the output layer.
        """
        return (output_a - y)


def run_stock_prediction(data_path='data.xlsx', cut=30, hidden_units=16,
                         epochs=1000, eta=0.2):
    """Train the network on a spreadsheet and report test-set MAE / MSE.

    The last column of the first sheet is the target; all other columns are
    the inputs.  The defaults reproduce the constants of the original script.

    data_path: Excel file to read.
    cut: number of trailing rows held out as the test set.
    hidden_units: width of the single hidden layer.
    epochs: maximum number of training epochs.
    eta: learning rate.

    Returns ``(mae, mse)`` measured on the scaled test targets.
    """
    # Imported here so the network code above stays importable when the
    # plotting / scikit-learn stack is not installed.
    import matplotlib.pyplot as plt
    import pandas as pd
    from sklearn import preprocessing
    from sklearn.metrics import mean_absolute_error
    from sklearn.metrics import mean_squared_error

    raw = pd.read_excel(data_path, sheet_name=0)

    # Min-max normalisation.  The scaler is fitted on the training rows only,
    # so no statistics of the held-out rows leak into training; scaled test
    # values may therefore fall slightly outside [0, 1].
    min_max_scaler = preprocessing.MinMaxScaler()
    min_max_scaler.fit(raw.iloc[:-cut])
    df = pd.DataFrame(min_max_scaler.transform(raw), columns=raw.columns)
    x = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    # Train / test split: the last `cut` rows form the test set
    x_train, x_test = x.iloc[:-cut].values, x.iloc[-cut:].values
    y_train, y_test = y.iloc[:-cut].values, y.iloc[-cut:].values

    # Build the network; the input width follows the data instead of being
    # hard-coded.
    n_features = x_train.shape[1]
    bp1 = BPNNRegression([n_features, hidden_units, 1])
    train_data = [[sx.reshape(n_features, 1), sy.reshape(1, 1)]
                  for sx, sy in zip(x_train, y_train)]
    test_data = [np.reshape(sx, (n_features, 1)) for sx in x_test]

    # Train with one mini-batch per epoch (full-batch gradient descent)
    bp1.MSGD(train_data, epochs, len(train_data), eta)

    # Predict and flatten the list of (1, 1) outputs into a 1-D array; the
    # length follows the test set rather than a fixed 30.
    y_pre = np.array(bp1.predict(test_data)).reshape(-1)

    # Plot real against predicted values on the test set
    draw = pd.concat([pd.DataFrame(y_test), pd.DataFrame(y_pre)], axis=1)
    draw.iloc[:, 0].plot(figsize=(12, 6))
    draw.iloc[:, 1].plot(figsize=(12, 6))
    plt.legend(('real', 'predict'), loc='upper right', fontsize=15)
    plt.title("Test Data", fontsize=30)

    # Accuracy metrics (scikit-learn convention: y_true first, y_pred second)
    mae = mean_absolute_error(y_test, y_pre)
    mse = mean_squared_error(y_test, y_pre)
    print('测试集上的MAE/MSE')
    print(mae)
    print(mse)

    # Display the figure when run as a plain script
    plt.show()
    return mae, mse


if __name__ == "__main__":
    run_stock_prediction()