├── BPNN.py
├── README.md
├── data.xlsx
└── stock_predict.py


/BPNN.py:
--------------------------------------------------------------------------------
  1 | # encoding:utf-8
  2 | 
  3 | '''
  4 | BP神经网络Python实现
  5 | '''
  6 | 
  7 | import random
  8 | import numpy as np;
  9 | 
 10 | 
 11 | def sigmoid(x):
 12 |     '''
 13 |     激活函数
 14 |     '''
 15 |     return 1.0 / (1.0 + np.exp(-x))
 16 | 
 17 | def sigmoid_prime(x):
 18 |     return sigmoid(x) * (1 - sigmoid(x))
 19 | 
 20 | class BPNNRegression:
 21 |     '''
 22 |     神经网络回归与分类的差别在于：
 23 |     1. 输出层不需要再经过激活函数
 24 |     2. 输出层的 w 和 b 更新量计算相应更改
 25 |     '''
 26 |     def __init__(self, sizes):
 27 | 
 28 |         # 神经网络结构
 29 |         self.num_layers = len(sizes)
 30 |         self.sizes = sizes
 31 | 
 32 |         # 初始化偏差，除输入层外， 其它每层每个节点都生成一个 biase 值（0-1）
 33 |         self.biases = [np.random.randn(n, 1) for n in sizes[1:]]
 34 |         # 随机生成每条神经元连接的 weight 值（0-1）
 35 |         self.weights = [np.random.randn(r, c)
 36 |                         for c, r in zip(sizes[:-1], sizes[1:])]
 37 |         
 38 |     def feed_forward(self, a):
 39 |         '''
 40 |         前向传输计算输出神经元的值
 41 |         '''
 42 |         for i, b, w in zip(range(len(self.biases)), self.biases, self.weights):
 43 |             # 输出神经元不需要经过激励函数
 44 |             if i == len(self.biases) - 1:
 45 |                 a = np.dot(w, a) + b
 46 |                 break
 47 |             a = sigmoid(np.dot(w, a) + b)
 48 |         return a
 49 |     
 50 |     def MSGD(self, training_data, epochs, mini_batch_size, eta, error = 0.01):
 51 |         '''
 52 |         小批量随机梯度下降法
 53 |         '''
 54 |         n = len(training_data)
 55 |         for j in range(epochs):
 56 |             # 随机打乱训练集顺序
 57 |             random.shuffle(training_data)
 58 |             # 根据小样本大小划分子训练集集合
 59 |             mini_batchs = [training_data[k:k+mini_batch_size]
 60 |                             for k in range(0, n, mini_batch_size)]
 61 |             # 利用每一个小样本训练集更新 w 和 b
 62 |             for mini_batch in mini_batchs:
 63 |                 self.updata_WB_by_mini_batch(mini_batch, eta)
 64 |             
 65 |             #迭代一次后结果
 66 |             err_epoch = self.evaluate(training_data)
 67 |             print("Epoch {0} Error {1}".format(j, err_epoch))
 68 |             if err_epoch < error:
 69 |                 break;
 70 |             # if test_data:
 71 |             #     print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
 72 |             # else:
 73 |             # print("Epoch {0}".format(j))
 74 |         return err_epoch
 75 |     
 76 |     def updata_WB_by_mini_batch(self, mini_batch, eta):
 77 |         '''
 78 |         利用小样本训练集更新 w 和 b
 79 |         mini_batch: 小样本训练集
 80 |         eta: 学习率
 81 |         '''
 82 |         # 创建存储迭代小样本得到的 b 和 w 偏导数空矩阵，大小与 biases 和 weights 一致，初始值为 0   
 83 |         batch_par_b = [np.zeros(b.shape) for b in self.biases]
 84 |         batch_par_w = [np.zeros(w.shape) for w in self.weights]
 85 | 
 86 |         for x, y in mini_batch:
 87 |             # 根据小样本中每个样本的输入 x, 输出 y, 计算 w 和 b 的偏导
 88 |             delta_b, delta_w = self.back_propagation(x, y)
 89 |             # 累加偏导 delta_b, delta_w 
 90 |             batch_par_b = [bb + dbb for bb, dbb in zip(batch_par_b, delta_b)]
 91 |             batch_par_w = [bw + dbw for bw, dbw in zip(batch_par_w, delta_w)]
 92 |         # 根据累加的偏导值 delta_b, delta_w 更新 b, w
 93 |         # 由于用了小样本，因此 eta 需除以小样本长度
 94 |         self.weights = [w - (eta / len(mini_batch)) * dw
 95 |                         for w, dw in zip(self.weights, batch_par_w)]
 96 |         self.biases = [b - (eta / len(mini_batch)) * db
 97 |                         for b, db in zip(self.biases, batch_par_b)]
 98 | 
 99 |     def back_propagation(self, x, y):
100 |         '''
101 |         利用误差后向传播算法对每个样本求解其 w 和 b 的更新量
102 |         x: 输入神经元，行向量
103 |         y: 输出神经元，行向量
104 |         
105 |         '''
106 |         delta_b = [np.zeros(b.shape) for b in self.biases]
107 |         delta_w = [np.zeros(w.shape) for w in self.weights]
108 | 
109 |         # 前向传播，求得输出神经元的值
110 |         a = x # 神经元输出值
111 |         # 存储每个神经元输出
112 |         activations = [x] 
113 |         # 存储经过 sigmoid 函数计算的神经元的输入值，输入神经元除外
114 |         zs = []
115 |         for b, w in zip(self.biases, self.weights):
116 |             z = np.dot(w, a) + b
117 |             zs.append(z)
118 |             a = sigmoid(z) # 输出神经元
119 |             activations.append(a)
120 |         #-------------
121 |         activations[-1] = zs[-1] # 更改神经元输出结果
122 |         #-------------
123 |         # 求解输出层δ
124 |         # 与分类问题不同，Delta计算不需要乘以神经元输入的倒数
125 |         #delta = self.cost_function(activations[-1], y) * sigmoid_prime(zs[-1])
126 |         delta = self.cost_function(activations[-1], y) #更改后
127 |         #-------------
128 |         delta_b[-1] = delta
129 |         delta_w[-1] = np.dot(delta, activations[-2].T)
130 |         for lev in range(2, self.num_layers):
131 |             # 从倒数第1层开始更新，因此需要采用-lev
132 |             # 利用 lev + 1 层的 δ 计算 l 层的 δ 
133 |             z = zs[-lev]
134 |             zp = sigmoid_prime(z)
135 |             delta = np.dot(self.weights[-lev+1].T, delta) * zp
136 |             delta_b[-lev] = delta
137 |             delta_w[-lev] = np.dot(delta, activations[-lev-1].T)
138 |         return (delta_b, delta_w)
139 |     
140 |     def evaluate(self, train_data):
141 |         test_result = [[self.feed_forward(x), y]
142 |                         for x, y in train_data]
143 |         return np.sum([0.5 * (x - y) ** 2 for (x, y) in test_result])
144 |     
145 |     def predict(self, test_input):
146 |         test_result = [self.feed_forward(x)
147 |                         for x in test_input]
148 |         return test_result
149 | 
150 |     def cost_function(self, output_a, y):
151 |         '''
152 |         损失函数
153 |         '''
154 |         return (output_a - y)
155 |     pass


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # bpnn-stock-price-prediction
2 | 使用 BP 神经网络预测股票价格。A BP (back-propagation) neural network is used to predict stock prices.  
3 | 参考https://github.com/tjaume/BPNeuralNetworks 给出的函数，将其应用到股价数据集上。  
4 | 


--------------------------------------------------------------------------------
/data.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dukuanbing/bpnn-stock-price-prediction/d2bcaaaab341c5d560cd44ab82600aa314f1b377/data.xlsx


--------------------------------------------------------------------------------
/stock_predict.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sat May 30 00:11:45 2020
 4 | 
 5 | @author: Lenovo
 6 | """
 7 | 
 8 | import numpy as np
 9 | import pandas as pd
10 | import matplotlib.pyplot as plt
11 | import BPNN
12 | from sklearn.metrics import mean_absolute_error
13 | from sklearn.metrics import mean_squared_error
14 | #导入必要的库
# Load the first sheet of the workbook
df1 = pd.read_excel('data.xlsx', 0)
# Normalise the data with min-max scaling
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler()
df0 = min_max_scaler.fit_transform(df1)
df = pd.DataFrame(df0, columns=df1.columns)
# Every column but the last is an input feature; the last one is the target
x = df.iloc[:, :-1]
y = df.iloc[:, -1]
# Train/test split: the last `cut` rows form the test set
cut = 30
x_train, x_test = x.iloc[:-cut], x.iloc[-cut:]
y_train, y_test = y.iloc[:-cut], y.iloc[-cut:]
x_train, x_test = x_train.values, x_test.values
y_train, y_test = y_train.values, y_test.values
# Build the network: the input width follows the data instead of being
# hard-coded, with one hidden layer of 16 neurons and a single output
n_features = x_train.shape[1]
bp1 = BPNN.BPNNRegression([n_features, 16, 1])
# The network expects every sample as a pair of column vectors
train_data = [[sx.reshape(n_features, 1), sy.reshape(1, 1)]
              for sx, sy in zip(x_train, y_train)]
test_data = [np.reshape(sx, (n_features, 1)) for sx in x_test]
# Train: up to 1000 epochs, a single batch holding the whole training set,
# learning rate 0.2
bp1.MSGD(train_data, 1000, len(train_data), 0.2)
# Predict on the test set
y_predict = bp1.predict(test_data)
# Each prediction is a (1, 1) array; flatten them into one vector whose
# length follows the test set instead of repeating the literal 30
aa = np.array(y_predict).reshape(len(test_data), 1)
y_pre = aa[:, 0]
# Plot real vs. predicted values on the test set
draw = pd.concat([pd.DataFrame(y_test), pd.DataFrame(y_pre)], axis=1)
draw.iloc[:, 0].plot(figsize=(12, 6))
draw.iloc[:, 1].plot(figsize=(12, 6))
plt.legend(('real', 'predict'), loc='upper right', fontsize='15')
plt.title("Test Data", fontsize='30')  # add the title
# Print the accuracy metrics (sklearn's argument order is y_true, y_pred)
print('测试集上的MAE/MSE')
print(mean_absolute_error(y_test, y_pre))
print(mean_squared_error(y_test, y_pre))
51 | 


--------------------------------------------------------------------------------