├── LSTMModel.py ├── README.md ├── __pycache__ ├── LSTMModel.cpython-36.pyc ├── dataset.cpython-36.pyc └── parser_my.cpython-36.pyc ├── data └── 000001SH_index.csv ├── dataset.py ├── evaluate.py ├── img ├── 17.png └── 18.png ├── model └── stock.pkl ├── parser_my.py └── train.py /LSTMModel.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class lstm(nn.Module): 5 | 6 | def __init__(self, input_size=8, hidden_size=32, num_layers=1 , output_size=1 , dropout=0, batch_first=True): 7 | super(lstm, self).__init__() 8 | # lstm的输入 #batch,seq_len, input_size 9 | self.hidden_size = hidden_size 10 | self.input_size = input_size 11 | self.num_layers = num_layers 12 | self.output_size = output_size 13 | self.dropout = dropout 14 | self.batch_first = batch_first 15 | self.rnn = nn.LSTM(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=self.batch_first, dropout=self.dropout ) 16 | self.linear = nn.Linear(self.hidden_size, self.output_size) 17 | 18 | def forward(self, x): 19 | out, (hidden, cell) = self.rnn(x) # x.shape : batch, seq_len, hidden_size , hn.shape and cn.shape : num_layes * direction_numbers, batch, hidden_size 20 | # a, b, c = hidden.shape 21 | # out = self.linear(hidden.reshape(a * b, c)) 22 | out = self.linear(hidden) 23 | return out -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stockPredict 2 | pytorch实现用LSTM做股票价格预测 3 | 4 | # 软件环境 5 | python3.0以上, 6 | pytorch 1.3.1, 7 | torchvision 0.4.1, 8 | Pillow 7.1.2, 9 | pandas 1.0.3 10 | 11 | # 项目结构 12 | ![项目结构](img/18.png) 13 | 14 | data目录:上证指数的csv文件 15 | model目录:模型保存文件 16 | dataset.py : 数据加载及预处理类,数据标准化、划分训练集及测试集等 17 | evaluate.py : 预测 18 | LSTMModel.py : 定义LSTM模型 19 | parsermy.py : 常用参数 20 | train.py:模型训练 21 | 22 | # 运行方法: 23 | 24 | 直接运行train.py开始模型训练 25 | 26 | 直接运行evaluate.py开始模型预测 27 | -------------------------------------------------------------------------------- /__pycache__/LSTMModel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/__pycache__/LSTMModel.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/parser_my.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/__pycache__/parser_my.cpython-36.pyc -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | from pandas import read_csv 2 | import numpy as np 3 | from torch.utils.data import DataLoader,Dataset 4 | import torch 5 | from torchvision import transforms 6 | from parser_my import args 7 | 8 | # 9 | def getData(corpusFile,sequence_length,batchSize): 10 | # 数据预处理 ,去除id、股票代码、前一天的收盘价、交易日期等对训练无用的无效数据 11 | stock_data = read_csv(corpusFile) 12 | stock_data.drop('ts_code', axis=1, inplace=True) # 删除第二列’股票代码‘ 13 | stock_data.drop('id', axis=1, inplace=True) # 删除第一列’id‘ 14 | stock_data.drop('pre_close', axis=1, inplace=True) # 删除列’pre_close‘ 15 | stock_data.drop('trade_date', axis=1, inplace=True) # 删除列’trade_date‘ 16 | 17 | close_max = stock_data['close'].max() #收盘价的最大值 18 | close_min = stock_data['close'].min() #收盘价的最小值 19 | df = stock_data.apply(lambda x: (x - min(x)) / (max(x) - min(x))) # min-max标准化 20 | 21 | # 构造X和Y 22 | #根据前n天的数据,预测未来一天的收盘价(close), 例如:根据1月1日、1月2日、1月3日、1月4日、1月5日的数据(每一天的数据包含8个特征),预测1月6日的收盘价。 23 | sequence = sequence_length 24 | X = [] 25 | Y = [] 26 | for i in range(df.shape[0] - sequence): 27 | X.append(np.array(df.iloc[i:(i + sequence), ].values, dtype=np.float32)) 28 | Y.append(np.array(df.iloc[(i + sequence), 0], dtype=np.float32)) 29 | 30 | # 构建batch 31 | total_len = len(Y) 32 | # print(total_len) 33 | 34 | trainx, trainy = X[:int(0.99 * total_len)], Y[:int(0.99 * total_len)] 35 | testx, testy = X[int(0.99 * total_len):], Y[int(0.99 * total_len):] 36 | train_loader = DataLoader(dataset=Mydataset(trainx, trainy, transform=transforms.ToTensor()), batch_size=batchSize, 37 | shuffle=True) 38 | test_loader = DataLoader(dataset=Mydataset(testx, testy), batch_size=batchSize, shuffle=True) 39 | return close_max,close_min,train_loader,test_loader 40 | 41 | 42 | 43 | class Mydataset(Dataset): 44 | def __init__(self, xx, yy, transform=None): 45 | self.x = xx 46 | self.y = yy 47 | self.tranform = transform 48 | 49 | def __getitem__(self, index): 50 | x1 = self.x[index] 51 | y1 = self.y[index] 52 | if self.tranform != None: 53 | return self.tranform(x1), y1 54 | return x1, y1 55 | 56 | def __len__(self): 57 | return len(self.x) 58 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | from LSTMModel import lstm 2 | from dataset import getData 3 | from parser_my import args 4 | import torch 5 | 6 | 7 | def eval(): 8 | # model = torch.load(args.save_file) 9 | model = lstm(input_size=args.input_size, hidden_size=args.hidden_size, num_layers=args.layers , output_size=1) 10 | model.to(args.device) 11 | checkpoint = torch.load(args.save_file) 12 | model.load_state_dict(checkpoint['state_dict']) 13 | preds = [] 14 | labels = [] 15 | close_max, close_min, train_loader, test_loader = getData(args.corpusFile, args.sequence_length, args.batch_size) 16 | for idx, (x, label) in enumerate(test_loader): 17 | if args.useGPU: 18 | x = x.squeeze(1).cuda() # batch_size,seq_len,input_size 19 | else: 20 | x = x.squeeze(1) 21 | pred = model(x) 22 | list = pred.data.squeeze(1).tolist() 23 | preds.extend(list[-1]) 24 | labels.extend(label.tolist()) 25 | 26 | for i in range(len(preds)): 27 | print('预测值是%.2f,真实值是%.2f' % ( 28 | preds[i][0] * (close_max - close_min) + close_min, labels[i] * (close_max - close_min) + close_min)) 29 | 30 | eval() -------------------------------------------------------------------------------- /img/17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/img/17.png -------------------------------------------------------------------------------- /img/18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/img/18.png -------------------------------------------------------------------------------- /model/stock.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netblind/stockPredict/de0bc534dbcc584254e14d84f65ea3bad8f1c5f7/model/stock.pkl -------------------------------------------------------------------------------- /parser_my.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | parser = argparse.ArgumentParser() 5 | 6 | parser.add_argument('--corpusFile', default='data/000001SH_index.csv') 7 | 8 | 9 | # TODO 常改动参数 10 | parser.add_argument('--gpu', default=0, type=int) # gpu 卡号 11 | parser.add_argument('--epochs', default=100, type=int) # 训练轮数 12 | parser.add_argument('--layers', default=2, type=int) # LSTM层数 13 | parser.add_argument('--input_size', default=8, type=int) #输入特征的维度 14 | parser.add_argument('--hidden_size', default=32, type=int) #隐藏层的维度 15 | parser.add_argument('--lr', default=0.0001, type=float) #learning rate 学习率 16 | parser.add_argument('--sequence_length', default=5, type=int) # sequence的长度,默认是用前五天的数据来预测下一天的收盘价 17 | parser.add_argument('--batch_size', default=64, type=int) 18 | parser.add_argument('--useGPU', default=False, type=bool) #是否使用GPU 19 | parser.add_argument('--batch_first', default=True, type=bool) #是否将batch_size放在第一维 20 | parser.add_argument('--dropout', default=0.1, type=float) 21 | parser.add_argument('--save_file', default='model/stock.pkl') # 模型保存位置 22 | 23 | 24 | args = parser.parse_args() 25 | 26 | device = torch.device(f"cuda:{args.gpu}" if torch.cuda.is_available() and args.useGPU else "cpu") 27 | args.device = device -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Variable 2 | import torch.nn as nn 3 | import torch 4 | from LSTMModel import lstm 5 | from parser_my import args 6 | from dataset import getData 7 | 8 | def train(): 9 | 10 | model = lstm(input_size=args.input_size, hidden_size=args.hidden_size, num_layers=args.layers , output_size=1, dropout=args.dropout, batch_first=args.batch_first ) 11 | model.to(args.device) 12 | criterion = nn.MSELoss() # 定义损失函数 13 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) # Adam梯度下降 学习率=0.001 14 | 15 | close_max, close_min, train_loader, test_loader = getData(args.corpusFile,args.sequence_length,args.batch_size ) 16 | for i in range(args.epochs): 17 | total_loss = 0 18 | for idx, (data, label) in enumerate(train_loader): 19 | if args.useGPU: 20 | data1 = data.squeeze(1).cuda() 21 | pred = model(Variable(data1).cuda()) 22 | # print(pred.shape) 23 | pred = pred[1,:,:] 24 | label = label.unsqueeze(1).cuda() 25 | # print(label.shape) 26 | else: 27 | data1 = data.squeeze(1) 28 | pred = model(Variable(data1)) 29 | pred = pred[1, :, :] 30 | label = label.unsqueeze(1) 31 | loss = criterion(pred, label) 32 | optimizer.zero_grad() 33 | loss.backward() 34 | optimizer.step() 35 | total_loss += loss.item() 36 | print(total_loss) 37 | if i % 10 == 0: 38 | # torch.save(model, args.save_file) 39 | torch.save({'state_dict': model.state_dict()}, args.save_file) 40 | print('第%d epoch,保存模型' % i) 41 | # torch.save(model, args.save_file) 42 | torch.save({'state_dict': model.state_dict()}, args.save_file) 43 | 44 | train() --------------------------------------------------------------------------------