├── README.md
├── datas
    ├── testdata_for_number.xlsx
    └── traindata.xlsx
├── images
    └── use.txt
├── label_lstm_keras.py
├── models
    └── use.txt
└── number_lstm_keras.py


/README.md:
--------------------------------------------------------------------------------
 1 | # lstm_example(lstm的实例)  
 2 | >based on keras(基于keras框架)  
 3 | 
 4 | ## 文件说明
 5 | 
 6 | - data_file1（数据文件1）:`traindata.xlsx`
 7 | 用于回归和分类问题的训练
 8 | - data_file2（数据文件2）:`testdata_for_number.xlsx` 
 9 | 用于回归问题的预测
10 | - code_file1（代码文件1）:`number_lstm_keras.py`  
11 | predicts a numeric value: for **regression** problems  
12 | 预测数值：**回归问题**  
13 | - code_file2（代码文件2）:`label_lstm_keras.py`  
14 | predicts a label: for **classification** problems  
15 | 预测标签：**分类问题**  
16 | 
17 | ## 对训练过程可视化
18 | 
19 | 对训练过程可视化
20 | 以**分类**问题为例
21 | 
22 | ![](https://img-blog.csdnimg.cn/20190320145213663.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM4NDEyODY4,size_16,color_FFFFFF,t_70)  
23 | change of loss during training (训练中loss的变化)  
24 | ![](https://img-blog.csdnimg.cn/20190320145317911.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM4NDEyODY4,size_16,color_FFFFFF,t_70)  
25 | change of accuracy during training (训练中accuracy的变化)  
26 | ## 更多信息
27 | More details can be found in the code or on [my blog](https://blog.csdn.net/qq_38412868/article/details/88688581)  
28 | 更多细节详见代码或者我的博客
29 | 


--------------------------------------------------------------------------------
/datas/testdata_for_number.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fff2zrx/lstm_example/ca8013bcc9e9cb598e1b3c87f0986f5f9d728d7b/datas/testdata_for_number.xlsx


--------------------------------------------------------------------------------
/datas/traindata.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fff2zrx/lstm_example/ca8013bcc9e9cb598e1b3c87f0986f5f9d728d7b/datas/traindata.xlsx


--------------------------------------------------------------------------------
/images/use.txt:
--------------------------------------------------------------------------------
1 | #储存图片用的文件夹


--------------------------------------------------------------------------------
/label_lstm_keras.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #利用lstm分类
  3 | __author__ = 'fff_zrx'
  4 | import pandas as pd
  5 | import numpy as np
  6 | from numpy import array
  7 | import random
  8 | from sklearn.preprocessing import OneHotEncoder
  9 | from sklearn import preprocessing
 10 | from keras.models import Sequential
 11 | from keras.models import load_model
 12 | from keras.layers import Dense, Dropout
 13 | from keras.layers import LSTM
 14 | from keras import regularizers
 15 | from matplotlib import pyplot
 16 | import openpyxl
 17 | import os
 18 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 19 | # ---- 数据导入 ----
 20 | data = pd.read_excel("./datas/traindata.xlsx")
 21 | origin_data_x = data.iloc[:,2:].values
 22 | origin_data_y=data.iloc[:,1].values
 23 | index = [j for j in range(len(origin_data_x))]
 24 | random.shuffle(index)
 25 | origin_data_y = origin_data_y[index]
 26 | origin_data_x = origin_data_x[index]
 27 | # ---- 参数定义----
 28 | split_point=int(len(origin_data_x)*0.8)
 29 | input_size=11
 30 | time_step =5
 31 | labels=5
 32 | epochs=200
 33 | batch_size=72
 34 | # 标准化，工具函数
 35 | def normal(data):
 36 |     mean=np.mean(data,axis=0)
 37 |     std=np.std(data,axis=0)
 38 |     return (data-mean)/std
 39 | #对labels进行one-hot编码
 40 | def label2hot(labels):
 41 |     values = array(labels)
 42 |     onehot_encoder = OneHotEncoder(sparse=False)
 43 |     integer_encoded = values.reshape(len(values), 1)
 44 |     onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
 45 |     return onehot_encoded
 46 | hot_data_y=label2hot(origin_data_y[:])
 47 | #hot_data_y.append(onehot_encoded)
 48 | #hot_data_y=array(hot_data_y).transpose((1,0,2))
 49 | # 训练集数据
 50 | train_x= origin_data_x[:split_point]
 51 | scaler = preprocessing.StandardScaler().fit(train_x)
 52 | train_x=scaler.transform(train_x)
 53 | # train_x = normal(train_x)
 54 | train_x=train_x.reshape([-1,input_size,time_step])
 55 | train_x=np.transpose(train_x,[0,2,1])
 56 | train_y = hot_data_y[:split_point]
 57 | # 测试集数据
 58 | test_x= origin_data_x[split_point:]
 59 | test_x=scaler.transform(test_x)
 60 | # test_x = normal(test_x)
 61 | test_x=test_x.reshape([-1,input_size,time_step])
 62 | test_x=np.transpose(test_x,[0,2,1])
 63 | test_y = hot_data_y[split_point:]
 64 | print("Data processing is finished!")
 65 | # design network
 66 | model = Sequential()
 67 | # model.add(LSTM(30, input_shape=(train_x.shape[1], train_x.shape[2]),kernel_regularizer=regularizers.l2(0.001),activity_regularizer=regularizers.l1(0.001)))
 68 | model.add(LSTM(30, input_shape=(train_x.shape[1], train_x.shape[2])))
 69 | # model.add(Dropout(0.2))
 70 | # model.add(LSTM(6, return_sequences=False))
 71 | model.add(Dense(labels, activation='softmax'))
 72 | model.summary() #打印出模型概况
 73 | model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy'])
 74 | # fit network
 75 | history = model.fit(train_x,train_y, epochs=epochs, batch_size=batch_size, validation_data=(test_x, test_y), verbose=2, shuffle=True)
 76 | #save model after train保存模型文件
 77 | model.save('./models/lstm_model_label.h5')
 78 | # test the model
 79 | score = model.evaluate(test_x, test_y, verbose=2) #evaluate函数按batch计算在某些输入数据上模型的误差
 80 | print('Test accuracy:', score[1])
 81 | score = model.evaluate(train_x, train_y, verbose=2) #evaluate函数按batch计算在某些输入数据上模型的误差
 82 | print('Train accuracy:', score[1])
 83 | #导出数据
 84 | prediction_label = model.predict_classes(test_x)
 85 | prediction_label=[i+1 for i in prediction_label]
 86 | fact_label=np.argmax(test_y,1)
 87 | fact_label=[i+1 for i in fact_label]
 88 | analysis=[fact_label, prediction_label]
 89 | wb = openpyxl.Workbook()
 90 | sheet = wb.active
 91 | sheet.title = 'analysis_data'
 92 | for i in range(0, 2):
 93 |     for j in range(0, len(analysis[i])):
 94 |         sheet.cell(row=j + 1, column=i + 1, value=analysis[i][j])
 95 | wb.save('./datas/analysis_label.xlsx')
 96 | print("写入预测数据成功！")
 97 | # plot history
 98 | pyplot.plot(history.history['loss'], label='train')
 99 | pyplot.plot(history.history['val_loss'], label='test')
100 | pyplot.legend()
101 | pyplot.xlabel('Epochs', fontsize = 12)
102 | pyplot.ylabel('Loss', fontsize = 12)
103 | pyplot.savefig("./images/Loss_label.png")
104 | pyplot.show()
105 | pyplot.plot(history.history['acc'], label='train')
106 | pyplot.plot(history.history['val_acc'], label='test')
107 | pyplot.legend()
108 | pyplot.xlabel('Epochs', fontsize = 12)
109 | pyplot.ylabel('Accuracy', fontsize = 12)
110 | pyplot.savefig("./images/Accuracy_label.png")
111 | pyplot.show()
112 | # deletes the existing model
113 | #del model
114 | # load model从模型文件加载模型
115 | #model=load_model('./models/lstm_model_label.h5')
116 | 


--------------------------------------------------------------------------------
/models/use.txt:
--------------------------------------------------------------------------------
1 | #储存训练完模型文件用的文件夹


--------------------------------------------------------------------------------
/number_lstm_keras.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #利用lstm预测
  3 | __author__ = 'fff_zrx'
  4 | import pandas as pd
  5 | import numpy as np
  6 | import random
  7 | import os
  8 | import keras
  9 | import pywt
 10 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 11 | from keras.models import Sequential
 12 | from keras.models import load_model
 13 | from keras.layers import Dense,Dropout
 14 | from keras.layers import LSTM
 15 | from matplotlib import pyplot
 16 | from sklearn import preprocessing
 17 | from keras import backend as K
 18 | from keras.callbacks import ModelCheckpoint
 19 | import openpyxl
 20 | from keras import losses
 21 | from sklearn.decomposition import PCA
 22 | # ---- 数据导入 ----
 23 | data = pd.read_excel("./datas/traindata.xlsx")
 24 | origin_data_x = data.iloc[:,2:].values
 25 | origin_data_y=data.iloc[:,0].values
 26 | index = [j for j in range(len(origin_data_x))]
 27 | random.shuffle(index)
 28 | origin_data_y = origin_data_y[index]
 29 | origin_data_x = origin_data_x[index]
 30 | # ---- 参数定义----
 31 | split_point=int(len(origin_data_x)*0.8)
 32 | input_size=11 # 输入层维数
 33 | time_step =5 # 步长窗口
 34 | epochs=150
 35 | batch_size=72
 36 | # 标准化，工具函数
 37 | def calculate_mape(data_x,data_y):
 38 |     index = list(np.nonzero(data_y)[0])
 39 |     data_y = np.array([data_y[i] for i in index])
 40 |     predict= model.predict(data_x)
 41 |     predict = np.array([predict[i] for i in index])
 42 |     return np.mean(np.abs(data_y - predict) * std / (np.abs(data_y * std + mean)))
 43 | def calculate_mae(data_x, data_y):
 44 |     index = list(np.nonzero(data_y)[0])
 45 |     data_y = np.array([data_y[i] for i in index])
 46 |     predict = model.predict(data_x)
 47 |     predict = np.array([predict[i] for i in index])
 48 |     return np.mean(np.abs(data_y - predict) * std)
 49 | def mape(y_true, y_pred):
 50 |     return  K.mean(K.abs(y_true - y_pred)*std/(K.abs(y_true*std+mean)))
 51 | class mape_callback(keras.callbacks.Callback):
 52 |     def __init__(self, training_data, validation_data):
 53 |         self.x = training_data[0]
 54 |         self.y = training_data[1]
 55 |         self.x_val = validation_data[0]
 56 |         self.y_val = validation_data[1]
 57 |     def on_train_begin(self, logs={}):
 58 |         return
 59 |     def on_train_end(self, logs={}):
 60 |         return
 61 |     def on_epoch_begin(self, epoch, logs={}):
 62 |         return
 63 |     def on_epoch_end(self, epoch, logs={}):
 64 |         y_pred = self.model.predict(self.x)
 65 |         mape = np.mean(np.abs(self.y - y_pred)*std/ (np.abs(self.y*std+mean)))
 66 |         y_pred_val = self.model.predict(self.x_val)
 67 |         val_mape=np.mean(np.abs(self.y_val - y_pred_val)*std /(np.abs(self.y_val*std+mean)))
 68 |         print('mape: %s - val_mape: %s' % (mape,val_mape))
 69 |         return
 70 |     def on_batch_begin(self, batch, logs={}):
 71 |         return
 72 |     def on_batch_end(self, batch, logs={}):
 73 |         return
 74 | 
 75 | # 训练集数据
 76 | train_x= origin_data_x[:split_point]
 77 | scaler = preprocessing.StandardScaler().fit(train_x)
 78 | train_x=scaler.transform(train_x)
 79 | train_x=train_x.reshape([-1,input_size,time_step])
 80 | train_x=np.transpose(train_x,[0,2,1])
 81 | train_y = origin_data_y[:split_point]
 82 | train_y= train_y.reshape([-1,1])
 83 | scaler1 = preprocessing.StandardScaler().fit(train_y)
 84 | train_y=scaler1.transform(train_y)
 85 | mean=scaler1.mean_
 86 | std=np.sqrt(scaler1.var_)
 87 | # 测试集数据
 88 | test_x= origin_data_x[split_point:]
 89 | test_x=scaler.transform(test_x)
 90 | test_x = test_x.reshape([-1, input_size, time_step])
 91 | test_x = np.transpose(test_x, [0, 2, 1])
 92 | test_y = origin_data_y[split_point:]
 93 | test_y= test_y.reshape([-1,1])
 94 | test_y=scaler1.transform(test_y)
 95 | # design network
 96 | model = Sequential()
 97 | model.add(LSTM(30, input_shape=(train_x.shape[1], train_x.shape[2]),return_sequences=False))
 98 | # model.add(LSTM(30, return_sequences=False))
 99 | model.add(Dropout(0.5))
100 | model.add(Dense(1))
101 | model.summary() #打印出模型概况
102 | model.compile(loss=["mae"], optimizer='adam',metrics=[mape])
103 | # fit network
104 | # filepath='model_trained.h5'
105 | # # checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
106 | history = model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,  validation_data=[test_x, test_y],verbose=2, shuffle=True)
107 | #save model after train保存模型文件
108 | model.save('./models/lstm_model_number.h5')
109 | # test the model
110 | print("Testdatasets mape:",calculate_mape(test_x,test_y))
111 | print("Testdatasets mae:",calculate_mae(test_x,test_y))
# ---- Plot the training history ----
# One figure per tracked quantity:
# (figure number, train key, validation key, y-axis label, output file).
for fig_no, train_key, val_key, y_label, out_path in (
        (1, 'loss', 'val_loss', 'Loss', "./images/Loss_number.png"),
        (2, 'mape', 'val_mape', 'Mape', "./images/Mape_number.png"),
):
    pyplot.figure(fig_no)
    pyplot.plot(history.history[train_key], label='train')
    pyplot.plot(history.history[val_key], label='test')
    pyplot.legend()
    pyplot.xlabel('Epochs', fontsize=12)
    pyplot.ylabel(y_label, fontsize=12)
    # Save before show(): the figure may be discarded once the window closes.
    pyplot.savefig(out_path)
    pyplot.show()
129 | # deletes the existing model
130 | # del model
131 | # load model
132 | # model=load_model('./models/lstm_model_number.h5')
# ---- Load the data to predict ----
data = pd.read_excel("./datas/testdata_for_number.xlsx")
# Features start at column 4 in this sheet and go through the same scaler and
# reshaping as the training features.
x = data.iloc[:, 4:].values
x = scaler.transform(x)
x = x.reshape([-1, input_size, time_step])
x = np.transpose(x, [0, 2, 1])
y = data.iloc[:, 3].values  # true values, in original units
true = y.reshape([-1, 1])
t = data.iloc[:, 2]  # not used below; presumably a timestamp column -- confirm
# predict() prepares its own prediction function, so the private
# model._make_predict_function() call (absent from newer Keras) is not needed.
predict = model.predict(x)
# Map the standardized predictions back to original units.
predict = scaler1.inverse_transform(predict)
# ---- MAPE and MAE on rows whose true value is non-zero ----
index = np.nonzero(true)[0]
after_true = true[index]
after_predict = predict[index]
# NOTE: this rebinds the module-level name `mape`, which until here referred
# to the Keras metric function.
mape = np.mean(np.abs(after_true - after_predict) / (np.abs(after_true)))
mae = np.mean(np.abs(after_true - after_predict))
print("testday:3月12-15日  Mape:", mape)
print("testday:3月12-15日  Mae:", mae)
# ---- Write "true, predicted" rows to CSV ----
y = y.reshape([-1, ])
# predict has shape (n, 1); flatten it so both columns are plain 1-D vectors
# instead of relying on np.mat to coerce a ragged nested list.
mat = np.column_stack([y, predict.reshape(-1)])
np.savetxt('./datas/test_data_number_output.csv', mat, delimiter=',')
print("写入预测数据成功！")
160 | 


--------------------------------------------------------------------------------