├── README.md
├── air_patna.csv
├── emd-transformer-bilstm.py
├── flowchart.png
├── transformer - (bi)lstm.py
├── transformer - linear.py
└── wavelet - bilstm.py

/README.md:
--------------------------------------------------------------------------------
1 | # EMD-Transformer-BiLSTM
2 | Short-term Air Quality Prediction Based on EMD-Transformer-BiLSTM
3 | 
4 | **Abstract**: Air quality time series collected in practice are highly volatile and nonstationary, and accurately predicting nonlinear time series that contain complex noise remains an open challenge. This paper proposes an air quality prediction method based on empirical mode decomposition (EMD), a Transformer and a bidirectional long short-term memory network (BiLSTM). The method is well suited to ultrashort-term prediction of nonlinear time series and performs well on the air quality dataset of Patna, India (6:00 am on October 3, 2015, to 0:00 on July 1, 2020). The AQI sequence is first decomposed into intrinsic mode functions (IMFs) via EMD, and each IMF is then predicted separately with the improved Transformer built on a BiLSTM decoder; linear regression is used for IMFs with simple trends. Finally, the predictions of the individual IMFs are combined by a BiLSTM to obtain the predicted AQI values. With a time window of 5 hours, the RMSE, MAE and MAPE for Patna are as low as 5.6853, 2.8230 and 2.23%, respectively. Moreover, the scalability of the proposed model is validated on air quality datasets from several other cities, and the results show that the proposed hybrid model achieves high accuracy and has broad application prospects for real-time air quality prediction.
5 | 
6 | **Key words**: Hourly Forecast; Air Quality Index; Transformer; BiLSTM; EMD
7 | 
--------------------------------------------------------------------------------
/emd-transformer-bilstm.py:
--------------------------------------------------------------------------------
1 | # %% Exploratory analysis
2 | # import pandas as pd
3 | # import pandas_profiling  # exploratory visualization
4 | # df = pd.read_csv('air_patna.csv', usecols=['AQI'])
5 | # pandas_profiling.ProfileReport(df)
6 | 
7 | # Final target model
8 | # %% Imports and parameter settings
9 | import torch
10 | import torch.nn as nn
11 | import numpy as np
12 | import time
13 | import math
14 | import matplotlib.pyplot as plt
15 | from sklearn.preprocessing import MinMaxScaler
16 | import pandas as pd
17 | from PyEMD import EMD
18 | from sklearn import metrics
19 | from sklearn.linear_model import LinearRegression
20 | # from tensorboardX import SummaryWriter  # for visualization
21 | # from torchviz import make_dot
22 | # logger = SummaryWriter(log_dir="data/log")
23 | # writer_g = SummaryWriter("data/generator")
24 | 
25 | torch.manual_seed(0)
26 | np.random.seed(0)
27 | 
28 | input_window = 5  # tunable parameter (length of the input window in hours)
29 | output_window = 1
30 | batch_size = 64
31 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32 | print(device)
33 | 
34 | 
35 | # %% Model component 1: positional encoding
36 | class PositionalEncoding(nn.Module):
37 | 
38 | 
39 |     def __init__(self, d_model, max_len=5000):
40 |         super(PositionalEncoding, self).__init__()
41 |         pe = torch.zeros(max_len, d_model)
42 |         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
43 |         div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
44 |         pe[:, 0::2] = torch.sin(position * div_term)
45 |         pe[:, 1::2] = torch.cos(position * div_term)
46 |         pe = pe.unsqueeze(0).transpose(0, 1)
47 |         self.register_buffer('pe', pe)
48 | 
49 | 
50 |     def forward(self, x):
51 |         return x + self.pe[:x.size(0), :]
52 | 
53 | # %% Transformer framework
54 | class TransAm(nn.Module):
55 |     def __init__(self, feature_size=250, num_layers=1, dropout=0.1):
56 |         super(TransAm, self).__init__()
57 |         self.model_type = 'Transformer'
58 |         self.src_mask = None
59 |         self.pos_encoder = PositionalEncoding(feature_size)
60 |         self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout)
61 |         self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
62 |         self.hidden_size = 2
63 |         self.decoder = nn.LSTM(input_size=feature_size, hidden_size=self.hidden_size, num_layers=2, bias=True, bidirectional=True)
64 |         self.linear1 = nn.Linear(self.hidden_size*2, 1)  # fully connected layer
65 |         self.init_weights2()
66 |         for name, param in self.decoder.named_parameters():
67 |             if name.startswith("weight"):
68 |                 nn.init.xavier_normal_(param)
69 |             else:
70 |                 nn.init.zeros_(param)
71 | 
72 |     def init_weights2(self):
73 |         initrange = 0.1
74 |         self.linear1.bias.data.zero_()
75 |         self.linear1.weight.data.uniform_(-initrange, initrange)
76 | 
77 |     def forward(self, src):
78 |         if self.src_mask is None or self.src_mask.size(0) != len(src):
79 |             device = src.device
80 |             mask = self._generate_square_subsequent_mask(len(src)).to(device)
81 |             self.src_mask = mask
82 |         src = self.pos_encoder(src)
83 |         output = self.transformer_encoder(src, self.src_mask)
84 |         output, _ = self.decoder(output)
85 |         s, b, h = output.shape
86 |         output = output.view(s * b, h)
87 |         output = self.linear1(output)
88 |         output = output.view(s, b, -1)
89 |         return output
90 | 
91 |     def _generate_square_subsequent_mask(self, sz):
92 |         mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
93 |         mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
94 |         return mask
95 | 
96 | 
97 | 
98 | # %% Data preprocessing: sliding windows
99 | # If the input is values 1 to 20, its label is values 2 to 21, matching the seq2seq-style output of the Transformer
100 | def create_inout_sequences(input_data, tw):
101 |     inout_seq = []
102 |     L = len(input_data)
103 |     for i in range(L - tw):
104 |         train_seq = input_data[i:i + tw]
105 |         train_label = input_data[i + output_window:i + tw + output_window]
106 |         inout_seq.append((train_seq, train_label))
107 |     return torch.FloatTensor(inout_seq)
108 | 
109 | # %% Data splitting and loading
110 | def get_data(series):
111 | 
112 | 
113 |     train_data = series[:train_samples]
114 |     test_data = series[train_samples:]
115 | 
116 | 
117 |     train_sequence = create_inout_sequences(train_data, input_window)
118 |     train_sequence = train_sequence[:-output_window]
119 | 
120 | 
121 |     test_data = create_inout_sequences(test_data, input_window)
122 |     test_data = test_data[:-output_window]
123 | 
124 | 
125 |     return train_sequence.to(device), test_data.to(device)
126 | 
127 | def get_batch(source, i, batch_size):  # convenience function for reading the data in batches
128 |     seq_len = min(batch_size, len(source) - 1 - i)
129 |     data = source[i:i + seq_len]
130 |     input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1))
131 |     target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1))
132 |     return input, target
133 | 
134 | # %% Training function
135 | # Backpropagate the loss; gradient clipping can be added here to guard against exploding gradients
136 | def train(train_data, model):
137 |     model.train()  # switch to training mode
138 | 
139 |     for batch_index, i in enumerate(range(0, len(train_data) - 1, batch_size)):
140 |         start_time = time.time()
141 |         total_loss = 0
142 |         data, targets = get_batch(train_data, i, batch_size)  # inspect the input
143 |         optimizer.zero_grad()
144 |         output = model(data)
145 |         loss = criterion(output, targets)  # the LSTM decoder returns two outputs
146 |         loss.backward()
147 |         optimizer.step()
148 | 
149 |         total_loss += loss.item()
150 |         log_interval = int(len(train_data) / batch_size / 5)
151 |         if batch_index % log_interval == 0 and batch_index > 0:
152 |             cur_loss = total_loss / log_interval
153 |             elapsed = time.time() - start_time
154 |             print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | {:5.2f} ms | loss {:5.5f} | ppl {:8.2f}'
155 |                   .format(epoch, batch_index, len(train_data) // batch_size, scheduler.get_last_lr()[0], elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss)))
156 |     # # First log entry: loss vs. global iteration count
157 |     # logger.add_scalar("loss", loss.item(), global_step=epoch)
158 |     # logger.add_scalar("lr", scheduler.get_last_lr()[0], global_step=epoch)
159 | 
160 | # %% Model evaluation
161 | def evaluate(eval_model, data_source):
162 |     test_result = torch.Tensor(0)
163 |     truth = torch.Tensor(0)
164 |     eval_model.eval()
165 |     total_loss = 0
166 |     eval_batch_size = 1000
167 |     with torch.no_grad():
168 |         for i in range(0, len(data_source) - 1, eval_batch_size):  # single pass over the data
169 |             data, targets = get_batch(data_source, i, eval_batch_size)
170 |             output = eval_model(data)
171 |             total_loss += len(data[0]) * criterion(output, targets).cpu().item()
172 |             test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0)
173 |             truth = torch.cat((truth, targets[-1].view(-1).cpu()), 0)
174 | 
175 |     test_result = (test_result.reshape(-1,1)).detach().numpy()
176 |     # truth = (truth.reshape(-1,1)).detach().numpy()
177 |     # rmSE = metrics.mean_squared_error(test_result, truth)**0.5
178 |     # mAE = metrics.mean_absolute_error(truth,test_result)
179 |     # mAPE = metrics.mean_absolute_percentage_error(truth,test_result)
180 | 
181 |     # # Second log entry: RMSE vs. global iteration count
182 |     # logger.add_scalar("RMSE", rmSE, global_step=epoch)
183 |     # logger.add_scalar("MAE", mAE, global_step=epoch)
184 |     # logger.add_scalar("MAPE", mAPE, global_step=epoch)
185 | 
186 |     return test_result
187 | 
188 | # %% Linear regression
189 | def linearm(data):
190 |     open_arr = data.reshape(-1, 1).reshape(-1)  # read the data
191 |     X = np.zeros(shape=(len(open_arr) - input_window, input_window))
192 |     label = np.zeros(shape=(len(open_arr) - input_window))
193 |     for i in range(len(open_arr) - input_window):
194 |         X[i, :] = open_arr[i:i+input_window]
195 |         label[i] = open_arr[i+input_window]
196 |     train_X = X[:train_samples, :]
197 |     train_label = label[:train_samples]
198 |     test_X = X[train_samples:, :]
199 |     test_label = label[train_samples:]
200 | 
201 |     linreg = LinearRegression()
202 |     model = linreg.fit(train_X, train_label)
203 |     y_pred = linreg.predict(test_X)
204 | 
205 |     return y_pred, test_label
206 | 
207 | # %% Model training
208 | # Run 100 epochs; evaluate the model on the test set every 10 epochs
209 | # series = pd.read_csv('air_patna.csv', usecols=['AQI'])
210 | 
211 | series = pd.read_csv('air_patna.csv', usecols=['AQI'])
212 | scaler = MinMaxScaler(feature_range=(-1, 1))
213 | series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1)
214 | train_samples = int(round(len(series)*0.75, 0))
215 | # train_samples = 30800
216 | # emd_win = 24
217 | emd = EMD()
218 | IMFs = emd(series, max_imf=12)  # EMD decomposition
219 | # from pyhht.visualization import plot_imfs
220 | # plot_imfs(series, np.array(IMFs))
221 | # pd.DataFrame(IMFs).T.to_csv('imf.csv')
222 | 
223 | N = len(IMFs)
224 | k = 0
225 | for itm in IMFs:
226 |     k = k+1
227 |     globals()['series'+str(k)] = itm
228 |     globals()['train_data'+str(k)], globals()['val_data'+str(k)] = get_data(locals()['series'+str(k)])
229 | 
230 | # Train one model per IMF in a loop
231 | lr = 0.001
232 | epochs = 100
233 | # j = 0
234 | for j in range(len(IMFs)):
235 |     j = j+1
236 |     globals()['model'+str(j)] = TransAm().to(device)
237 |     criterion = nn.MSELoss()
238 |     optimizer = torch.optim.AdamW(locals()['model'+str(j)].parameters(), lr=lr)
239 |     scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95)
240 |     # epoch = 1
241 |     for epoch in range(1, epochs + 1):
242 |         epoch_start_time = time.time()
243 |         train(locals()['train_data'+str(j)], locals()['model'+str(j)])  # train the model for this IMF
244 |         scheduler.step()
245 | 
246 | 
247 | # If linear regression performs better on an IMF, use the linear prediction instead
248 | pred = []
249 | # i=0
250 | for i in range(len(IMFs)):
251 |     i = i+1
252 |     globals()['res'+str(i)] = evaluate(locals()['model'+str(i)], locals()['val_data'+str(i)])
253 |     reslinear, ttru = linearm(IMFs[i-1])
254 |     ttru = ttru[2-len(ttru):]
255 |     reslinear = reslinear[2-len(reslinear):]
256 |     if metrics.mean_squared_error(reslinear, ttru)**0.5 < metrics.mean_squared_error(locals()['res'+str(i)], ttru)**0.5:
257 |         locals()['res'+str(i)] = reslinear.tolist()
258 |         pred.append(locals()['res'+str(i)])
259 |     else:
260 |         pred.append([token for st in locals()['res'+str(i)] for token in st])
261 | 
262 | # # Get the loss of each model separately
263 | # data = torch.randn(2, 2, 1).to(device)  # define a dummy network input of similar shape
264 | # logger.add_graph(model, data)
265 | # # Save as a .pt file and then visualize
266 | # torch.save(model, "../log/modelviz.pt")
267 | # # Visualize with graphviz
268 | # out = model(data)
269 | # g = make_dot(out)
270 | # g.render('modelviz', view=False)
271 | # logger.close()
272 | 
273 | 
274 | # %% y ~ IMFs
275 | # Multivariate BiLSTM that maps the predicted IMFs to the final AQI prediction
276 | class LstmRNN(nn.Module):  # BiLSTM
277 | 
278 |     def __init__(self, input_size, hidden_size=1, output_size=1, num_layers=1):
279 |         super().__init__()
280 | 
281 |         self.lstm = nn.LSTM(input_size, hidden_size, num_layers, bidirectional=True)  # utilize the LSTM model in torch.nn
282 |         self.linear1 = nn.Linear(2*hidden_size, output_size)  # fully connected layer
283 | 
284 |     def forward(self, _x):
285 |         x, _ = self.lstm(_x)  # _x is input, size (seq_len, batch, input_size)
286 |         s, b, h = x.shape  # x is output, size (seq_len, batch, hidden_size)
287 |         x = x.view(s * b, h)
288 |         x = self.linear1(x)
289 |         x = x.view(s, b, -1)
290 |         return x
291 | 
292 | i = 0
293 | x = []
294 | for item in IMFs:
295 |     i = i+1
296 |     globals()['train'+str(i)] = item[:train_samples]
297 |     x.append(locals()['train'+str(i)])
298 | X = pd.DataFrame(x).T  # double-check this
299 | data_x = np.array(X).astype('float32')
300 | train_data = series[:train_samples]
301 | data_y = np.array(train_data).astype('float32')
302 | 
303 | data_len = len(data_x)
304 | t = np.linspace(0, data_len, data_len)
305 | 
306 | train_x = data_x
307 | train_y = data_y
308 | t_for_training = t
309 | 
310 | INPUT_FEATURES_NUM = len(IMFs)
311 | OUTPUT_FEATURES_NUM = 1
312 | lstm_model = LstmRNN(INPUT_FEATURES_NUM, 20, output_size=OUTPUT_FEATURES_NUM, num_layers=1)  # 20 hidden units
313 | lstm_model.to(device)
314 | criterion = nn.MSELoss()
315 | optimizer = torch.optim.Adam(lstm_model.parameters(), lr=1e-2)
316 | 
317 | train_x_tensor = train_x.reshape(-1, 1, INPUT_FEATURES_NUM)
318 | train_y_tensor = train_y.reshape(-1, 1, OUTPUT_FEATURES_NUM)
319 | train_x_tensor = torch.from_numpy(train_x_tensor)
320 | train_y_tensor = torch.from_numpy(train_y_tensor)
321 | train_x_tensor = train_x_tensor.to(device)
322 | train_y_tensor = train_y_tensor.to(device)
323 | 
324 | epoches = 1000
325 | for epoch in range(epoches):
326 |     output = lstm_model(train_x_tensor).to(device)
327 |     loss = criterion(output, train_y_tensor)
328 |     optimizer.zero_grad()
329 |     loss.backward()
330 |     optimizer.step()
331 | 
332 | 
333 | # %% Predicted vs. true values: evaluation of results
334 | pred = np.array(pred).astype('float32')
335 | test_x = np.transpose(pred)  # test-set series of the IMFs
336 | # test_x = pred0  # test-set series of the IMFs
337 | test_x_tensor = test_x.reshape(-1, 1, INPUT_FEATURES_NUM)
338 | test_x_tensor = torch.from_numpy(test_x_tensor)
339 | test_x_tensor = test_x_tensor.to(device)
340 | pre = lstm_model(test_x_tensor).to(device)
341 | pre = (pre.cpu().reshape(-1,1)).detach().numpy().flatten()
342 | test_y = series[-len(pre):]
343 | pre = scaler.inverse_transform(pre.reshape(-1, 1))
344 | test_y = scaler.inverse_transform(test_y.reshape(-1, 1))  # invert the normalization
345 | 
346 | rmSE = metrics.mean_squared_error(test_y, pre)**0.5
347 | mAE = metrics.mean_absolute_error(test_y, pre)
348 | mAPE = metrics.mean_absolute_percentage_error(test_y, pre)  # compute the metrics (y_true first, then y_pred)
349 | 
350 | # pre = series[-7720:]
351 | # test_y = pre
352 | plt.figure(figsize=(10,5))
353 | plt.plot(pre, color="red")
354 | plt.plot(test_y, color="blue")
355 | plt.legend(['Prediction', 'Truth'], loc='upper right', fontsize=12)
356 | plt.grid(True, which='both')
357 | plt.title('EMD-Transformer-BiLSTM', fontsize=20)
358 | plt.xlabel('Datetime/hour', fontsize=12)
359 | plt.ylabel('AQI', fontsize=12)
360 | # plt.savefig('mm.png')
361 | plt.savefig('emd-transformer-bilstm-win%d.png' % input_window)
362 | plt.close()
363 | 
364 | m = []
365 | m.append(rmSE)
366 | m.append(mAE)
367 | m.append(mAPE)
368 | m = pd.DataFrame(m)
369 | m.to_csv('emd-tr-bilstm-%d-h.csv' % input_window)
370 | 
371 | # # %% Plot the model's loss curves (the TensorBoard plots are too ugly)
372 | # import matplotlib.pyplot as plt
373 | # import numpy as np
374 | # import pandas as pd
375 | 
376 | # lossdf = pd.read_csv('loss3.csv')
377 | # plt.plot(lossdf['test_loss'], color="cornflowerblue")
378 | # plt.plot(lossdf['train_loss'], color="sandybrown")
379 | # plt.yscale('log')  # switch to a log scale
380 | # plt.legend(['test_loss', 'train_loss'], loc='upper right', fontsize=13)
381 | # # plt.grid(True, which='both')
382 | # plt.title('Test Loss and Train Loss-IMF3', fontsize=15)  # figure title
383 | # plt.xlabel('Epochs', fontsize=13)  # x-axis label
384 | # plt.ylabel('Loss', fontsize=13)  # y-axis label
385 | # plt.savefig('loss3.png')
386 | # plt.close()
--------------------------------------------------------------------------------
/flowchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aquafina2332/EMD-Transformer-BiLSTM/386a207e480f6aa6cce468703031e929f490c710/flowchart.png
--------------------------------------------------------------------------------
/transformer - (bi)lstm.py:
--------------------------------------------------------------------------------
1 | # Transformer-BiLSTM
2 | 
3 | # %% Imports and parameter settings
4 | import torch
5 | import torch.nn as nn
6 | import numpy as np
7 | import time
8 | import math
9 | import matplotlib.pyplot as plt
10 | from sklearn.preprocessing import MinMaxScaler
11 | import pandas as pd
12 | from sklearn import metrics
13 | 
14 | 
15 | torch.manual_seed(0)
16 | np.random.seed(0)
17 | 
18 | 
19 | input_window = 3  # the Transformer struggles to build long-range dependence here, so a short window is used
20 | output_window = 1
21 | batch_size = 64
22 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
23 | print(device)  # unfortunately, no GPU server is available
24 | 
25 | 
26 | # %% Model component 1: positional encoding
27 | class PositionalEncoding(nn.Module):
28 | 
29 | 
30 |     def __init__(self, d_model, max_len=5000):
31 |         super(PositionalEncoding, self).__init__()
32 |         pe = torch.zeros(max_len, d_model)
33 |         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
34 |         div_term = torch.exp(torch.arange(0, d_model, 2).float() * 
(-math.log(10000.0) / d_model)) 35 | pe[:, 0::2] = torch.sin(position * div_term) 36 | pe[:, 1::2] = torch.cos(position * div_term) 37 | pe = pe.unsqueeze(0).transpose(0, 1) 38 | self.register_buffer('pe', pe) 39 | 40 | 41 | def forward(self, x): 42 | return x + self.pe[:x.size(0), :] 43 | 44 | # %% transforme框架 45 | # 没有采用原论文中的Encoder-Decoder的架构,而是将Decoder用了一个全连接层进行代替,用于输出预测值。 46 | # 另外,其中的create_mask将输入进行mask,从而避免引入未来信息 47 | class TransAm(nn.Module): 48 | def __init__(self, feature_size=250, num_layers=1, dropout=0.1): 49 | super(TransAm, self).__init__() 50 | self.model_type = 'Transformer' 51 | self.src_mask = None 52 | self.pos_encoder = PositionalEncoding(feature_size) 53 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout) 54 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers) 55 | # self.decoder = nn.Linear(feature_size, 1) 56 | # self.decoder_layer = nn.TransformerDecoderLayer(d_model=feature_size, nhead=10,dropout=dropout) 57 | # self.decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=num_layers) 58 | self.hidden_size = 2 59 | self.decoder = nn.LSTM(input_size=feature_size,hidden_size=self.hidden_size ,num_layers=2,bias = True,bidirectional=False) 60 | # self.init_weights() #这个是定义初始化 61 | self.linear1 = nn.Linear(self.hidden_size, 1) # 全连接层 62 | self.init_weights2() 63 | 64 | for name, param in self.decoder.named_parameters(): # 这个是可以运行的哦 65 | if name.startswith("weight"): 66 | nn.init.xavier_normal_(param) 67 | else: 68 | nn.init.zeros_(param) 69 | 70 | # def init_weights(self): # 这个是lstm的stvd分解 71 | # stdv = 1.0 / math.sqrt(self.hidden_size) 72 | # for weight in self.parameters(): 73 | # weight.data.uniform_(-stdv, stdv) 74 | 75 | def init_weights2(self): # 这是原来的哦 76 | initrange = 0.1 77 | self.linear1.bias.data.zero_() 78 | self.linear1.weight.data.uniform_(-initrange, initrange) 79 | 80 | def forward(self, src): 81 | if self.src_mask is None or self.src_mask.size(0) != len(src): 82 | device = src.device 83 | mask = self._generate_square_subsequent_mask(len(src)).to(device) 84 | self.src_mask = mask 85 | src = self.pos_encoder(src) 86 | output = self.transformer_encoder(src, self.src_mask) 87 | output, _ = self.decoder(output) 88 | s, b, h = output.shape 89 | output = output.view(s * b, h) 90 | output = self.linear1(output) 91 | output = output.view(s, b, -1) 92 | return output 93 | 94 | def _generate_square_subsequent_mask(self, sz): 95 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 96 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 97 | return mask 98 | 99 | 100 | 101 | # %% 数据预处理:滑窗处理 102 | # 假设输入是1到20,则其标签就是2到21,以适应Transformer的seq2seq的形式的输出 103 | def create_inout_sequences(input_data, tw): 104 | inout_seq = [] 105 | L = len(input_data) 106 | for i in range(L - tw): 107 | train_seq = input_data[i:i + tw] 108 | train_label = input_data[i + output_window:i + tw + output_window] 109 | inout_seq.append((train_seq, train_label)) 110 | return torch.FloatTensor(inout_seq) 111 | 112 | # %% 数据分割与get 113 | def get_data(): 114 | 115 | 116 | series = pd.read_csv('air_patna.csv', usecols=['AQI']) 117 | scaler = MinMaxScaler(feature_range=(-1, 1)) 118 | series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1) 119 | 120 | 121 | train_samples = int(round(len(series)*0.75,0)) 122 | train_data = series[:train_samples] 123 | test_data = series[train_samples:] 124 | 125 | 126 | train_sequence = 
create_inout_sequences(train_data, input_window) 127 | train_sequence = train_sequence[:-output_window] 128 | 129 | 130 | test_data = create_inout_sequences(test_data, input_window) 131 | test_data = test_data[:-output_window] 132 | 133 | 134 | return train_sequence.to(device), test_data.to(device), scaler 135 | 136 | def get_batch(source, i, batch_size): # 便于以batch形式读取 137 | seq_len = min(batch_size, len(source) - 1 - i) 138 | data = source[i:i + seq_len] 139 | input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1)) 140 | target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1)) 141 | return input, target 142 | 143 | # %% 模型构建 144 | # 对参数进行反向传播,其中用到了梯度裁剪的技巧用于防止梯度爆炸 145 | def train(train_data): 146 | model.train() # 查看结构 147 | 148 | # bilstm = nn.LSTM(input_size=1, hidden_size=1, num_layers=2, bidirectional = False) 149 | # output, (hn, cn) =bilstm(data) 150 | # output.shape # 把前面的改一下 151 | 152 | for batch_index, i in enumerate(range(0, len(train_data) - 1, batch_size)): 153 | start_time = time.time() 154 | total_loss = 0 155 | data, targets = get_batch(train_data, i, batch_size) # 看看输入 156 | optimizer.zero_grad() 157 | output = model(data) 158 | loss = criterion(output, targets) # lstm的输出有两个 159 | loss.backward() 160 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7) 161 | optimizer.step() 162 | 163 | total_loss += loss.item() 164 | log_interval = int(len(train_data) / batch_size / 5) 165 | if batch_index % log_interval == 0 and batch_index > 0: 166 | cur_loss = total_loss / log_interval 167 | elapsed = time.time() - start_time 168 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | {:5.2f} ms | loss {:5.5f} | ppl {:8.2f}' 169 | .format(epoch, batch_index, len(train_data) // batch_size, scheduler.get_last_lr()[0], elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss))) 170 | 171 | # %% 模型评估 172 | def evaluate(eval_model, data_source): 173 | eval_model.eval() 174 | total_loss = 0 175 | eval_batch_size = 1000 176 | with torch.no_grad(): 177 | for i in range(0, len(data_source) - 1, eval_batch_size): 178 | data, targets = get_batch(data_source, i, eval_batch_size) 179 | output = eval_model(data) 180 | total_loss += len(data[0]) * criterion(output, targets).cpu().item() 181 | return total_loss / len(data_source) 182 | 183 | # %% 模型结果绘图 184 | def plot_and_loss(eval_model, data_source, epoch, scaler): 185 | eval_model.eval() 186 | # model.eval() 187 | # data_source = val_data 188 | total_loss = 0. 
189 | test_result = torch.Tensor(0) 190 | truth = torch.Tensor(0) 191 | with torch.no_grad(): 192 | for i in range(0, len(data_source) - 1): 193 | data, target = get_batch(data_source, i, 1) 194 | output = eval_model(data) # 这个要返回 195 | # output,(hn,cn) = model(data) 196 | total_loss += criterion(output, target).item() # output[0]怎么是两列 197 | test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) # 这个地方检查一下linear的output[0][-1]结构 198 | truth = torch.cat((truth, target[-1].view(-1).cpu()), 0) 199 | 200 | test_result = (test_result.reshape(-1,1)).detach().numpy() 201 | truth = (truth.reshape(-1,1)).detach().numpy() 202 | test_result = scaler.inverse_transform(test_result) 203 | truth = scaler.inverse_transform(truth) # 反归一化 204 | rmSE = metrics.mean_squared_error(test_result, truth)**0.5 # 这个地方错误 205 | mAE = metrics.mean_absolute_error(truth,test_result) 206 | mAPE = metrics.mean_absolute_percentage_error(truth,test_result) 207 | 208 | plt.figure(figsize=(10,5)) 209 | plt.plot(test_result, color="red") 210 | plt.plot(truth, color="blue") 211 | plt.legend(['Prediction', 'Truth'], loc='upper right') 212 | plt.grid(True, which='both') 213 | plt.title('Transformer-LSTM') 214 | plt.xlabel('Datetime') 215 | plt.ylabel('AQI') 216 | plt.savefig('transformer-lstm-epoch%d.png' % epoch) 217 | plt.close() 218 | 219 | return (total_loss / i) ,rmSE,mAE,mAPE 220 | 221 | # %% 模型训练 222 | # 运行100个epoch,每隔10个epoch在测试集上评估一下模型 223 | train_data, val_data, scaler= get_data() 224 | model = TransAm().to(device) 225 | criterion = nn.MSELoss() 226 | lr = 0.001 227 | optimizer = torch.optim.AdamW(model.parameters(), lr=lr) 228 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95) 229 | epochs = 100 230 | 231 | 232 | # epoch = 1 233 | for epoch in range(1, epochs + 1): 234 | epoch_start_time = time.time() 235 | train(train_data) 236 | 237 | 238 | if (epoch % 50 == 0): 239 | val_loss,rmSE,mAE,mAPE= plot_and_loss(model, val_data, epoch, scaler) # 这个地方是每十个循环存一次 240 | else: 241 | val_loss = evaluate(model, val_data) 242 | 243 | 244 | print('-' * 89) 245 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, ( 246 | time.time() - epoch_start_time), val_loss, math.exp(val_loss))) 247 | # print(rmSE) 248 | print('-' * 89) 249 | scheduler.step() 250 | 251 | print(rmSE) 252 | print(mAE) 253 | print(mAPE) 254 | 255 | m = [] 256 | 257 | m.append(rmSE) 258 | m.append(mAE) 259 | m.append(mAPE) 260 | m = pd.DataFrame(m) 261 | m.to_csv('tr-lstm-%d.csv' % input_window) 262 | 263 | # torch.save(model.state_dict(), 'tran_bilstm1_model.pt') # 保存模型参数 264 | 265 | # #读取 266 | # model = TransAm().to(device) 267 | # model.load_state_dict(torch.load('tran_bilstm_model.pt')) 268 | 269 | -------------------------------------------------------------------------------- /transformer - linear.py: -------------------------------------------------------------------------------- 1 | # Tranformer 2 | 3 | # %% 导入库和相关的参数设置 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import time 8 | import math 9 | import matplotlib.pyplot as plt 10 | from sklearn.preprocessing import MinMaxScaler 11 | import pandas as pd 12 | from sklearn import metrics 13 | 14 | 15 | torch.manual_seed(0) 16 | np.random.seed(0) 17 | 18 | 19 | input_window = 5 # transformer建立长期依赖的效果差 20 | output_window = 1 21 | batch_size = 64 22 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 23 | print(device) # 悲报:没有服务器 24 | 25 | 26 | # %% 模型构建1: 位置编码 27 | class 
PositionalEncoding(nn.Module): 28 | 29 | 30 | def __init__(self, d_model, max_len=5000): 31 | super(PositionalEncoding, self).__init__() 32 | pe = torch.zeros(max_len, d_model) 33 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 34 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 35 | pe[:, 0::2] = torch.sin(position * div_term) 36 | pe[:, 1::2] = torch.cos(position * div_term) 37 | pe = pe.unsqueeze(0).transpose(0, 1) 38 | self.register_buffer('pe', pe) 39 | 40 | 41 | def forward(self, x): 42 | return x + self.pe[:x.size(0), :] 43 | 44 | # %% transforme框架 45 | # 没有采用原论文中的Encoder-Decoder的架构,而是将Decoder用了一个全连接层进行代替,用于输出预测值。 46 | # 另外,其中的create_mask将输入进行mask,从而避免引入未来信息 47 | class TransAm(nn.Module): 48 | def __init__(self, feature_size=250, num_layers=1, dropout=0.1): 49 | super(TransAm, self).__init__() 50 | self.model_type = 'Transformer' 51 | self.src_mask = None 52 | self.pos_encoder = PositionalEncoding(feature_size) 53 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=10, dropout=dropout) 54 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers) 55 | self.decoder = nn.Linear(feature_size, 1) 56 | self.init_weights() #这个是定义初始化 57 | 58 | # for name, param in self.decoder.named_parameters(): 59 | # if name.startswith("weight"): 60 | # nn.init.xavier_normal_(param) 61 | # else: 62 | # nn.init.zeros_(param) 63 | 64 | def init_weights(self): # 这是原来的哦 65 | initrange = 0.1 66 | self.decoder.bias.data.zero_() 67 | self.decoder.weight.data.uniform_(-initrange, initrange) 68 | 69 | # def init_weights(self): # 这个是lstm的stvd分解 70 | # stdv = 1.0 / math.sqrt(self.hidden_size) 71 | # for weight in self.parameters(): 72 | # weight.data.uniform_(-stdv, stdv) 73 | 74 | 75 | def forward(self, src): 76 | if self.src_mask is None or self.src_mask.size(0) != len(src): 77 | device = src.device 78 | mask = self._generate_square_subsequent_mask(len(src)).to(device) 79 | self.src_mask = mask 80 | 81 | src = self.pos_encoder(src) 82 | output = self.transformer_encoder(src, self.src_mask) 83 | output = self.decoder(output) 84 | return output 85 | 86 | def _generate_square_subsequent_mask(self, sz): 87 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 88 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 89 | return mask 90 | 91 | 92 | 93 | # %% 数据预处理:滑窗处理 94 | # 假设输入是1到20,则其标签就是2到21,以适应Transformer的seq2seq的形式的输出 95 | def create_inout_sequences(input_data, tw): 96 | inout_seq = [] 97 | L = len(input_data) 98 | for i in range(L - tw): 99 | train_seq = input_data[i:i + tw] 100 | train_label = input_data[i + output_window:i + tw + output_window] 101 | inout_seq.append((train_seq, train_label)) 102 | return torch.FloatTensor(inout_seq) 103 | 104 | # %% 数据分割与get 105 | def get_data(): 106 | 107 | 108 | series = pd.read_csv('air_patna.csv', usecols=['AQI']) 109 | scaler = MinMaxScaler(feature_range=(-1, 1)) 110 | series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1) 111 | 112 | 113 | train_samples = int(round(len(series)*0.75,0)) 114 | train_data = series[:train_samples] 115 | test_data = series[train_samples:] 116 | 117 | 118 | train_sequence = create_inout_sequences(train_data, input_window) 119 | train_sequence = train_sequence[:-output_window] 120 | 121 | 122 | test_data = create_inout_sequences(test_data, input_window) 123 | test_data = test_data[:-output_window] 124 | 125 | 126 | return train_sequence.to(device), 
test_data.to(device), scaler 127 | 128 | def get_batch(source, i, batch_size): # 便于以batch形式读取 129 | seq_len = min(batch_size, len(source) - 1 - i) 130 | data = source[i:i + seq_len] 131 | input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1)) 132 | target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1)) 133 | return input, target 134 | 135 | # %% 模型构建 136 | # 对参数进行反向传播,其中用到了梯度裁剪的技巧用于防止梯度爆炸 137 | def train(train_data): 138 | model.train() 139 | 140 | 141 | for batch_index, i in enumerate(range(0, len(train_data) - 1, batch_size)): 142 | start_time = time.time() 143 | total_loss = 0 144 | data, targets = get_batch(train_data, i, batch_size) 145 | optimizer.zero_grad() 146 | output = model(data) 147 | loss = criterion(output, targets) # lstm的输出有两个 148 | loss.backward() 149 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7) 150 | optimizer.step() 151 | 152 | total_loss += loss.item() 153 | log_interval = int(len(train_data) / batch_size / 5) 154 | if batch_index % log_interval == 0 and batch_index > 0: 155 | cur_loss = total_loss / log_interval 156 | elapsed = time.time() - start_time 157 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | {:5.2f} ms | loss {:5.5f} | ppl {:8.2f}' 158 | .format(epoch, batch_index, len(train_data) // batch_size, scheduler.get_last_lr()[0], elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss))) 159 | 160 | # %% 模型评估 161 | def evaluate(eval_model, data_source): 162 | eval_model.eval() 163 | total_loss = 0 164 | eval_batch_size = 1000 165 | with torch.no_grad(): 166 | for i in range(0, len(data_source) - 1, eval_batch_size): 167 | data, targets = get_batch(data_source, i, eval_batch_size) 168 | output = eval_model(data) 169 | total_loss += len(data[0]) * criterion(output, targets).cpu().item() 170 | return total_loss / len(data_source) 171 | 172 | # %% 模型结果绘图 173 | def plot_and_loss(eval_model, data_source, epoch, scaler): 174 | eval_model.eval() 175 | # model.eval() 176 | # data_source = val_data 177 | total_loss = 0. 
178 | test_result = torch.Tensor(0) 179 | truth = torch.Tensor(0) 180 | with torch.no_grad(): 181 | for i in range(0, len(data_source) - 1): 182 | data, target = get_batch(data_source, i, 1) 183 | output = eval_model(data) 184 | # output = model(data) 185 | total_loss += criterion(output, target).item() 186 | test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) # 这个地方检查一下linear的output[0][-1]结构 187 | truth = torch.cat((truth, target[-1].view(-1).cpu()), 0) 188 | 189 | test_result = (test_result.reshape(-1,1)).detach().numpy() 190 | truth = (truth.reshape(-1,1)).detach().numpy() 191 | test_result = scaler.inverse_transform(test_result) 192 | truth = scaler.inverse_transform(truth) # 反归一化 193 | rmSE = metrics.mean_squared_error(test_result, truth)**0.5 # 这个地方错误 194 | mAE = metrics.mean_absolute_error(truth,test_result) 195 | mAPE = metrics.mean_absolute_percentage_error(truth,test_result) 196 | 197 | plt.figure(figsize=(10,5)) 198 | plt.plot(test_result, color="red") 199 | plt.plot(truth, color="blue") 200 | plt.legend(['Prediction', 'Truth'], loc='upper right') 201 | plt.grid(True, which='both') 202 | plt.title('Transformer-LSTM') 203 | plt.xlabel('Datetime') 204 | plt.ylabel('AQI') 205 | plt.savefig('transformer-lstm-epoch%d.png' % epoch) 206 | plt.close() 207 | 208 | return (total_loss / i) ,rmSE,mAE,mAPE 209 | 210 | # %% 模型训练 211 | # 运行100个epoch,每隔10个epoch在测试集上评估一下模型 212 | train_data, val_data, scaler= get_data() 213 | model = TransAm().to(device) 214 | criterion = nn.MSELoss() 215 | lr = 0.001 216 | optimizer = torch.optim.AdamW(model.parameters(), lr=lr) 217 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95) 218 | epochs = 100 219 | 220 | 221 | # epoch = 1 222 | for epoch in range(1, epochs + 1): 223 | epoch_start_time = time.time() 224 | train(train_data) 225 | 226 | 227 | if (epoch % 50 == 0): 228 | val_loss, rmSE,mAE,mAPE= plot_and_loss(model, val_data, epoch, scaler) # 这个地方是每十个循环存一次 229 | else: 230 | val_loss = evaluate(model, val_data) 231 | 232 | 233 | print('-' * 89) 234 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, ( 235 | time.time() - epoch_start_time), val_loss, math.exp(val_loss))) 236 | # print(rmSE) 237 | print('-' * 89) 238 | scheduler.step() 239 | 240 | print(rmSE) 241 | print(mAE) 242 | print(mAPE) 243 | 244 | m = [] 245 | 246 | m.append(rmSE) 247 | m.append(mAE) 248 | m.append(mAPE) 249 | m = pd.DataFrame(m) 250 | m.to_csv('tr-linear-%d.csv' % input_window) 251 | 252 | # torch.save(model.state_dict(), 'tran_model.pt') # 保存模型参数 -------------------------------------------------------------------------------- /wavelet - bilstm.py: -------------------------------------------------------------------------------- 1 | # BiLSTM和LSTM :区别只是在bidirectional=False/True和nn.Linear(hidden_size, output_size)/hidden_size*2,因此放在同一个文件中的 2 | 3 | # %% 导入库和相关的参数设置 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | import time 8 | import math 9 | import matplotlib.pyplot as plt 10 | from sklearn.preprocessing import MinMaxScaler 11 | import pandas as pd 12 | from sklearn import metrics 13 | import pywt 14 | 15 | 16 | torch.manual_seed(0) 17 | np.random.seed(0) 18 | 19 | 20 | input_window = 5 # transformer建立长期依赖的效果差 21 | output_window = 1 22 | batch_size = 64 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | print(device) 25 | 26 | 27 | 28 | class LstmRNN(nn.Module): 29 | 30 | def __init__(self, input_size=1, hidden_size=1, output_size=1, num_layers=1): 
31 | super().__init__() 32 | 33 | self.lstm = nn.LSTM(input_size, hidden_size, num_layers,bias = True,bidirectional=True) # utilize the LSTM model in torch.nn 34 | self.linear1 = nn.Linear(2*hidden_size, output_size) # 全连接层 35 | self.init_weights() #这个是定义初始化 36 | 37 | def forward(self, _x): 38 | x, _ = self.lstm(_x) # _x is input, size (seq_len, batch, input_size) 39 | s, b, h = x.shape # x is output, size (seq_len, batch, hidden_size) 40 | x = x.view(s * b, h) 41 | x = self.linear1(x) 42 | x = x.view(s, b, -1) 43 | return x 44 | 45 | def init_weights(self): # 这是原来的哦 46 | initrange = 0.1 47 | self.linear1.bias.data.zero_() 48 | self.linear1.weight.data.uniform_(-initrange, initrange) 49 | 50 | 51 | 52 | # %% 数据预处理:滑窗处理 53 | # 假设输入是1到20,则其标签就是2到21,以适应Transformer的seq2seq的形式的输出 54 | def create_inout_sequences(input_data, tw): 55 | inout_seq = [] 56 | L = len(input_data) 57 | for i in range(L - tw): 58 | train_seq = input_data[i:i + tw] 59 | train_label = input_data[i + output_window:i + tw + output_window] 60 | inout_seq.append((train_seq, train_label)) 61 | return torch.FloatTensor(inout_seq) 62 | 63 | # %% 数据分割与get 64 | def get_data(): 65 | 66 | 67 | series = pd.read_csv('air_patna.csv', usecols=['AQI']) 68 | scaler = MinMaxScaler(feature_range=(-1, 1)) 69 | series = scaler.fit_transform(series.values.reshape(-1, 1)).reshape(-1) 70 | 71 | 72 | #小波去噪处理 73 | w = pywt.Wavelet('db8') # 选用Daubechies8小波 74 | maxlev = pywt.dwt_max_level(len(series), w.dec_len) 75 | threshold = 0.04 # Threshold for filtering 76 | coeffs = pywt.wavedec(series, 'db8', level=maxlev) # 将信号进行小波分解 77 | # print(coeffs[0].shape) 78 | # print(len(coeffs)) 79 | for i in range(1, len(coeffs)): 80 | coeffs[i] = pywt.threshold(coeffs[i], threshold*max(coeffs[i])) # 将噪声滤波 81 | series = pywt.waverec(coeffs, 'db8') # 将信号进行小波重构 82 | 83 | # training_set_scaled=np.array(training_set_scaled) 84 | # series=series.reshape(-1,1) 85 | 86 | train_samples = int(round(len(series)*0.75,0)) 87 | train_data = series[:train_samples] 88 | test_data = series[train_samples:] 89 | 90 | 91 | train_sequence = create_inout_sequences(train_data, input_window) 92 | train_sequence = train_sequence[:-output_window] 93 | 94 | 95 | test_data = create_inout_sequences(test_data, input_window) 96 | test_data = test_data[:-output_window] 97 | 98 | 99 | return train_sequence.to(device), test_data.to(device), scaler 100 | 101 | def get_batch(source, i, batch_size): # 便于以batch形式读取 102 | seq_len = min(batch_size, len(source) - 1 - i) 103 | data = source[i:i + seq_len] 104 | input = torch.stack(torch.stack([item[0] for item in data]).chunk(input_window, 1)) 105 | target = torch.stack(torch.stack([item[1] for item in data]).chunk(input_window, 1)) 106 | return input, target 107 | 108 | # %% 模型构建 109 | # 对参数进行反向传播,其中用到了梯度裁剪的技巧用于防止梯度爆炸 110 | def train(train_data): 111 | model.train() # 查看结构 112 | 113 | # bilstm = nn.LSTM(input_size=1, hidden_size=1, num_layers=2, bidirectional = False) 114 | # output, (hn, cn) =bilstm(data) 115 | # output[0].shape # 把前面的改一下 116 | 117 | for batch_index, i in enumerate(range(0, len(train_data) - 1, batch_size)): 118 | start_time = time.time() 119 | total_loss = 0 120 | data, targets = get_batch(train_data, i, batch_size) # 看看输入 121 | optimizer.zero_grad() 122 | output = model(data) 123 | loss = criterion(output, targets) # lstm的输出有两个 124 | loss.backward() 125 | # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.7) 126 | optimizer.step() 127 | 128 | total_loss += loss.item() 129 | log_interval = int(len(train_data) / batch_size / 5) 
130 | if batch_index % log_interval == 0 and batch_index > 0: 131 | cur_loss = total_loss / log_interval 132 | elapsed = time.time() - start_time 133 | print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | {:5.2f} ms | loss {:5.5f} | ppl {:8.2f}' 134 | .format(epoch, batch_index, len(train_data) // batch_size, scheduler.get_last_lr()[0], elapsed * 1000 / log_interval, cur_loss, math.exp(cur_loss))) 135 | 136 | # %% 模型评估 137 | def evaluate(eval_model, data_source): 138 | eval_model.eval() 139 | total_loss = 0 140 | eval_batch_size = 1000 141 | with torch.no_grad(): 142 | for i in range(0, len(data_source) - 1, eval_batch_size): 143 | data, targets = get_batch(data_source, i, eval_batch_size) 144 | output = eval_model(data) 145 | total_loss += len(data[0]) * criterion(output, targets).cpu().item() 146 | return total_loss / len(data_source) 147 | 148 | # %% 模型结果绘图 149 | def plot_and_loss(eval_model, data_source, epoch, scaler): 150 | eval_model.eval() 151 | # model.eval() 152 | # data_source = val_data 153 | total_loss = 0. 154 | test_result = torch.Tensor(0) 155 | truth = torch.Tensor(0) 156 | with torch.no_grad(): 157 | for i in range(0, len(data_source) - 1): 158 | data, target = get_batch(data_source, i, 1) 159 | output = eval_model(data) # 这个要返回 160 | # output,(hn,cn) = model(data) 161 | total_loss += criterion(output, target).item() # output[0]怎么是两列 162 | test_result = torch.cat((test_result, output[-1].view(-1).cpu()), 0) # 这个地方检查一下linear的output[0][-1]结构 163 | truth = torch.cat((truth, target[-1].view(-1).cpu()), 0) 164 | 165 | test_result = (test_result.reshape(-1,1)).detach().numpy() 166 | truth = (truth.reshape(-1,1)).detach().numpy() 167 | test_result = scaler.inverse_transform(test_result) 168 | truth = scaler.inverse_transform(truth) # 反归一化 169 | rmSE = metrics.mean_squared_error(test_result, truth)**0.5 # 这个地方错误 170 | mAE = metrics.mean_absolute_error(truth,test_result) 171 | mAPE = metrics.mean_absolute_percentage_error(truth,test_result) 172 | 173 | plt.figure(figsize=(10,5)) 174 | plt.plot(test_result, color="red") 175 | plt.plot(truth, color="blue") 176 | plt.legend(['Prediction', 'Truth'], loc='upper right',fontsize=12) 177 | plt.grid(True, which='both') 178 | plt.title('Wavelet-BiLSTM',fontsize=20) 179 | plt.xlabel('Datetime/hour',fontsize=12) 180 | plt.ylabel('AQI',fontsize=12) 181 | plt.savefig('wavelet-bilstm-win%d.png' % input_window) 182 | plt.close() 183 | 184 | 185 | return (total_loss / i) ,rmSE,mAE,mAPE 186 | 187 | # %% 模型训练 188 | # 运行100个epoch,每隔10个epoch在测试集上评估一下模型 189 | train_data, val_data, scaler= get_data() 190 | model = LstmRNN().to(device) 191 | criterion = nn.MSELoss() 192 | lr = 0.001 193 | optimizer = torch.optim.AdamW(model.parameters(), lr=lr) 194 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.95) 195 | epochs = 100 196 | 197 | # epoch = 1 198 | for epoch in range(1, epochs + 1): 199 | epoch_start_time = time.time() 200 | train(train_data) 201 | 202 | 203 | if (epoch % 100 == 0): 204 | val_loss, rmSE,mAE,mAPE= plot_and_loss(model, val_data, epoch, scaler) # 这个地方是每十个循环存一次 205 | else: 206 | val_loss = evaluate(model, val_data) 207 | 208 | 209 | print('-' * 89) 210 | print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.5f} | valid ppl {:8.2f}'.format(epoch, ( 211 | time.time() - epoch_start_time), val_loss, math.exp(val_loss))) 212 | # print(rmSE) 213 | print('-' * 89) 214 | scheduler.step() 215 | 216 | print(rmSE) 217 | print(mAE) 218 | print(mAPE) 219 | 220 | m = [] 221 | 222 | m.append(rmSE) 223 | m.append(mAE) 224 | 
m.append(mAPE)
225 | m = pd.DataFrame(m)
226 | m.to_csv('wavelet-bilstm-%d.csv' % input_window)
227 | 
228 | # torch.save(model.state_dict(), 'bilstm_model.pt')  # save the model parameters
229 | 
--------------------------------------------------------------------------------
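For readers who just want the gist of the pipeline, the sketch below condenses the decompose-predict-recombine idea from the README into a few lines and runs on a synthetic stand-in signal. It is only an illustration under simplifying assumptions, not the repository's method: each IMF is forecast with plain linear autoregression and the per-IMF forecasts are simply summed, whereas the scripts above use a Transformer-BiLSTM per IMF (falling back to linear regression for smooth IMFs) and a second BiLSTM to recombine the IMF predictions. The window length and 75/25 split mirror the main script; the synthetic signal is an assumption standing in for the AQI series.

```python
# Illustrative sketch only (not part of the repository): EMD -> per-IMF forecast -> recombine.
import numpy as np
from PyEMD import EMD
from sklearn.linear_model import LinearRegression

def make_windows(series, window):
    """Turn a 1-D series into (X, y) pairs: length-`window` inputs and next-step targets."""
    X = np.array([series[i:i + window] for i in range(len(series) - window)])
    y = series[window:]
    return X, y

window = 5                                   # same input window as the main script
t = np.arange(2000)
signal = np.sin(0.02 * t) + 0.3 * np.sin(0.3 * t) + 0.1 * np.random.randn(len(t))  # stand-in for the AQI series

emd = EMD()
imfs = emd(signal, max_imf=12)               # decompose into intrinsic mode functions
split = int(round(len(signal) * 0.75))       # 75/25 train/test split, as in the repository

forecasts = []
for imf in imfs:
    X, y = make_windows(imf, window)
    X_train, y_train = X[:split - window], y[:split - window]   # windows whose target falls in the training part
    X_test = X[split - window:]                                  # windows whose target falls in the test part
    model = LinearRegression().fit(X_train, y_train)             # simple per-IMF one-step-ahead predictor
    forecasts.append(model.predict(X_test))

recombined = np.sum(forecasts, axis=0)       # plain sum instead of the BiLSTM aggregator used in the repo
truth = signal[split:]
print("sketch RMSE on synthetic data: %.4f" % np.sqrt(np.mean((recombined - truth) ** 2)))
```

Swapping the `LinearRegression` step for the `TransAm` model and the final sum for the trained `LstmRNN` aggregator recovers the full pipeline implemented in `emd-transformer-bilstm.py`.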